Environment:

  • vSphere 7, VM hardware version 19
  • Two VMs running Debian 11.9
  • SSH Enabled
  • Root login via SSH enabled. Not best practice, but as this is a lab, I did not want to troubleshoot permission-related issues:
Edit /etc/ssh/sshd_config and set "PermitRootLogin yes"
Restart sshd - systemctl restart sshd
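
As a sketch, the same change as a one-liner (assuming Debian's stock sshd_config, where the directive ships commented out):

root@control1:~# sed -i 's/^#\?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config
root@control1:~# systemctl restart sshd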

Install govc

root@control1:~# export GOVC_INSECURE=1

root@control1:~# export GOVC_URL='https://administrator@vsphere.local:<PASSWORD>@vc802.gs.labs'

root@control1:~# curl -L -o - "https://github.com/vmware/govmomi/releases/latest/download/govc_$(uname -s)_$(uname -m).tar.gz" | tar -C /usr/local/bin -xvzf - govc
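
# Sanity check: if the exported URL/credentials are good, this should print the vCenter version
root@control1:~# govc about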

Enable UUID

# List existing VM inventory paths with: govc ls /<Datacenter>/vm
root@control1:~# govc vm.change -vm /Sydney/vm/control1 -e disk.enableUUID=TRUE
root@control1:~# govc vm.change -vm /Sydney/vm/worker1 -e disk.enableUUID=TRUE
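
# Optionally confirm the flag took effect (-e prints the VM's ExtraConfig; exact output format varies by govc version)
root@control1:~# govc vm.info -e=true -dc=Sydney -vm.ipath="/Sydney/vm/control1" | grep -i disk.enableUUID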

Extract VM UUID

root@control1:~# govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/control1" -e=true | jq -r ' .virtualMachines[] | .config.uuid '
420342b3-7528-4f19-d083-bfe4172ca677

root@control1:~# govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/worker1" -e=true | jq -r ' .virtualMachines[] | .config.uuid '
4203148f-7bc3-c8ec-5a40-c7283654597f
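
# With more VMs, the same command loops cleanly (reusing the inventory paths above)
root@control1:~# for vm in control1 worker1; do govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/$vm" | jq -r '.virtualMachines[].config.uuid'; done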

Initialize the K3s cluster on the first VM as the server (master) node.

Notes:

  1. We will disable the built-in cloud provider, as we will be using the vSphere Cloud Provider.
  2. The provider ID has to be the VM UUID we extracted in the previous step.
  3. We will also disable the built-in servicelb; we will use MetalLB as the LoadBalancer instead.
root@control1:~#  curl -sfL https://get.k3s.io | sh -s - server --cluster-init --disable-cloud-controller --node-name control1.gs.labs --kubelet-arg="cloud-provider=external" --disable="servicelb" --kubelet-arg="provider-id=vsphere://420342b3-7528-4f19-d083-bfe4172ca677"

[INFO]  Finding release for channel stable
[INFO]  Using v1.28.6+k3s2 as release
[INFO]  Downloading hash https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/sha256sum-amd64.txt
[INFO]  Downloading binary https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/k3s
[INFO]  Verifying binary download
[INFO]  Installing k3s to /usr/local/bin/k3s
[INFO]  Skipping installation of SELinux RPM
[INFO]  Creating /usr/local/bin/kubectl symlink to k3s
[INFO]  Creating /usr/local/bin/crictl symlink to k3s
[INFO]  Creating /usr/local/bin/ctr symlink to k3s
[INFO]  Creating killall script /usr/local/bin/k3s-killall.sh
[INFO]  Creating uninstall script /usr/local/bin/k3s-uninstall.sh
[INFO]  env: Creating environment file /etc/systemd/system/k3s.service.env
[INFO]  systemd: Creating service file /etc/systemd/system/k3s.service
[INFO]  systemd: Enabling k3s unit
Created symlink /etc/systemd/system/multi-user.target.wants/k3s.service → /etc/systemd/system/k3s.service.
[INFO]  Host iptables-save/iptables-restore tools not found
[INFO]  Host ip6tables-save/ip6tables-restore tools not found
[INFO]  systemd: Starting k3s

root@control1:~# kubectl get nodes
NAME       STATUS   ROLES                       AGE     VERSION
control1   Ready    control-plane,etcd,master   2m25s   v1.28.6+k3s2
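
# Optionally verify that the provider ID we passed to the installer landed on the node object
root@control1:~# kubectl get node control1 -o jsonpath='{.spec.providerID}{"\n"}'
# expected: vsphere://420342b3-7528-4f19-d083-bfe4172ca677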

Extract the node token

root@control1:~# cat /var/lib/rancher/k3s/server/node-token
K100ee021e3ebb9cdf1e00ff00d5668748091b06c62a954472482469864719616d7::server:bdb7f1eb05d2678e04777f5132c7f0e7

Install K3s on the worker node. Run this on worker1, passing the node token from the previous step.

root@worker1:~# curl -sfL https://get.k3s.io | sh -s - agent --server https://control1.gs.labs:6443 --token K100ee021e3ebb9cdf1e00ff00d5668748091b06c62a954472482469864719616d7::server:bdb7f1eb05d2678e04777f5132c7f0e7 --node-name worker1.gs.labs --kubelet-arg="cloud-provider=external" --kubelet-arg="provider-id=vsphere://4203148f-7bc3-c8ec-5a40-c7283654597f"
[INFO]  Finding release for channel stable
[INFO]  Using v1.28.6+k3s2 as release
[INFO]  Downloading hash https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/sha256sum-amd64.txt
[INFO]  Downloading binary https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/k3s
[INFO]  Verifying binary download
[INFO]  Installing k3s to /usr/local/bin/k3s
[INFO]  Skipping installation of SELinux RPM
[INFO]  Creating /usr/local/bin/kubectl symlink to k3s
[INFO]  Creating /usr/local/bin/crictl symlink to k3s
[INFO]  Creating /usr/local/bin/ctr symlink to k3s
[INFO]  Creating killall script /usr/local/bin/k3s-killall.sh
[INFO]  Creating uninstall script /usr/local/bin/k3s-agent-uninstall.sh
[INFO]  env: Creating environment file /etc/systemd/system/k3s-agent.service.env
[INFO]  systemd: Creating service file /etc/systemd/system/k3s-agent.service
[INFO]  systemd: Enabling k3s-agent unit
Created symlink /etc/systemd/system/multi-user.target.wants/k3s-agent.service → /etc/systemd/system/k3s-agent.service.
[INFO]  Host iptables-save/iptables-restore tools not found
[INFO]  Host ip6tables-save/ip6tables-restore tools not found
[INFO]  systemd: Starting k3s-agent

root@control1:~# kubectl get nodes
NAME       STATUS   ROLES                       AGE    VERSION
control1   Ready    control-plane,etcd,master   5m9s   v1.28.6+k3s2
worker1    Ready    <none>                      11s    v1.28.6+k3s2

Validate that the taints below are set on both nodes.

root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name:               control1
Taints:             node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule
Name:               worker1
Taints:             node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule

# No Pods will be scheduled until the vSphere cloud provider is deployed.
root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS    RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   0/1     Pending   0          9m56s
kube-system   helm-install-traefik-crd-27bbg            0/1     Pending   0          9m56s
kube-system   helm-install-traefik-l5hxk                0/1     Pending   0          9m56s
kube-system   local-path-provisioner-84db5d44d9-6lj95   0/1     Pending   0          9m56s
kube-system   metrics-server-67c658944b-qnxrh           0/1     Pending   0          9m56s

Prepare to install the vSphere Cloud Provider

# Set the Kubernetes version - "kubectl get nodes" shows the current version
root@control1:~# export VERSION=1.28

# Download the vSphere Cloud Controller Manager config file
root@control1:~# wget https://raw.githubusercontent.com/kubernetes/cloud-provider-vsphere/release-$VERSION/releases/v$VERSION/vsphere-cloud-controller-manager.yaml

# Modify the config file with environment specific values
root@control1:~# cat vsphere-cloud-controller-manager.yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloud-controller-manager
  labels:
    vsphere-cpi-infra: service-account
    component: cloud-controller-manager
  namespace: kube-system
---
apiVersion: v1
kind: Secret
metadata:
  name: vsphere-cloud-secret
  labels:
    vsphere-cpi-infra: secret
    component: cloud-controller-manager
  namespace: kube-system
  # NOTE: this is just an example configuration, update with real values based on your environment
stringData:
  172.16.0.20.username: "administrator@vsphere.local"
  172.16.0.20.password: "<PASSWORD>"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: vsphere-cloud-config
  labels:
    vsphere-cpi-infra: config
    component: cloud-controller-manager
  namespace: kube-system
data:
  # NOTE: this is just an example configuration, update with real values based on your environment
  vsphere.conf: |
    # Global properties in this section will be used for all specified vCenters unless overridden in the VirtualCenter section.
    global:
      port: 443
      # set insecureFlag to true if the vCenter uses a self-signed cert
      insecureFlag: true
      # settings for using k8s secret
      secretName: vsphere-cloud-secret
      secretNamespace: kube-system

    # vcenter section
    vcenter:
      vc802:
        server: 172.16.0.20
        user: administrator@vsphere.local
        password: <PASSWORD>
        datacenters:
          - Sydney
---

Install the vSphere Cloud Provider

root@control1:~# kubectl apply -f vsphere-cloud-controller-manager.yaml 
serviceaccount/cloud-controller-manager created
secret/vsphere-cloud-secret created
configmap/vsphere-cloud-config created
rolebinding.rbac.authorization.k8s.io/servicecatalog.k8s.io:apiserver-authentication-reader created
clusterrolebinding.rbac.authorization.k8s.io/system:cloud-controller-manager created
clusterrole.rbac.authorization.k8s.io/system:cloud-controller-manager created
daemonset.apps/vsphere-cloud-controller-manager created

root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS    RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   0/1     Pending   0          35m
kube-system   helm-install-traefik-crd-27bbg            0/1     Pending   0          35m
kube-system   helm-install-traefik-l5hxk                0/1     Pending   0          35m
kube-system   local-path-provisioner-84db5d44d9-6lj95   0/1     Pending   0          35m
kube-system   metrics-server-67c658944b-qnxrh           0/1     Pending   0          35m

root@control1:~# kubectl get ds -n kube-system
NAME                               DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR   AGE
vsphere-cloud-controller-manager   1         1         1       1            1           <none>          54s

root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS              RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   1/1     Running             0          36m
kube-system   helm-install-traefik-crd-27bbg            0/1     Completed           0          36m
kube-system   helm-install-traefik-l5hxk                0/1     Completed           1          36m
kube-system   local-path-provisioner-84db5d44d9-6lj95   1/1     Running             0          36m
kube-system   metrics-server-67c658944b-qnxrh           1/1     Running             0          36m
kube-system   traefik-f4564c4f4-vsjvc                   0/1     ContainerCreating   0          3s
kube-system   vsphere-cloud-controller-manager-dhmrs    1/1     Running             0          39s
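
# With the CCM running, the uninitialized taints should now be gone; re-run the earlier check to confirm
root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name:               control1
Taints:             <none>
Name:               worker1
Taints:             <none>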

Install Helm

# Export KUBECONFIG
root@control1:~# export KUBECONFIG=/etc/rancher/k3s/k3s.yaml 

# Install Helm
root@control1:~# curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
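
# Confirm Helm landed on the PATH
root@control1:~# helm version --short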

Install MetalLB

# Add metallb helm repo
root@control1:~# helm repo add metallb https://metallb.github.io/metallb
"metallb" has been added to your repositories

# Install metallb
root@control1:~#  helm upgrade --install metallb  metallb/metallb --create-namespace --namespace metallb-system --set crds.validationFailurePolicy=Ignore --wait
Release "metallb" does not exist. Installing it now.
NAME: metallb
LAST DEPLOYED: Thu Feb 15 17:31:22 2024
NAMESPACE: metallb-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
MetalLB is now running in the cluster.

Now you can configure it via its CRs. Please refer to the metallb official docs

# Validate that the MetalLB pods are running ("k" is an alias for kubectl)
root@control1:~# k get pods -A
NAMESPACE        NAME                                      READY   STATUS      RESTARTS   AGE
kube-system      coredns-6799fbcd5-nxzzn                   1/1     Running     0          22m
kube-system      helm-install-traefik-8xgmb                0/1     Completed   2          22m
kube-system      helm-install-traefik-crd-b5jdg            0/1     Completed   0          22m
kube-system      local-path-provisioner-84db5d44d9-m8m52   1/1     Running     0          22m
kube-system      metrics-server-67c658944b-jhxc4           1/1     Running     0          22m
kube-system      traefik-f4564c4f4-mvslp                   1/1     Running     0          19m
kube-system      vsphere-cloud-controller-manager-dxtwq    1/1     Running     0          20m
metallb-system   metallb-controller-648b76f565-7vdrs       1/1     Running     0          2m9s
metallb-system   metallb-speaker-d7sw4                     4/4     Running     0          2m9s
metallb-system   metallb-speaker-dtccv                     4/4     Running     0          2m9s

# Review metallb-webhook-service
root@control1:~# k get svc -A
NAMESPACE        NAME                      TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                      AGE
default          kubernetes                ClusterIP      10.43.0.1      <none>        443/TCP                      23m
kube-system      kube-dns                  ClusterIP      10.43.0.10     <none>        53/UDP,53/TCP,9153/TCP       23m
kube-system      metrics-server            ClusterIP      10.43.174.89   <none>        443/TCP                      23m
kube-system      traefik                   LoadBalancer   10.43.88.253   <pending>     80:32437/TCP,443:31234/TCP   19m
metallb-system   metallb-webhook-service   ClusterIP      10.43.36.216   <none>        443/TCP                      2m28s

Create an L2 IP pool config for MetalLB. This IP pool will be used for external IPs.

# Create the IP Pool
root@control1:~# cat ip-pool.yaml 
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: default-pool
  namespace: metallb-system
spec:
  addresses:
  - 172.16.1.201-172.16.1.250
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: default
  namespace: metallb-system
spec:
  ipAddressPools:
  - default-pool

# Apply the ip pool config
root@control1:~# k apply -f ip-pool.yaml 
ipaddresspool.metallb.io/default-pool created
l2advertisement.metallb.io/default created

# Validate that external IPs are being assigned to services
root@control1:~# k get svc -A
NAMESPACE        NAME                      TYPE           CLUSTER-IP     EXTERNAL-IP    PORT(S)                      AGE
default          kubernetes                ClusterIP      10.43.0.1      <none>         443/TCP                      25m
kube-system      kube-dns                  ClusterIP      10.43.0.10     <none>         53/UDP,53/TCP,9153/TCP       25m
kube-system      metrics-server            ClusterIP      10.43.174.89   <none>         443/TCP                      25m
kube-system      traefik                   LoadBalancer   10.43.88.253   172.16.1.201   80:32437/TCP,443:31234/TCP   22m
metallb-system   metallb-webhook-service   ClusterIP      10.43.36.216   <none>         443/TCP                      4m57s
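
# A quick reachability test against the assigned address; Traefik should answer
# (expect a 404, since nothing is routed yet - that's enough to prove L2 advertisement works)
root@control1:~# curl -sI http://172.16.1.201 | head -1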

Prepare to install the vSphere CSI driver

# Create the vmware-system-csi namespace
root@control1:~# kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/vsphere-csi-driver/v3.0.0/manifests/vanilla/namespace.yaml
namespace/vmware-system-csi created

# Taint the control plane node (the upstream CSI manifests expect control-plane nodes to carry this taint)
root@control1:~# kubectl taint nodes control1 node-role.kubernetes.io/control-plane=:NoSchedule
node/control1 tainted

root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name:               control1
Taints:             node-role.kubernetes.io/control-plane:NoSchedule
Name:               worker1
Taints:             <none>

# Prepare the config for the vsphere-config-secret

root@control1:~# cat csi-vsphere.conf 
[Global]
cluster-id = "k3s-cluster-01"
cluster-distribution = "k3s"

[VirtualCenter "<172.16.0.20"]
insecure-flag = "true"
user = "administrator@vsphere.local"
password = "VMware1234!"
port = "443"
datacenters = "Sydney"
targetvSANFileShareDatastoreURLs = "ds:///vmfs/volumes/vsan:52f1359c42e51bbb-42b33e05f0cd895a/"

# Create the secret
root@control1:~# kubectl create secret generic vsphere-config-secret --from-file=csi-vsphere.conf --namespace=vmware-system-csi
secret/vsphere-config-secret created
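
# Optionally verify the secret, then remove the plaintext conf file since it contains credentials
root@control1:~# kubectl get secret vsphere-config-secret -n vmware-system-csi
root@control1:~# rm csi-vsphere.conf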

Install the vSphere CSI driver

# Installing CSI driver v3.0.0
root@control1:~# kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/vsphere-csi-driver/v3.0.0/manifests/vanilla/vsphere-csi-driver.yaml
csidriver.storage.k8s.io/csi.vsphere.vmware.com created
serviceaccount/vsphere-csi-controller created
clusterrole.rbac.authorization.k8s.io/vsphere-csi-controller-role created
clusterrolebinding.rbac.authorization.k8s.io/vsphere-csi-controller-binding created
serviceaccount/vsphere-csi-node created
clusterrole.rbac.authorization.k8s.io/vsphere-csi-node-cluster-role created
clusterrolebinding.rbac.authorization.k8s.io/vsphere-csi-node-cluster-role-binding created
role.rbac.authorization.k8s.io/vsphere-csi-node-role created
rolebinding.rbac.authorization.k8s.io/vsphere-csi-node-binding created
configmap/internal-feature-states.csi.vsphere.vmware.com created
service/vsphere-csi-controller created
deployment.apps/vsphere-csi-controller created
daemonset.apps/vsphere-csi-node created
daemonset.apps/vsphere-csi-node-windows created

# Scale the CSI Controller deployment based on the number of control plane nodes.
root@control1:~# k scale deploy vsphere-csi-controller --replicas=1 -n vmware-system-csi
deployment.apps/vsphere-csi-controller scaled

# Review the nodeSelector configuration if the CSI Controller pod does not get created.

root@control1:~# k edit deploy vsphere-csi-controller -n vmware-system-csi
# I had to change the below config - k3s labels control plane nodes with the
# value "true", while the upstream manifest assumes kubeadm's empty-value label.
# From:
nodeSelector:
  node-role.kubernetes.io/control-plane: ""

# To:
nodeSelector:
  node-role.kubernetes.io/control-plane: "true"

# Validate that the vsphere-csi-* pods are running
root@control1:~# k get pods -A
NAMESPACE           NAME                                      READY   STATUS      RESTARTS        AGE
kube-system         coredns-6799fbcd5-9hwf7                   1/1     Running     0               83m
kube-system         helm-install-traefik-9clqx                0/1     Completed   1               83m
kube-system         helm-install-traefik-crd-bxtf6            0/1     Completed   0               83m
kube-system         local-path-provisioner-84db5d44d9-qws7p   1/1     Running     0               83m
kube-system         metrics-server-67c658944b-jnh4m           1/1     Running     0               83m
kube-system         traefik-f4564c4f4-mnshc                   1/1     Running     0               76m
kube-system         vsphere-cloud-controller-manager-k5jct    1/1     Running     0               76m
metallb-system      metallb-controller-648b76f565-kmdx2       1/1     Running     0               58m
metallb-system      metallb-speaker-9hm5c                     4/4     Running     0               58m
metallb-system      metallb-speaker-rpgr9                     4/4     Running     0               58m
vmware-system-csi   vsphere-csi-controller-656ddc7cf9-zgswg   7/7     Running     0               4m24s
vmware-system-csi   vsphere-csi-node-d49t9                    3/3     Running     5 (4m26s ago)   6m38s
vmware-system-csi   vsphere-csi-node-pn4jc                    3/3     Running     5 (4m47s ago)   6m38s
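
As a final check, the driver should be registered against both nodes; from here, a StorageClass pointing at csi.vsphere.vmware.com plus a test PVC is the natural smoke test.

root@control1:~# k get csinodes
root@control1:~# k get csidriver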