Environment:
- vSphere 7, VM hardware version 19
- Two VMs running Debian 11.9
- SSH Enabled
- Root login via SSH enabled. Not best practice, but as this is a lab, I did not want to troubleshoot permission-related issues.
Edit /etc/ssh/sshd_config
Add "PermitRootLogin yes"
Restart sshd: systemctl restart sshd
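If you prefer to script this step, here is a minimal sketch, assuming the stock Debian sshd_config where the directive ships commented out:
# Lab only - enable root SSH login on each VM
sed -i 's/^#\?PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config
systemctl restart sshd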
Install govc
root@control1:~# export GOVC_INSECURE=1
root@control1:~# export GOVC_URL='https://administrator@vsphere.local:<PASSWORD>@vc802.gs.labs'
root@control1:~# curl -L -o - "https://github.com/vmware/govmomi/releases/latest/download/govc_$(uname -s)_$(uname -m).tar.gz" | tar -C /usr/local/bin -xvzf - govc
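With the variables exported, a quick connectivity check is worthwhile before going further; govc about prints the vCenter name and version details if the URL and credentials are good:
root@control1:~# govc about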
Enable disk UUID on both VMs
# List existing VM inventory paths with "govc ls /<Datacenter>/vm"
root@control1:~# govc vm.change -vm /Sydney/vm/control1 -e disk.enableUUID=TRUE
root@control1:~# govc vm.change -vm /Sydney/vm/worker1 -e disk.enableUUID=TRUE
Extract VM UUID
root@control1:~# govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/control1" -e=true | jq -r ' .virtualMachines[] | .config.uuid '
420342b3-7528-4f19-d083-bfe4172ca677
root@control1:~# govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/worker1" -e=true | jq -r ' .virtualMachines[] | .config.uuid '
4203148f-7bc3-c8ec-5a40-c7283654597f
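Since the same extraction runs once per VM, a small loop saves some typing; this is a sketch assuming both VMs live under /Sydney/vm:
for vm in control1 worker1; do
  printf '%s: ' "$vm"
  govc vm.info -json -dc=Sydney -vm.ipath="/Sydney/vm/$vm" | jq -r '.virtualMachines[].config.uuid'
done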
Initialize the K3S cluster on the first VM, which will act as the control plane (server) node.
Notes:
- We will disable the built-in cloud controller, as we will be using the vSphere Cloud Provider.
- The ProviderID has to be the VM UUID that we extracted in the previous step.
- We will also disable the built-in servicelb; we will use MetalLB as the LoadBalancer.
root@control1:~# curl -sfL https://get.k3s.io | sh -s - server --cluster-init --disable-cloud-controller --node-name control1.gs.labs --kubelet-arg="cloud-provider=external" --disable="servicelb" --kubelet-arg="provider-id=vsphere://420342b3-7528-4f19-d083-bfe4172ca677"
[INFO] Finding release for channel stable
[INFO] Using v1.28.6+k3s2 as release
[INFO] Downloading hash https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/sha256sum-amd64.txt
[INFO] Downloading binary https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/k3s
[INFO] Verifying binary download
[INFO] Installing k3s to /usr/local/bin/k3s
[INFO] Skipping installation of SELinux RPM
[INFO] Creating /usr/local/bin/kubectl symlink to k3s
[INFO] Creating /usr/local/bin/crictl symlink to k3s
[INFO] Creating /usr/local/bin/ctr symlink to k3s
[INFO] Creating killall script /usr/local/bin/k3s-killall.sh
[INFO] Creating uninstall script /usr/local/bin/k3s-uninstall.sh
[INFO] env: Creating environment file /etc/systemd/system/k3s.service.env
[INFO] systemd: Creating service file /etc/systemd/system/k3s.service
[INFO] systemd: Enabling k3s unit
Created symlink /etc/systemd/system/multi-user.target.wants/k3s.service → /etc/systemd/system/k3s.service.
[INFO] Host iptables-save/iptables-restore tools not found
[INFO] Host ip6tables-save/ip6tables-restore tools not found
[INFO] systemd: Starting k3s
root@control1:~# kubectl get nodes
NAME       STATUS   ROLES                       AGE     VERSION
control1   Ready    control-plane,etcd,master   2m25s   v1.28.6+k3s2
Extract the node token
root@control1:~# cat /var/lib/rancher/k3s/server/node-token
K100ee021e3ebb9cdf1e00ff00d5668748091b06c62a954472482469864719616d7::server:bdb7f1eb05d2678e04777f5132c7f0e7
Install the worker node
# Run this on the worker VM, joining it to the server with the node token from the previous step
root@worker1:~# curl -sfL https://get.k3s.io | sh -s - agent --server https://control1.gs.labs:6443 --token K100ee021e3ebb9cdf1e00ff00d5668748091b06c62a954472482469864719616d7::server:bdb7f1eb05d2678e04777f5132c7f0e7 --node-name worker1.gs.labs --kubelet-arg="cloud-provider=external" --kubelet-arg="provider-id=vsphere://4203148f-7bc3-c8ec-5a40-c7283654597f"
[INFO] Finding release for channel stable
[INFO] Using v1.28.6+k3s2 as release
[INFO] Downloading hash https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/sha256sum-amd64.txt
[INFO] Downloading binary https://github.com/k3s-io/k3s/releases/download/v1.28.6+k3s2/k3s
[INFO] Verifying binary download
[INFO] Installing k3s to /usr/local/bin/k3s
[INFO] Skipping installation of SELinux RPM
[INFO] Creating /usr/local/bin/kubectl symlink to k3s
[INFO] Creating /usr/local/bin/crictl symlink to k3s
[INFO] Creating /usr/local/bin/ctr symlink to k3s
[INFO] Creating killall script /usr/local/bin/k3s-killall.sh
[INFO] Creating uninstall script /usr/local/bin/k3s-agent-uninstall.sh
[INFO] env: Creating environment file /etc/systemd/system/k3s-agent.service.env
[INFO] systemd: Creating service file /etc/systemd/system/k3s-agent.service
[INFO] systemd: Enabling k3s-agent unit
Created symlink /etc/systemd/system/multi-user.target.wants/k3s-agent.service → /etc/systemd/system/k3s-agent.service.
[INFO] Host iptables-save/iptables-restore tools not found
[INFO] Host ip6tables-save/ip6tables-restore tools not found
[INFO] systemd: Starting k3s-agent
root@control1:~# kubectl get nodes
NAME       STATUS   ROLES                       AGE    VERSION
control1   Ready    control-plane,etcd,master   5m9s   v1.28.6+k3s2
worker1    Ready    <none>                      11s    v1.28.6+k3s2
Validate that the uninitialized taint is set on both nodes.
root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name: control1
Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule
Name: worker1
Taints: node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule
# No pods will be scheduled until the vSphere cloud provider is deployed.
root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS    RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   0/1     Pending   0          9m56s
kube-system   helm-install-traefik-crd-27bbg            0/1     Pending   0          9m56s
kube-system   helm-install-traefik-l5hxk                0/1     Pending   0          9m56s
kube-system   local-path-provisioner-84db5d44d9-6lj95   0/1     Pending   0          9m56s
kube-system   metrics-server-67c658944b-qnxrh           0/1     Pending   0          9m56s
Prepare to install the vSphere Cloud Provider
# Set the Kubernetes version - "kubectl get nodes" will give you the current version
root@control1:~# export VERSION=1.28
# Download the vSphere Cloud Controller Manager config file
root@control1:~# wget https://raw.githubusercontent.com/kubernetes/cloud-provider-vsphere/release-$VERSION/releases/v$VERSION/vsphere-cloud-controller-manager.yaml
# Modify the config file with environment specific values
root@control1:~# cat vsphere-cloud-controller-manager.yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloud-controller-manager
  labels:
    vsphere-cpi-infra: service-account
    component: cloud-controller-manager
  namespace: kube-system
---
apiVersion: v1
kind: Secret
metadata:
  name: vsphere-cloud-secret
  labels:
    vsphere-cpi-infra: secret
    component: cloud-controller-manager
  namespace: kube-system
# NOTE: this is just an example configuration, update with real values based on your environment
stringData:
  172.16.0.20.username: "administrator@vsphere.local"
  172.16.0.20.password: "<PASSWORD>"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: vsphere-cloud-config
  labels:
    vsphere-cpi-infra: config
    component: cloud-controller-manager
  namespace: kube-system
data:
  # NOTE: this is just an example configuration, update with real values based on your environment
  vsphere.conf: |
    # Global properties in this section will be used for all specified vCenters unless overridden in VirtualCenter section.
    global:
      port: 443
      # set insecureFlag to true if the vCenter uses a self-signed cert
      insecureFlag: true
      # settings for using k8s secret
      secretName: vsphere-cloud-secret
      secretNamespace: kube-system
    # vcenter section
    vcenter:
      vc802:
        server: 172.16.0.20
        user: administrator@vsphere.local
        password: <PASSWORD>
        datacenters:
          - Sydney
---
# (The RBAC and DaemonSet sections of the file are left unchanged and omitted here for brevity.)
Install the vSphere Cloud Provider
root@control1:~# kubectl apply -f vsphere-cloud-controller-manager.yaml
serviceaccount/cloud-controller-manager created
secret/vsphere-cloud-secret created
configmap/vsphere-cloud-config created
rolebinding.rbac.authorization.k8s.io/servicecatalog.k8s.io:apiserver-authentication-reader created
clusterrolebinding.rbac.authorization.k8s.io/system:cloud-controller-manager created
clusterrole.rbac.authorization.k8s.io/system:cloud-controller-manager created
daemonset.apps/vsphere-cloud-controller-manager created
root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS    RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   0/1     Pending   0          35m
kube-system   helm-install-traefik-crd-27bbg            0/1     Pending   0          35m
kube-system   helm-install-traefik-l5hxk                0/1     Pending   0          35m
kube-system   local-path-provisioner-84db5d44d9-6lj95   0/1     Pending   0          35m
kube-system   metrics-server-67c658944b-qnxrh           0/1     Pending   0          35m
root@control1:~# kubectl get ds -n kube-system
NAME                               DESIRED   CURRENT   READY   UP-TO-DATE   AVAILABLE   NODE SELECTOR   AGE
vsphere-cloud-controller-manager   1         1         1       1            1           <none>          54s
root@control1:~# kubectl get pods -A
NAMESPACE     NAME                                      READY   STATUS              RESTARTS   AGE
kube-system   coredns-6799fbcd5-7hhth                   1/1     Running             0          36m
kube-system   helm-install-traefik-crd-27bbg            0/1     Completed           0          36m
kube-system   helm-install-traefik-l5hxk                0/1     Completed           1          36m
kube-system   local-path-provisioner-84db5d44d9-6lj95   1/1     Running             0          36m
kube-system   metrics-server-67c658944b-qnxrh           1/1     Running             0          36m
kube-system   traefik-f4564c4f4-vsjvc                   0/1     ContainerCreating   0          3s
kube-system   vsphere-cloud-controller-manager-dhmrs    1/1     Running             0          39s
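With the CCM pod running, the CPI initializes both nodes and removes the uninitialized taint, which is what let the pending pods schedule. Re-running the earlier taint check should now show the taints gone (illustrative output):
root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name: control1
Taints: <none>
Name: worker1
Taints: <none>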
Install Helm
# Export KUBECONFIG
root@control1:~# export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
# Install Helm
root@control1:~# curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
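A quick sanity check that helm is on the path and working (the exact version string will vary with the release installed):
root@control1:~# helm version --short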
Install MetalLB
# Add the MetalLB helm repo
root@control1:~# helm repo add metallb https://metallb.github.io/metallb
"metallb" has been added to your repositories
# Install MetalLB
root@control1:~# helm upgrade --install metallb metallb/metallb --create-namespace --namespace metallb-system --set crds.validationFailurePolicy=Ignore --wait
Release "metallb" does not exist. Installing it now.
NAME: metallb
LAST DEPLOYED: Thu Feb 15 17:31:22 2024
NAMESPACE: metallb-system
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
MetalLB is now running in the cluster.
Now you can configure it via its CRs. Please refer to the metallb official docs
# Validate that the metallb pods are running (k is aliased to kubectl)
root@control1:~# k get pods -A
NAMESPACE        NAME                                      READY   STATUS      RESTARTS   AGE
kube-system      coredns-6799fbcd5-nxzzn                   1/1     Running     0          22m
kube-system      helm-install-traefik-8xgmb                0/1     Completed   2          22m
kube-system      helm-install-traefik-crd-b5jdg            0/1     Completed   0          22m
kube-system      local-path-provisioner-84db5d44d9-m8m52   1/1     Running     0          22m
kube-system      metrics-server-67c658944b-jhxc4           1/1     Running     0          22m
kube-system      traefik-f4564c4f4-mvslp                   1/1     Running     0          19m
kube-system      vsphere-cloud-controller-manager-dxtwq    1/1     Running     0          20m
metallb-system   metallb-controller-648b76f565-7vdrs       1/1     Running     0          2m9s
metallb-system   metallb-speaker-d7sw4                     4/4     Running     0          2m9s
metallb-system   metallb-speaker-dtccv                     4/4     Running     0          2m9s
# Review metallb-webhook-service
root@control1:~# k get svc -A
NAMESPACE        NAME                      TYPE           CLUSTER-IP     EXTERNAL-IP   PORT(S)                      AGE
default          kubernetes                ClusterIP      10.43.0.1      <none>        443/TCP                      23m
kube-system      kube-dns                  ClusterIP      10.43.0.10     <none>        53/UDP,53/TCP,9153/TCP       23m
kube-system      metrics-server            ClusterIP      10.43.174.89   <none>        443/TCP                      23m
kube-system      traefik                   LoadBalancer   10.43.88.253   <pending>     80:32437/TCP,443:31234/TCP   19m
metallb-system   metallb-webhook-service   ClusterIP      10.43.36.216   <none>        443/TCP                      2m28s
Create an L2 IP pool config for MetalLB. This IP pool will be used for external IPs.
# Create the IP pool config
root@control1:~# cat ip-pool.yaml
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: default-pool
  namespace: metallb-system
spec:
  addresses:
    - 172.16.1.201-172.16.1.250
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: default
  namespace: metallb-system
spec:
  ipAddressPools:
    - default-pool
# Apply the ip pool config
root@control1:~# k apply -f ip-pool.yaml
ipaddresspool.metallb.io/default-pool created
l2advertisement.metallb.io/default created
# Validate that external IPs are being assigned to services
root@control1:~# k get svc -A
NAMESPACE        NAME                      TYPE           CLUSTER-IP     EXTERNAL-IP    PORT(S)                      AGE
default          kubernetes                ClusterIP      10.43.0.1      <none>         443/TCP                      25m
kube-system      kube-dns                  ClusterIP      10.43.0.10     <none>         53/UDP,53/TCP,9153/TCP       25m
kube-system      metrics-server            ClusterIP      10.43.174.89   <none>         443/TCP                      25m
kube-system      traefik                   LoadBalancer   10.43.88.253   172.16.1.201   80:32437/TCP,443:31234/TCP   22m
metallb-system   metallb-webhook-service   ClusterIP      10.43.36.216   <none>         443/TCP                      4m57s
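Traefik picking up 172.16.1.201 already proves the pool works end to end. For an explicit test, a throwaway deployment and LoadBalancer Service (hypothetical names below) can be created and then cleaned up:
root@control1:~# kubectl create deployment nginx-test --image=nginx
root@control1:~# kubectl expose deployment nginx-test --type=LoadBalancer --port=80
root@control1:~# kubectl get svc nginx-test   # EXTERNAL-IP should come from 172.16.1.201-250
root@control1:~# kubectl delete svc,deployment nginx-test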
Prepare to install the vSphere CSI driver
# Create the vmware-system-csi namespace
root@control1:~# kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/vsphere-csi-driver/v3.0.0/manifests/vanilla/namespace.yaml
namespace/vmware-system-csi created
# Taint the control plane nodes - the CSI driver's deployment guide expects control plane nodes to carry this taint
root@control1:~# kubectl taint nodes control1 node-role.kubernetes.io/control-plane=:NoSchedule
node/control1 tainted
root@control1:~# kubectl describe nodes | egrep "Taints:|Name:"
Name: control1
Taints: node-role.kubernetes.io/control-plane:NoSchedule
Name: worker1
Taints: <none>
# Prepare the config for the vsphere-config-secret. targetvSANFileShareDatastoreURLs is only required if you use vSAN file service (file share) volumes.
root@control1:~# cat csi-vsphere.conf
[Global]
cluster-id = "k3s-cluster-01"
cluster-distribution = "k3s"
[VirtualCenter "172.16.0.20"]
insecure-flag = "true"
user = "administrator@vsphere.local"
password = "<PASSWORD>"
port = "443"
datacenters = "Sydney"
targetvSANFileShareDatastoreURLs = "ds:///vmfs/volumes/vsan:52f1359c42e51bbb-42b33e05f0cd895a/"
# Create the secret
root@control1:~# kubectl create secret generic vsphere-config-secret --from-file=csi-vsphere.conf --namespace=vmware-system-csi
secret/vsphere-config-secret created
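To double-check that the secret holds the file under the expected key (the driver looks for the key csi-vsphere.conf), dots in jsonpath key names have to be escaped:
root@control1:~# kubectl get secret vsphere-config-secret -n vmware-system-csi -o jsonpath='{.data.csi-vsphere\.conf}' | base64 -d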
Install the vSphere CSI driver
# Installing CSI driver v3.0.0
root@control1:~# kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/vsphere-csi-driver/v3.0.0/manifests/vanilla/vsphere-csi-driver.yaml
csidriver.storage.k8s.io/csi.vsphere.vmware.com created
serviceaccount/vsphere-csi-controller created
clusterrole.rbac.authorization.k8s.io/vsphere-csi-controller-role created
clusterrolebinding.rbac.authorization.k8s.io/vsphere-csi-controller-binding created
serviceaccount/vsphere-csi-node created
clusterrole.rbac.authorization.k8s.io/vsphere-csi-node-cluster-role created
clusterrolebinding.rbac.authorization.k8s.io/vsphere-csi-node-cluster-role-binding created
role.rbac.authorization.k8s.io/vsphere-csi-node-role created
rolebinding.rbac.authorization.k8s.io/vsphere-csi-node-binding created
configmap/internal-feature-states.csi.vsphere.vmware.com created
service/vsphere-csi-controller created
deployment.apps/vsphere-csi-controller created
daemonset.apps/vsphere-csi-node created
daemonset.apps/vsphere-csi-node-windows created
# Scale the CSI controller deployment based on the number of control plane nodes (the manifest defaults to 3 replicas; this lab has a single control plane node).
root@control1:~# k scale deploy vsphere-csi-controller --replicas=1 -n vmware-system-csi
deployment.apps/vsphere-csi-controller scaled
# Review the nodeSelector configuration if the CSI controller pod does not get created.
root@control1:~# k edit deploy vsphere-csi-controller -n vmware-system-csi
# I had to change the below config: K3s sets the control-plane label value to "true",
# while the upstream manifest's nodeSelector expects an empty string.
# From:
      nodeSelector:
        node-role.kubernetes.io/control-plane: ""
# To:
      nodeSelector:
        node-role.kubernetes.io/control-plane: "true"
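You can confirm the label value your node actually carries before editing; jsonpath bracket notation with escaped dots handles the slashes and dots in the label key:
root@control1:~# kubectl get node control1 -o jsonpath="{.metadata.labels['node-role\.kubernetes\.io/control-plane']}"
true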
# Validate that the vsphere-csi-* pods are running
root@control1:~# k get pods -A
NAMESPACE           NAME                                      READY   STATUS      RESTARTS        AGE
kube-system         coredns-6799fbcd5-9hwf7                   1/1     Running     0               83m
kube-system         helm-install-traefik-9clqx                0/1     Completed   1               83m
kube-system         helm-install-traefik-crd-bxtf6            0/1     Completed   0               83m
kube-system         local-path-provisioner-84db5d44d9-qws7p   1/1     Running     0               83m
kube-system         metrics-server-67c658944b-jnh4m           1/1     Running     0               83m
kube-system         traefik-f4564c4f4-mnshc                   1/1     Running     0               76m
kube-system         vsphere-cloud-controller-manager-k5jct    1/1     Running     0               76m
metallb-system      metallb-controller-648b76f565-kmdx2       1/1     Running     0               58m
metallb-system      metallb-speaker-9hm5c                     4/4     Running     0               58m
metallb-system      metallb-speaker-rpgr9                     4/4     Running     0               58m
vmware-system-csi   vsphere-csi-controller-656ddc7cf9-zgswg   7/7     Running     0               4m24s
vmware-system-csi   vsphere-csi-node-d49t9                    3/3     Running     5 (4m26s ago)   6m38s
vmware-system-csi   vsphere-csi-node-pn4jc                    3/3     Running     5 (4m47s ago)   6m38s
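As a final smoke test (not part of the install itself), a minimal StorageClass and PVC can validate dynamic provisioning end to end. This is a sketch: the names are placeholders, and datastoreurl must point at a datastore reachable by all nodes (storagepolicyname is an alternative selector):
root@control1:~# cat csi-test.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: vsphere-csi-test
provisioner: csi.vsphere.vmware.com
parameters:
  # Placeholder - replace with a real datastore URL from your environment
  datastoreurl: "ds:///vmfs/volumes/<datastore-uuid>/"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-pvc
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: vsphere-csi-test
  resources:
    requests:
      storage: 1Gi
root@control1:~# kubectl apply -f csi-test.yaml
root@control1:~# kubectl get pvc test-pvc   # STATUS should reach Bound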