From 5bd82a4d3318aac66682601842f6945caa8fb0c1 Mon Sep 17 00:00:00 2001
From: michael
Date: Thu, 3 Nov 2022 01:34:34 +1300
Subject: [PATCH] ceph seems broken

---
 .../files/config/blockpool_hdd_ec.yaml        |  55 ++++++
 .../files/config/blockpool_ssd_replica.yaml   |  48 +++++
 .../files/config/filesystem_multi.yaml        |  91 +++++++++
 .../roles/k8s_storage_deploy/tasks/main.yaml  | 174 +++++++++---------
 4 files changed, 286 insertions(+), 82 deletions(-)
 create mode 100644 ansible/roles/k8s_storage_deploy/files/config/blockpool_hdd_ec.yaml
 create mode 100644 ansible/roles/k8s_storage_deploy/files/config/blockpool_ssd_replica.yaml
 create mode 100644 ansible/roles/k8s_storage_deploy/files/config/filesystem_multi.yaml

diff --git a/ansible/roles/k8s_storage_deploy/files/config/blockpool_hdd_ec.yaml b/ansible/roles/k8s_storage_deploy/files/config/blockpool_hdd_ec.yaml
new file mode 100644
index 0000000..6ff656a
--- /dev/null
+++ b/ansible/roles/k8s_storage_deploy/files/config/blockpool_hdd_ec.yaml
@@ -0,0 +1,55 @@
+# This probably won't work, as multiple OSDs are required
+# on a physical disk in order to place it in multiple pools.
+
+# HDDs should probably be placed in an EC filesystem only.
+
+
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: hdd-block-ec-pool
+  namespace: rook-ceph
+spec:
+  failureDomain: host
+  erasureCoded:
+    dataChunks: 2
+    codingChunks: 1
+  deviceClass: hdd
+
+---
+
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: rook-hdd-block-ec
+# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
+provisioner: rook-ceph.rbd.csi.ceph.com
+parameters:
+  # Parameters are relative to the provisioner being referenced.
+  # clusterID is the namespace where the rook cluster is running
+  clusterID: rook-ceph
+  # Ceph pool into which the RBD image shall be created.
+  # NOTE: ceph-csi cannot normally place RBD images directly in an
+  # erasure-coded pool; see the sketch at the end of this file.
+  pool: hdd-block-ec-pool
+  # RBD image format. Defaults to "2".
+  # The only options are 1 (legacy) and 2 (current).
+  imageFormat: "2"
+  # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only the `layering` feature.
+  imageFeatures: layering
+  # The secrets contain Ceph admin credentials.
+  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
+  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
+  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
+  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
+
+  # Specify the filesystem type of the volume. If not specified, csi-provisioner
+  # will default to `ext4`. Note that `xfs` is not recommended due to a potential deadlock
+  # in hyperconverged settings where the volume is mounted on the same node as the OSDs.
+  csi.storage.k8s.io/fstype: ext4
+
+# Retain the RBD volume when its PVC is deleted
+reclaimPolicy: Retain
+
+# Optional: allow dynamic resizing of PVCs.
+allowVolumeExpansion: true
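+
+# A minimal sketch of how this StorageClass would likely need to change for
+# ceph-csi to provision RBD images backed by an erasure-coded pool: the image
+# metadata must live in a replicated pool, with the EC pool supplying the data
+# via the `dataPool` parameter. The replicated pool name below is hypothetical
+# and is not created anywhere in this role:
+#
+#   parameters:
+#     clusterID: rook-ceph
+#     pool: hdd-block-meta-pool    # hypothetical replicated pool for metadata
+#     dataPool: hdd-block-ec-pool  # EC pool holding the actual data objects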
\ No newline at end of file
diff --git a/ansible/roles/k8s_storage_deploy/files/config/blockpool_ssd_replica.yaml b/ansible/roles/k8s_storage_deploy/files/config/blockpool_ssd_replica.yaml
new file mode 100644
index 0000000..5d03966
--- /dev/null
+++ b/ansible/roles/k8s_storage_deploy/files/config/blockpool_ssd_replica.yaml
@@ -0,0 +1,48 @@
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: ssd-block-replica-pool
+  namespace: rook-ceph
+spec:
+  failureDomain: host
+  replicated:
+    size: 3
+  deviceClass: ssd
+
+---
+
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: rook-ssd-block-replica
+# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
+provisioner: rook-ceph.rbd.csi.ceph.com
+parameters:
+  # Parameters are relative to the provisioner being referenced.
+  # clusterID is the namespace where the rook cluster is running
+  clusterID: rook-ceph
+  # Ceph pool into which the RBD image shall be created
+  pool: ssd-block-replica-pool
+  # RBD image format. Defaults to "2".
+  # The only options are 1 (legacy) and 2 (current).
+  imageFormat: "2"
+  # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only the `layering` feature.
+  imageFeatures: layering
+  # The secrets contain Ceph admin credentials.
+  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
+  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
+  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
+  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
+
+  # Specify the filesystem type of the volume. If not specified, csi-provisioner
+  # will default to `ext4`. Note that `xfs` is not recommended due to a potential deadlock
+  # in hyperconverged settings where the volume is mounted on the same node as the OSDs.
+  csi.storage.k8s.io/fstype: ext4
+
+# Retain the RBD volume when its PVC is deleted
+reclaimPolicy: Retain
+
+# Optional: allow dynamic resizing of PVCs.
+allowVolumeExpansion: true
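+
+# Example (commented out so the role does not apply it): a PVC requesting an
+# RBD-backed volume from this class. The PVC name is illustrative; RBD volumes
+# are block devices, so ReadWriteOnce is the usual access mode:
+#
+#   apiVersion: v1
+#   kind: PersistentVolumeClaim
+#   metadata:
+#     name: example-rbd-pvc
+#   spec:
+#     accessModes:
+#       - ReadWriteOnce
+#     resources:
+#       requests:
+#         storage: 8Gi
+#     storageClassName: rook-ssd-block-replica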
\ No newline at end of file
diff --git a/ansible/roles/k8s_storage_deploy/files/config/filesystem_multi.yaml b/ansible/roles/k8s_storage_deploy/files/config/filesystem_multi.yaml
new file mode 100644
index 0000000..71af2ed
--- /dev/null
+++ b/ansible/roles/k8s_storage_deploy/files/config/filesystem_multi.yaml
@@ -0,0 +1,91 @@
+apiVersion: ceph.rook.io/v1
+kind: CephFilesystem
+metadata:
+  name: fs-pool
+  namespace: rook-ceph
+spec:
+  metadataPool:
+    failureDomain: host
+    replicated:
+      size: 3
+    deviceClass: ssd
+  dataPools:
+    - name: ssd-replica
+      failureDomain: host
+      replicated:
+        size: 3
+      deviceClass: ssd
+    - name: hdd-ec
+      failureDomain: host
+      erasureCoded:
+        dataChunks: 2
+        codingChunks: 1
+      deviceClass: hdd
+  preserveFilesystemOnDelete: true
+  metadataServer:
+    activeCount: 1
+    activeStandby: true
+
+---
+
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: rook-fs-ssd-replica
+# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
+provisioner: rook-ceph.cephfs.csi.ceph.com
+parameters:
+  # Parameters are relative to the provisioner being referenced.
+  # clusterID is the namespace where the rook cluster is running
+  clusterID: rook-ceph
+
+  # CephFS filesystem name into which the volume shall be created
+  fsName: fs-pool
+
+  # Ceph pool into which the volume shall be created
+  # Required for provisionVolume: "true"
+  # Rook prefixes CephFilesystem data pool names with the filesystem name,
+  # so the pool is most likely fs-pool-ssd-replica rather than ssd-replica.
+  pool: fs-pool-ssd-replica
+
+  # The secrets contain Ceph admin credentials. These are generated automatically by the operator
+  # in the same namespace as the cluster.
+  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
+  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
+  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
+  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
+  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
+  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
+
+reclaimPolicy: Retain
+allowVolumeExpansion: true
+
+---
+
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: rook-fs-hdd-ec
+# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
+provisioner: rook-ceph.cephfs.csi.ceph.com
+parameters:
+  # Parameters are relative to the provisioner being referenced.
+  # clusterID is the namespace where the rook cluster is running
+  clusterID: rook-ceph
+
+  # CephFS filesystem name into which the volume shall be created
+  fsName: fs-pool
+
+  # Ceph pool into which the volume shall be created
+  # Required for provisionVolume: "true"
+  # Rook prefixes CephFilesystem data pool names with the filesystem name,
+  # so the pool is most likely fs-pool-hdd-ec rather than hdd-ec.
+  pool: fs-pool-hdd-ec
+
+  # The secrets contain Ceph admin credentials. These are generated automatically by the operator
+  # in the same namespace as the cluster.
+  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
+  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
+  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
+  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
+  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
+  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
+
+reclaimPolicy: Retain
+allowVolumeExpansion: true
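+
+# Example (commented out so the role does not apply it): a PVC consuming the
+# SSD-backed CephFS class above. The PVC name is illustrative; CephFS volumes
+# support shared access, so ReadWriteMany is valid here, unlike the RBD
+# classes in the blockpool files:
+#
+#   apiVersion: v1
+#   kind: PersistentVolumeClaim
+#   metadata:
+#     name: example-cephfs-pvc
+#   spec:
+#     accessModes:
+#       - ReadWriteMany
+#     resources:
+#       requests:
+#         storage: 10Gi
+#     storageClassName: rook-fs-ssd-replica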
\ No newline at end of file
diff --git a/ansible/roles/k8s_storage_deploy/tasks/main.yaml b/ansible/roles/k8s_storage_deploy/tasks/main.yaml
index 46c8296..9a66302 100644
--- a/ansible/roles/k8s_storage_deploy/tasks/main.yaml
+++ b/ansible/roles/k8s_storage_deploy/tasks/main.yaml
@@ -1,91 +1,101 @@
 ---
-- name: create target directory for rook files
-  ansible.builtin.file:
-    path: "{{ ansible_search_path[0] }}/files/rook"
-    state: directory
-    mode: 0775
+# - name: create target directory for rook files
+#   ansible.builtin.file:
+#     path: "{{ ansible_search_path[0] }}/files/rook"
+#     state: directory
+#     mode: 0775

-- name: load rbd kernel module
-  become: true
-  delegate_to: "{{ item }}"
-  with_items: "{{ groups['k8s_worker'] }}"
-  community.general.modprobe:
-    name: rbd
-    state: present
+# - name: load rbd kernel module
+#   become: true
+#   delegate_to: "{{ item }}"
+#   with_items: "{{ groups['k8s_worker'] }}"
+#   community.general.modprobe:
+#     name: rbd
+#     state: present

-- name: set rbd kernel module to load at boot
-  become: true
-  delegate_to: "{{ item }}"
-  with_items: "{{ groups['k8s_worker'] }}"
-  ansible.builtin.copy:
-    dest: /etc/modules-load.d/rbd.conf
-    content: rbd
-    owner: root
-    group: root
-    mode: 0660
+# - name: set rbd kernel module to load at boot
+#   become: true
+#   delegate_to: "{{ item }}"
+#   with_items: "{{ groups['k8s_worker'] }}"
+#   ansible.builtin.copy:
+#     dest: /etc/modules-load.d/rbd.conf
+#     content: rbd
+#     owner: root
+#     group: root
+#     mode: 0660

-- name: install lvm2 package
-  become: true
-  delegate_to: "{{ item }}"
-  with_items: "{{ groups['k8s_worker'] }}"
-  community.general.pacman:
-    name: lvm2
-    state: latest
-    update_cache: true
+# - name: install lvm2 package
+#   become: true
+#   delegate_to: "{{ item }}"
+#   with_items: "{{ groups['k8s_worker'] }}"
+#   community.general.pacman:
+#     name: lvm2
+#     state: latest
+#     update_cache: true

-- name: download the rook manifests
-  ansible.builtin.uri:
-    url: https://raw.githubusercontent.com/rook/rook/{{ rook_version }}/deploy/examples/{{ item }}.yaml
-    dest: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
-    creates: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
-    mode: 0664
-  with_items:
-    - crds
-    - common
-    - operator
-    - cluster
+# - name: download the rook manifests
+#   ansible.builtin.uri:
+#     url: https://raw.githubusercontent.com/rook/rook/{{ rook_version }}/deploy/examples/{{ item }}.yaml
+#     dest: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
+#     creates: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
+#     mode: 0664
+#   with_items:
+#     - crds
+#     - common
+#     - operator
+#     - cluster

-- name: deploy the rook manifest # The order of the items is important, crds > common > operator, see https://github.com/rook/rook/blob/v1.10.4/deploy/examples/common.yaml
+# - name: deploy the rook manifest # The order of the items is important: crds > common > operator, see https://github.com/rook/rook/blob/v1.10.4/deploy/examples/common.yaml
+#   kubernetes.core.k8s:
+#     src: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
+#     state: present
+#   with_items:
+#     - crds
+#     - common
+#     - operator
+
+# # TODO turn this command:
+# # kubectl -n rook-ceph get pod -o json | jq '.items[].status.containerStatuses[].ready'
+# # into a gate that does not proceed until every value is true, timing out
+# # after ~2m (a possible shape is sketched after this patch)
+
+# - name: read the default rook cluster config into memory
+#   ansible.builtin.slurp:
+#     src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_{{ rook_version }}.yaml"
+#   register: rook_file_raw
+
+# - name: parse rook cluster settings from the file data
+#   ansible.builtin.set_fact:
+#     rook_default_cluster: "{{ rook_file_raw['content'] | b64decode | from_yaml }}"
+
+# - name: update the rook cluster settings with desired changes
+#   ansible.utils.update_fact:
+#     updates:
+#       - path: rook_default_cluster.spec.storage.useAllDevices
+#         value: "{{ k8s_storage_all_devices }}"
+#       - path: rook_default_cluster.spec.storage.deviceFilter
+#         value: "{{ k8s_storage_device_filter }}"
+#   register: rook_updated_cluster
+
+# - name: debug the updated rook cluster settings
+#   ansible.builtin.debug:
+#     var: rook_updated_cluster.rook_default_cluster
+
+# - name: write the updated rook cluster settings out to file
+#   ansible.builtin.copy:
+#     content: "{{ rook_updated_cluster.rook_default_cluster | to_nice_yaml }}"
+#     dest: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
+
+# - name: apply the rook cluster manifest
+#   kubernetes.core.k8s:
+#     src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
+#     state: present
+
+# TODO create a check and wait until the cluster is created and running
+# (see the readiness-gate sketch after this patch)
+
+- name: create the storage providers
   kubernetes.core.k8s:
-    src: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
+    src: "{{ ansible_search_path[0] }}/files/config/{{ item }}"
     state: present
   with_items:
-    - crds
-    - common
-    - operator
-
-# TODO somehow turn this command:
-# kubectl -n rook-ceph get pod -o json | jq '.items[].status.containerStatuses[].ready'
-# into a gate, not proceeding until it returns true, and timing out at some limit, ~2m
-
-- name: read the default rook cluster config into memory
-  ansible.builtin.slurp:
-    src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_{{ rook_version }}.yaml"
-  register: rook_file_raw
-
-- name: parse rook cluster settings from the file data
-  ansible.builtin.set_fact:
-    rook_default_cluster: "{{ rook_file_raw['content'] | b64decode | from_yaml }}"
-
-- name: update the rook cluster settings with desired changes
-  ansible.utils.update_fact:
-    updates:
-      - path: rook_default_cluster.spec.storage.useAllDevices
-        value: "{{ k8s_storage_all_devices }}"
-      - path: rook_default_cluster.spec.storage.deviceFilter
-        value: "{{ k8s_storage_device_filter }}"
-  register: rook_updated_cluster
-
-- name: debug the updated rook cluster settings
-  ansible.builtin.debug:
-    var: rook_updated_cluster.rook_default_cluster
-
-- name: write the updated rook cluster settings out to file
-  ansible.builtin.copy:
-    content: "{{ rook_updated_cluster.rook_default_cluster | to_nice_yaml }}"
-    dest: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
-
-- name: apply the rook cluster manifest
-  kubernetes.core.k8s:
-    src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
-    state: present
\ No newline at end of file
+    # blockpool_hdd_ec.yaml is deliberately left out; see the note at the top of that file
+    - blockpool_ssd_replica.yaml
+    - filesystem_multi.yaml
\ No newline at end of file
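
A possible shape for the readiness gate described in the two TODOs above. This is a sketch only: it assumes the kubernetes.core collection is available where the role runs, and the task name, retry count, and delay are illustrative (24 retries x 5s approximates the ~2m limit):

- name: wait for all rook-ceph pods to report ready
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: rook-ceph
  register: rook_pods
  # Succeed only once at least one pod exists and every reported
  # containerStatus has ready == true, mirroring the kubectl | jq one-liner.
  until: >-
    rook_pods.resources | length > 0
    and rook_pods.resources
        | map(attribute='status.containerStatuses', default=[])
        | flatten
        | map(attribute='ready')
        | select('equalto', false)
        | list | length == 0
  retries: 24
  delay: 5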