
ceph seems broken

michael 2022-11-03 01:34:34 +13:00
parent d9a28d21a8
commit 5bd82a4d33
4 changed files with 286 additions and 82 deletions


@@ -0,0 +1,55 @@
# This probably won't work, as multiple OSDs are required
# on a physical disk in order to place it in multiple pools.
# HDDs should probably be placed in an EC filesystem only.
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: hdd-block-ec-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  erasureCoded:
    dataChunks: 2
    codingChunks: 1
  deviceClass: hdd
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-hdd-block-ec
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  # Parameters are relative to the provisioner being referenced.
  # clusterID is the namespace where the rook cluster is running
  clusterID: rook-ceph
  # Ceph pool into which the RBD image shall be created
  pool: hdd-block-ec-pool
  # RBD image format. Defaults to "2".
  # Options are only 1 (old-gen) or 2 (current).
  imageFormat: "2"
  # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only `layering` feature.
  imageFeatures: layering
  # The secrets contain Ceph admin credentials.
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
  # Specify the filesystem type of the volume. If not specified, csi-provisioner
  # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
  # in hyperconverged settings where the volume is mounted on the same node as the osds.
  csi.storage.k8s.io/fstype: ext4
# Keep the rbd volume when its PVC is deleted
reclaimPolicy: Retain
# Optional, if you want to add dynamic resize for PVC.
allowVolumeExpansion: true
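
For reference, Rook's erasure-coded RBD example does not point the StorageClass straight at the EC pool: RBD keeps image metadata on a replicated pool (`pool`) and only the image data on the EC pool (`dataPool`). A minimal sketch of that shape, assuming a hypothetical replicated metadata pool named hdd-block-meta-pool alongside the hdd-block-ec-pool above:

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-hdd-block-ec-example # hypothetical, not part of this commit
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph
  # Replicated pool that would hold the RBD image metadata (hypothetical name)
  pool: hdd-block-meta-pool
  # Erasure-coded pool that holds the image data
  dataPool: hdd-block-ec-pool
  imageFormat: "2"
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
  csi.storage.k8s.io/fstype: ext4
reclaimPolicy: Retain
allowVolumeExpansion: true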


@@ -0,0 +1,48 @@
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: ssd-block-replica-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  replicated:
    size: 3
  deviceClass: ssd
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ssd-block-replica
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  # Parameters are relative to the provisioner being referenced.
  # clusterID is the namespace where the rook cluster is running
  clusterID: rook-ceph
  # Ceph pool into which the RBD image shall be created
  pool: ssd-block-replica-pool
  # RBD image format. Defaults to "2".
  # Options are only 1 (old-gen) or 2 (current).
  imageFormat: "2"
  # RBD image features. Available for imageFormat: "2". CSI RBD currently supports only `layering` feature.
  imageFeatures: layering
  # The secrets contain Ceph admin credentials.
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
  # Specify the filesystem type of the volume. If not specified, csi-provisioner
  # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock
  # in hyperconverged settings where the volume is mounted on the same node as the osds.
  csi.storage.k8s.io/fstype: ext4
# Keep the rbd volume when its PVC is deleted
reclaimPolicy: Retain
# Optional, if you want to add dynamic resize for PVC.
allowVolumeExpansion: true
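
A usage sketch (not part of this commit) of a claim against the class above; the claim name, namespace, and size are placeholders:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-ssd-block-claim # placeholder
  namespace: default
spec:
  storageClassName: rook-ssd-block-replica
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi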


@@ -0,0 +1,91 @@
apiVersion: ceph.rook.io/v1
kind: CephFilesystem
metadata:
  name: fs-pool
  namespace: rook-ceph
spec:
  metadataPool:
    failureDomain: host
    replicated:
      size: 3
    deviceClass: ssd
  dataPools:
    - name: ssd-replica
      failureDomain: host
      replicated:
        size: 3
      deviceClass: ssd
    - name: hdd-ec
      failureDomain: host
      erasureCoded:
        dataChunks: 2
        codingChunks: 1
      deviceClass: hdd
  preserveFilesystemOnDelete: true
  metadataServer:
    activeCount: 1
    activeStandby: true
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-fs-ssd-replica
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
provisioner: rook-ceph.cephfs.csi.ceph.com
parameters:
  # Parameters are relative to the provisioner being referenced.
  # clusterID is the namespace where the rook cluster is running
  clusterID: rook-ceph
  # CephFS filesystem name into which the volume shall be created
  fsName: fs-pool
  # Ceph data pool into which the volume shall be created. Rook prefixes data
  # pool names with the filesystem name, so the pool is fs-pool-ssd-replica.
  # Required for provisionVolume: "true"
  pool: fs-pool-ssd-replica
  # The secrets contain Ceph admin credentials. These are generated automatically by the operator
  # in the same namespace as the cluster.
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
reclaimPolicy: Retain
allowVolumeExpansion: true
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-fs-hdd-ec
# Change "rook-ceph" provisioner prefix to match the operator namespace if needed
provisioner: rook-ceph.cephfs.csi.ceph.com
parameters:
  # Parameters are relative to the provisioner being referenced.
  # clusterID is the namespace where the rook cluster is running
  clusterID: rook-ceph
  # CephFS filesystem name into which the volume shall be created
  fsName: fs-pool
  # Ceph data pool into which the volume shall be created. Rook prefixes data
  # pool names with the filesystem name, so the pool is fs-pool-hdd-ec.
  # Required for provisionVolume: "true"
  pool: fs-pool-hdd-ec
  # The secrets contain Ceph admin credentials. These are generated automatically by the operator
  # in the same namespace as the cluster.
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
reclaimPolicy: Retain
allowVolumeExpansion: true
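
Similarly, a sketch of a shared claim against the CephFS class above (placeholder name, namespace, and size); CephFS volumes can be mounted ReadWriteMany across nodes, which is the usual reason to prefer them over RBD:

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-shared-fs-claim # placeholder
  namespace: default
spec:
  storageClassName: rook-fs-ssd-replica
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 20Gi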


@@ -1,91 +1,101 @@
---
# - name: create target directory for rook files
#   ansible.builtin.file:
#     path: "{{ ansible_search_path[0] }}/files/rook"
#     state: directory
#     mode: 0775
# - name: load rbd kernel module
#   become: true
#   delegate_to: "{{ item }}"
#   with_items: "{{ groups['k8s_worker'] }}"
#   community.general.modprobe:
#     name: rbd
#     state: present
# - name: set rbd kernel module to load at boot
#   become: true
#   delegate_to: "{{ item }}"
#   with_items: "{{ groups['k8s_worker'] }}"
#   ansible.builtin.copy:
#     dest: /etc/modules-load.d/rbd.conf
#     content: rbd
#     owner: root
#     group: root
#     mode: 0660
# - name: install lvm2 package
#   become: true
#   delegate_to: "{{ item }}"
#   with_items: "{{ groups['k8s_worker'] }}"
#   community.general.pacman:
#     name: lvm2
#     state: latest
#     update_cache: true
# - name: download the rook manifests
#   ansible.builtin.uri:
#     url: https://raw.githubusercontent.com/rook/rook/{{ rook_version }}/deploy/examples/{{ item }}.yaml
#     dest: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
#     creates: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
#     mode: 0664
#   with_items:
#     - crds
#     - common
#     - operator
#     - cluster
# - name: deploy the rook manifest # The order of the items is important, crds > common > operator, see https://github.com/rook/rook/blob/v1.10.4/deploy/examples/common.yaml
#   kubernetes.core.k8s:
#     src: "{{ ansible_search_path[0] }}/files/rook/rook_{{ item }}_{{ rook_version }}.yaml"
#     state: present
#   with_items:
#     - crds
#     - common
#     - operator
# # TODO somehow turn this command:
# # kubectl -n rook-ceph get pod -o json | jq '.items[].status.containerStatuses[].ready'
# # into a gate, not proceeding until it returns true, and timing out at some limit, ~2m
# - name: read the default rook cluster config into memory
#   ansible.builtin.slurp:
#     src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_{{ rook_version }}.yaml"
#   register: rook_file_raw
# - name: parse rook cluster settings from the file data
#   ansible.builtin.set_fact:
#     rook_default_cluster: "{{ rook_file_raw['content'] | b64decode | from_yaml }}"
# - name: update the rook cluster settings with desired changes
#   ansible.utils.update_fact:
#     updates:
#       - path: rook_default_cluster.spec.storage.useAllDevices
#         value: "{{ k8s_storage_all_devices }}"
#       - path: rook_default_cluster.spec.storage.deviceFilter
#         value: "{{ k8s_storage_device_filter }}"
#   register: rook_updated_cluster
# - name: debug the updated rook cluster settings
#   ansible.builtin.debug:
#     var: rook_updated_cluster.rook_default_cluster
# - name: write the updated rook cluster settings out to file
#   ansible.builtin.copy:
#     content: "{{ rook_updated_cluster.rook_default_cluster | to_nice_yaml }}"
#     dest: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
# - name: apply the rook cluster manifest
#   kubernetes.core.k8s:
#     src: "{{ ansible_search_path[0] }}/files/rook/rook_cluster_modified.yaml"
#     state: present
# TODO create a check and wait until cluster is created and running
- name: create the storage providers
  kubernetes.core.k8s:
    src: "{{ ansible_search_path[0] }}/files/config/{{ item }}"
    state: present
  with_items:
    - blockpool_ssd_replica.yaml
    - filesystem_multi.yaml
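
One possible shape for the readiness gate described in the TODO comments above (a sketch, not part of this commit): poll the rook-ceph pods with kubernetes.core.k8s_info and retry until every reported container is ready, giving up after roughly two minutes.

- name: wait for rook-ceph pods to report ready
  kubernetes.core.k8s_info:
    kind: Pod
    namespace: rook-ceph
  register: rook_pods
  # ~2 minute ceiling: 24 attempts x 5 second delay. Pods that have not yet
  # reported containerStatuses are not counted, so this is a sketch rather
  # than a watertight check.
  until: >-
    rook_pods.resources | length > 0 and
    rook_pods.resources
    | map(attribute='status.containerStatuses', default=[])
    | flatten
    | rejectattr('ready')
    | list | length == 0
  retries: 24
  delay: 5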