1
0

Add k8s shutdown/openup scripts

Add nut ansible roles
Add acme certificate ansible role
This commit is contained in:
michael 2024-01-11 01:11:16 +13:00
parent 92df824252
commit 7eddbba696
25 changed files with 450 additions and 43 deletions

View File

@ -0,0 +1,6 @@
# Subject fields supplied to the CSR generated by the acme_certificate role.
acme_certificate_csr_country: "NZ"
acme_certificate_csr_state: "Otago"
acme_certificate_csr_locality: "Queenstown"
acme_certificate_csr_organization: "Balsillie Family"
acme_certificate_csr_email: "admin@balsillie.net"
# ACME directory URL handed to the acme_certificate module.
acme_certificate_directory: "https://acme-v02.api.letsencrypt.org/directory"

View File

@ -1 +0,0 @@
---

View File

@ -0,0 +1,2 @@
# NUT account names; rendered into the upsmon MONITOR lines and used as the
# section headers of the upsd users file.
nut_client_primary_username: "nut-primary"
nut_client_secondary_username: "nut-secondary"

View File

@ -0,0 +1,3 @@
# Dynamic DNS (RFC 2136) update target used to publish ACME dns-01 records.
rfc2136_server_address: "10.208.240.1"
# TSIG key identity; the matching secret (rfc2136_key_secret) is not defined here.
rfc2136_key_name: "rndc-house"
rfc2136_key_algorithm: "hmac-sha256"

View File

@ -1,2 +0,0 @@
nut_packages:
nut

View File

@ -1,2 +0,0 @@
nut_packages:
nut

View File

@ -0,0 +1 @@
# Contact address registered with the ACME account. The key must be spelled
# acme_certificate_account_email: that is the name the acme_certificate role
# tasks read.
acme_certificate_account_email: acme.hv00@balsillie.email

View File

@ -1,7 +1,9 @@
ansible_connection: ssh ansible_connection: ssh
ansible_host: 192.168.1.250 ansible_host: hv00.balsillie.house
ansible_fqdn: hv00.balsillie.house ansible_fqdn: hv00.balsillie.house
ansible_remote_addr: 10.192.110.100
ansible_port: 22 ansible_port: 22
ansible_user: ladmin
ansible_become_user: root
ansible_become_method: sudo ansible_become_method: sudo
static_fqdn: hv00.balsillie.house static_fqdn: hv00.balsillie.house

View File

@ -1,13 +0,0 @@
$ANSIBLE_VAULT;1.1;AES256
65303065306531633065386131316639323033623166636331386435393231623763356336646337
3430333966353561336334333332343130643065323663610a393664353431623037363731373837
61653866666536383365393434613933393437343135346430643136396236313138613762316438
3439303064366639380a316563666330306636613734666136633066656234363936623536383130
65363364393937343231346133343435383336366464666661663432663663316337356637643165
34303238653334663764633534393237643639636435633436353862663533346634396339343935
34396363306461623564623566356139613564633136313965386337373138316365383732663139
34396438636436376566323435316430376261323835303231663735373465326666666161616330
33663132613733663337393636643736313863643566343366633032396134303462656162376432
62666563376663323537396638306233346238306434643434366131656438303035666265613336
37336135373061393036326633333137356531303038613061373638306435396135383365323265
33623061633139626431

View File

@ -0,0 +1,14 @@
# upsmon settings for this host; consumed by the upsmon.conf template.
# The UPS is served from this same machine, so no network-online drop-in is
# installed for nut-monitor.
nut_client_local_server: true
nut_client_hostsync: 120
nut_client_shutdown_cmd: /usr/bin/poweroff
nut_client_shutdown_exit: 'true'
nut_client_notify_cmd: /scripts/notify.sh
# One entry per monitored UPS; each becomes a MONITOR line.
nut_client_ups_devices:
  - name: ups0
    type: primary
    host: hv00.balsillie.house
    port: 3493
    powervalue: 1
# One entry per NOTIFYFLAG line.
nut_client_notify_flags:
  - name: SHUTDOWN
    flags: EXEC

View File

@ -2,6 +2,6 @@ nut_server_listen_address: 10.192.110.100
nut_server_listen_port: 3493 nut_server_listen_port: 3493
nut_server_ups_devices: nut_server_ups_devices:
- name: APC 650 - name: ups0
driver: usbhid-ups driver: usbhid-ups
port: auto port: auto

View File

@ -0,0 +1 @@
# Contact address registered with the ACME account. The key must be spelled
# acme_certificate_account_email: that is the name the acme_certificate role
# tasks read.
acme_certificate_account_email: acme.kube00@balsillie.email

View File

@ -1,8 +1,8 @@
--- nut_client_local_server: false
nut_client_server_list: nut_client_server_list:
- host: hv00.balsillie.house - host: hv00.balsillie.house
port: 3493 port: 3493
ssl: true ssl: true
username: "{{ nut_client_username }}" username: "{{ nut_client_username }}"
password: "{{ nut_client_password }}" password: "{{ nut_client_password }}"

View File

@ -44,6 +44,7 @@ all:
hv00.balsillie.house: hv00.balsillie.house:
nut_clients: nut_clients:
hosts: hosts:
hv00.balsillie.house:
kube00.balsillie.house: kube00.balsillie.house:
workstations: workstations:
children: children:

View File

@ -3,26 +3,30 @@
- nut_servers - nut_servers
- nut_clients - nut_clients
become: true become: true
gather_facts: true
tasks: tasks:
- name: Install NUT packages - name: Install NUT package on Archlinux
when: ansible_facts['os_family'] == "Archlinux" when: ansible_facts['os_family'] == "Archlinux"
community.general.pacman: community.general.pacman:
name: "{{ nut_packages }}" name: nut
state: latest state: latest
update_cache: true update_cache: true
upgrade: false upgrade: false
- name: Setup NUT servers - name: Setup NUT servers
gather_facts: true gather_facts: false
hosts: nut_servers hosts: nut_servers
become: true become: true
roles: roles:
- ssl_certificate - role: acme_certificate
- nut_server vars:
acme_certificate_subject: "{{ ansible_hostname }}"
acme_certificate_zone: balsillie.house
- role: nut_server
- name: Setup NUT clients - name: Setup NUT clients
gather_facts: true gather_facts: false
hosts: nut_clients hosts: nut_clients
become: true become: true
roles: roles:

View File

@ -0,0 +1,128 @@
---
# Root-only directory that will hold the ACME account key.
- name: Create ACME account directory
  ansible.builtin.file:
    path: /etc/ssl/private/ACME
    state: directory
    owner: root
    group: root
    mode: "0700"
# ACME account key, encrypted with acme_certificate_account_key_passphrase.
# The key type is ECC on secp384r1: the previous `type: Ed25519` contradicted
# the curve/size arguments, cannot be serialised in the requested pkcs1
# (traditional) format, and is not a key type ACME CAs accept for accounts.
# `size` was dropped because it only applies to RSA/DSA keys.
- name: Create ACME account key
  community.crypto.openssl_privatekey:
    path: /etc/ssl/private/ACME/account.key
    state: present
    type: ECC
    curve: secp384r1
    format: pkcs1
    cipher: auto
    passphrase: "{{ acme_certificate_account_key_passphrase }}"
    owner: root
    group: root
    mode: "0600"
# 4096-bit RSA key for the host certificate, stored next to the CSR and
# certificate under /etc/ssl/private and encrypted with ssl_passphrase.
- name: Generate RSA private key
  community.crypto.openssl_privatekey:
    path: "/etc/ssl/private/{{ acme_certificate_subject }}.key"
    state: present
    type: RSA
    size: 4096
    curve: secp384r1
    format: pkcs1
    cipher: auto
    passphrase: "{{ ssl_passphrase }}"
    owner: root
    group: root
    mode: "0600"
# Certificate signing request for acme_certificate_subject, signed with the
# host private key generated by the previous task.
- name: Generate CSR
  community.crypto.openssl_csr:
    path: "/etc/ssl/private/{{ acme_certificate_subject }}.csr"
    state: present
    privatekey_path: "/etc/ssl/private/{{ acme_certificate_subject }}.key"
    # The host key is written encrypted with ssl_passphrase, so the same
    # passphrase must be supplied here or the module cannot load the key.
    privatekey_passphrase: "{{ ssl_passphrase }}"
    digest: sha256
    common_name: "{{ acme_certificate_subject }}"
    use_common_name_for_san: true
    country_name: "{{ acme_certificate_csr_country }}"
    state_or_province_name: "{{ acme_certificate_csr_state }}"
    locality_name: "{{ acme_certificate_csr_locality }}"
    organization_name: "{{ acme_certificate_csr_organization }}"
    email_address: "{{ acme_certificate_csr_email }}"
    owner: root
    group: root
    mode: "0600"
# First acme_certificate pass: submits the CSR and registers the module result
# as `challenge`; later tasks read challenge.challenge_data from it.
- name: Submit ACME certificate request
  community.crypto.acme_certificate:
    acme_directory: "{{ acme_certificate_directory }}"
    acme_version: 2
    validate_certs: true
    select_crypto_backend: cryptography
    account_key_src: /etc/ssl/private/ACME/account.key
    account_key_passphrase: "{{ acme_certificate_account_key_passphrase }}"
    account_email: "{{ acme_certificate_account_email }}"
    modify_account: true
    terms_agreed: true
    challenge: dns-01
    csr: "/etc/ssl/private/{{ acme_certificate_subject }}.csr"
    dest: "/etc/ssl/private/{{ acme_certificate_subject }}.crt"
    chain_dest: "/etc/ssl/private/{{ acme_certificate_subject }}.chain"
  register: challenge
# Print the registered result of the certificate request for troubleshooting.
- name: Debug ACME certificate challenge
  ansible.builtin.debug:
    var: challenge
# Runs only when the request produced a pending dns-01 challenge for the
# subject: publish the TXT record, collect the certificate, then remove the
# record again.
- name: Proceed if challenge is changed
  when:
    - challenge is changed
    - acme_certificate_subject in challenge.challenge_data
  block:
    - name: Answer ACME certificate challenge
      community.general.nsupdate:
        server: "{{ rfc2136_server_address }}"
        port: 53
        protocol: tcp
        key_name: "{{ rfc2136_key_name }}"
        key_algorithm: "{{ rfc2136_key_algorithm }}"
        key_secret: "{{ rfc2136_key_secret }}"
        zone: "{{ acme_certificate_zone }}"
        # The challenge record name is already fully qualified. The trailing
        # dot marks it absolute so nsupdate does not append the zone a second
        # time.
        record: "{{ challenge.challenge_data[acme_certificate_subject]['dns-01'].record }}."
        type: TXT
        ttl: 3600
        value: "{{ challenge.challenge_data[acme_certificate_subject]['dns-01'].resource_value }}"
        state: present
    # Second acme_certificate pass: same arguments as the request plus
    # `data`, which hands back the registered challenge result.
    - name: Retrieve ACME certificate
      community.crypto.acme_certificate:
        acme_directory: "{{ acme_certificate_directory }}"
        acme_version: 2
        validate_certs: true
        select_crypto_backend: cryptography
        account_key_src: /etc/ssl/private/ACME/account.key
        account_key_passphrase: "{{ acme_certificate_account_key_passphrase }}"
        account_email: "{{ acme_certificate_account_email }}"
        modify_account: true
        terms_agreed: true
        challenge: dns-01
        csr: "/etc/ssl/private/{{ acme_certificate_subject }}.csr"
        data: "{{ challenge }}"
        dest: "/etc/ssl/private/{{ acme_certificate_subject }}.crt"
        chain_dest: "/etc/ssl/private/{{ acme_certificate_subject }}.chain"
  # Cleanup lives in `always` so the TXT record is removed even when
  # publishing or certificate retrieval fails.
  always:
    - name: Cleanup ACME challenge
      community.general.nsupdate:
        server: "{{ rfc2136_server_address }}"
        port: 53
        protocol: tcp
        key_name: "{{ rfc2136_key_name }}"
        key_algorithm: "{{ rfc2136_key_algorithm }}"
        key_secret: "{{ rfc2136_key_secret }}"
        zone: "{{ acme_certificate_zone }}"
        record: "{{ challenge.challenge_data[acme_certificate_subject]['dns-01'].record }}."
        type: TXT
        ttl: 3600
        value: "{{ challenge.challenge_data[acme_certificate_subject]['dns-01'].resource_value }}"
        state: absent

View File

@ -0,0 +1,3 @@
# systemd unit drop-in: pull in network-online.target and order the unit
# after it, so the service only starts once the network is reported online.
# NOTE(review): presumably the nut-monitor override installed by the
# nut_client role on hosts without a local NUT server - confirm.
[Unit]
Requires=network-online.target
After=network-online.target

View File

@ -0,0 +1,23 @@
# Render the upsmon configuration; readable only by root and the nut group
# because the rendered file contains the monitor account passwords.
- name: Template out upsmon.conf
  ansible.builtin.template:
    src: upsmon.conf.j2
    dest: /etc/nut/upsmon.conf
    owner: root
    group: nut
    mode: "0640"
    trim_blocks: true
# Hosts that monitor a remote NUT server get a unit drop-in for nut-monitor.
# The drop-in directory is created first (the template module does not create
# parent directories), and systemd is reloaded when the file changes so the
# following service restart actually picks up the override.
- name: Install nut-monitor systemd drop in
  when: not (nut_client_local_server | default(true))
  block:
    - name: Create nut-monitor systemd drop in directory
      ansible.builtin.file:
        path: /etc/systemd/system/nut-monitor.service.d
        state: directory
        owner: root
        group: root
        mode: "0755"
    - name: Copy nut-monitor systemd drop in file
      ansible.builtin.template:
        src: nut-monitor_override.conf
        dest: /etc/systemd/system/nut-monitor.service.d/override.conf
        owner: root
        group: root
        mode: "0644"
      register: nut_client_override_file
    - name: Reload systemd after drop in change
      when: nut_client_override_file is changed
      ansible.builtin.systemd:
        daemon_reload: true
# Enable nut-monitor at boot and restart it on every run so a freshly
# templated upsmon.conf takes effect.
- name: Start and enable nut-monitor
  ansible.builtin.service:
    name: nut-monitor
    enabled: true
    state: restarted

View File

@ -0,0 +1,33 @@
# File configured by Ansible playbook
# Configuration reference:
# https://man.archlinux.org/man/upsmon.conf.5
{# One MONITOR line per entry in nut_client_ups_devices. Entries whose type is #}
{# neither 'primary' nor 'secondary' are skipped. #}
{% for ups in nut_client_ups_devices %}
{% if ups.type == 'primary' %}
MONITOR {{ ups.name }}@{{ ups.host | default('localhost') }}:{{ ups.port | default(3493) }} {{ ups.powervalue | default('1') }} {{ nut_client_primary_username }} {{ nut_client_primary_password }} primary
{% elif ups.type == 'secondary' %}
MONITOR {{ ups.name }}@{{ ups.host | default('localhost') }}:{{ ups.port | default(3493) }} {{ ups.powervalue | default('1') }} {{ nut_client_secondary_username }} {{ nut_client_secondary_password }} secondary
{% endif %}
{% endfor %}
FINALDELAY {{ nut_client_final_delay | default('5') }}
NOCOMMWARNTIME 300
RBWARNTIME 43200
OFFDURATION 30
SHUTDOWNCMD "{{ nut_client_shutdown_cmd | default('/usr/bin/poweroff') }}"
SHUTDOWNEXIT {{ nut_client_shutdown_exit | default('true') }}
POWERDOWNFLAG "/etc/killpower"
DEADTIME 15
HOSTSYNC {{ nut_client_hostsync | default('30') }}
POLLFREQALERT 5
POLLFREQ 5
MINSUPPLIES {{ nut_client_min_supplies | default('1') }}
{# NOTE(review): /usr/ssl/certs is an unusual CA directory; confirm it exists on the target hosts. #}
CERTPATH /usr/ssl/certs
FORCESSL 1
CERTVERIFY 1
NOTIFYCMD {{ nut_client_notify_cmd | default('/usr/bin/notify-send') }}
{# Both notify lists are optional: default to empty so hosts that define #}
{# only one of them (or neither) still render instead of failing on an #}
{# undefined variable. #}
{% for message in nut_client_notify_messages | default([]) %}
NOTIFYMSG {{ message.name }} {{ message.message }}
{% endfor %}
{% for notify in nut_client_notify_flags | default([]) %}
NOTIFYFLAG {{ notify.name }} {{ notify.flags }}
{% endfor %}

View File

@ -41,17 +41,3 @@
name: nut-server name: nut-server
state: restarted state: restarted
enabled: true enabled: true
- name: Template out upsmon.conf
ansible.builtin.template:
src: upsmon.conf.j2
dest: /etc/nut/upsmon.conf
owner: root
group: nut
mode: '0640'
- name: Start and enable nut-monitor
ansible.builtin.service:
name: nut-monitor
state: restarted
enabled: true

View File

@ -0,0 +1,11 @@
# Account used by upsmon clients that monitor the UPS as primary.
[{{ nut_client_primary_username }}]
password = {{ nut_client_primary_password }}
upsmon primary
actions = SET
instcmds = ALL
# Account used by upsmon clients that monitor the UPS as secondary.
[{{ nut_client_secondary_username }}]
password = {{ nut_client_secondary_password }}
upsmon secondary
actions = SET
instcmds = ALL

3
scripts/notify.sh Executable file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Post a notification message to an HTTP endpoint.
#
# Usage:       notify.sh MESSAGE
# Environment: NOTIFY_HOST     host name of the notification server
#              NOTIFY_CHANNEL  URL path (channel) the message is posted to
#
# Fail early with a clear message instead of posting to a malformed URL.
: "${NOTIFY_HOST:?NOTIFY_HOST must be set}"
: "${NOTIFY_CHANNEL:?NOTIFY_CHANNEL must be set}"

# --max-time bounds the call so an unreachable server cannot stall the
# shutdown/startup scripts that invoke this helper; --fail turns HTTP error
# responses into a non-zero exit status.
curl --fail --max-time 15 -d "${1:-}" -X POST "https://${NOTIFY_HOST}/${NOTIFY_CHANNEL}"

131
scripts/openup.sh Executable file
View File

@ -0,0 +1,131 @@
#!/bin/bash
# Bring the workloads on node kube00 back up: uncordon the node, wait for
# Ceph, take the CNPG databases out of hibernation, then scale the
# applications back to one replica in dependency order.

# Set array variables
# Ids of the rook-ceph OSD deployments to wait for.
OSDS=(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
# CNPG clusters in namespace db, listed in the order they are woken up.
DATABASE_LIST=(
  "gitea"
  "keycloak"
  "mastodon-db"
  "nextcloud"
  "synapse"
  "mss"
  "mas"
  "coturn"
  "signal-bridge"
  "whatsapp-bridge"
  "telegram-bridge"
  "discord-bridge"
  "facebook-bridge"
)

# Uncordon node
echo "Uncordoning node."
kubectl uncordon kube00

# Scale bind to 1
echo "Scaling up Bind."
kubectl -n dns scale deployment bind --replicas=1

# Wait for the ceph monitors and managers to be ready
echo "Waiting for ceph monitors and managers to be ready..."
kubectl wait --for=condition=available=True deployment/rook-ceph-mon-a --timeout=30m -n rook-ceph
kubectl wait --for=condition=available=True deployment/rook-ceph-mgr-a --timeout=30m -n rook-ceph

# Wait for ceph block pool OSDs to be ready
echo "Waiting for ceph block pool OSDs to be ready..."
kubectl wait --for=condition=available=True deployment/rook-ceph-osd-10 --timeout=30m -n rook-ceph
kubectl wait --for=condition=available=True deployment/rook-ceph-osd-11 --timeout=30m -n rook-ceph

# Scale up the non-psql db workloads
echo "Scaling up non-psql db workloads."
kubectl -n db scale deployment --all --replicas=1

# Take all databases out of hibernation
DB_INDEX=1
DB_TOTAL=${#DATABASE_LIST[@]}
for DB in "${DATABASE_LIST[@]}"; do
  echo "Un-hibernate DB $DB_INDEX/$DB_TOTAL..."
  kubectl cnpg hibernate off "$DB" -n db
  DB_INDEX=$((DB_INDEX+1))
done

# Scale up Keycloak (after its database cluster reports healthy)
echo "Scaling up Keycloak."
kubectl wait --for=jsonpath='{.status.phase}'='Cluster in healthy state' cluster/keycloak --timeout=15m -n db
kubectl -n public scale statefulset keycloak --replicas=1
kubectl -n public scale deployment keycloak-operator --replicas=1

# Wait for the ceph-fs metadata servers to be ready
echo "Waiting for ceph-fs metadata servers to be ready..."
kubectl wait --for=condition=available=True deployment/mds-ceph-fs-hdd-a --timeout=30m -n rook-ceph
kubectl wait --for=condition=available=True deployment/mds-ceph-fs-hdd-b --timeout=30m -n rook-ceph

# Wait for all remaining ceph osds to be ready
echo "Waiting for all remaining ceph osds to be ready..."
for OSD in "${OSDS[@]}"; do
  echo "Waiting for OSD $OSD to be ready..."
  kubectl wait --for=condition=available=True "deployment/rook-ceph-osd-$OSD" --timeout=30m -n rook-ceph
done

# Scale up Mail
echo "Scaling up Mail."
kubectl -n public scale deployment postfix dovecot --replicas=1

# Scale up Nextcloud (after its database cluster reports healthy)
echo "Scaling up Nextcloud."
kubectl wait --for=jsonpath='{.status.phase}'='Cluster in healthy state' cluster/nextcloud --timeout=15m -n db
kubectl -n private scale deployment sftp --replicas=1
kubectl -n public scale deployment nextcloud collabora --replicas=1

# Scale up Gitea (after its database cluster reports healthy)
echo "Scaling up Gitea."
kubectl wait --for=jsonpath='{.status.phase}'='Cluster in healthy state' cluster/gitea --timeout=15m -n db
kubectl -n public scale deployment gitea --replicas=1

# Scale up Mastodon (after its database cluster reports healthy)
echo "Scaling up Mastodon."
kubectl wait --for=jsonpath='{.status.phase}'='Cluster in healthy state' cluster/mastodon-db --timeout=15m -n db
kubectl -n public scale deployment mastodon --replicas=1

# Scale up all other deployments
echo "Scaling up all other deployments."
kubectl -n public scale deployment --all --replicas=1
kubectl -n private scale deployment --all --replicas=1

# Notify scale up complete
/scripts/notify.sh "Operations resumed on host $HOSTNAME."

73
scripts/shutdown.sh Executable file
View File

@ -0,0 +1,73 @@
#!/bin/bash
# Gracefully stop the workloads on node kube00 and power the host off:
# scale applications down, hibernate the CNPG databases, stop the remaining
# deployments, drain the node, stop upsmon and finally power off.

# Set array variables
# CNPG clusters in namespace db to hibernate.
DATABASE_LIST=(
  "nextcloud"
  "gitea"
  "mastodon-db"
  "synapse"
  "mas"
  "mss"
  "coturn"
  "keycloak"
  "signal-bridge"
  "whatsapp-bridge"
  "telegram-bridge"
  "discord-bridge"
  "facebook-bridge"
)

# Notify shutdown commencement
/scripts/notify.sh "Shutdown initiated on host $HOSTNAME."

# Scale keycloak first
kubectl -n public scale deployment keycloak-operator --replicas=0
kubectl -n public scale statefulset keycloak --replicas=0

# Scale all deployments to 0
kubectl -n private scale deployment --all --replicas=0
kubectl -n public scale deployment --selector=delayed.shutdown!=enabled --replicas=0 # Leave ntfy running
kubectl -n db scale deployment --all --replicas=0

# Notify scaling complete
/scripts/notify.sh "Application scale down complete."

# Hibernate all databases
DB_INDEX=1
DB_TOTAL=${#DATABASE_LIST[@]}
for DB in "${DATABASE_LIST[@]}"; do
  echo "Hibernating database $DB_INDEX/$DB_TOTAL ($DB)..."
  kubectl cnpg hibernate on "$DB" -n db
  DB_INDEX=$((DB_INDEX+1))
done

# Notify hibernation complete
/scripts/notify.sh "Database hibernations complete, initiating final shutdown."

# Scale the last deployments (ntfy + dns)
kubectl -n public scale deployment --selector=delayed.shutdown=enabled --replicas=0
kubectl -n dns scale deployment bind --replicas=0

# Cordon node
kubectl cordon kube00

# Drain remaining pods
# --delete-emptydir-data replaces the deprecated --delete-local-data flag.
kubectl drain kube00 --ignore-daemonsets --delete-emptydir-data

# Shutdown upsmon to notify the UPS primary that secondary shutdown has finished
systemctl stop nut-monitor.service

# shutdown
poweroff