This file contains practical, well-commented scripts for common operations on your NixOS system. These complement the conceptual guides with concrete implementations.
---
This script provides convenient access to the Bluesky PDS admin API for common operations like creating invite codes and listing accounts.
#!/usr/bin/env bash
#
# PDS Admin Helper Script
#
# This script provides convenient commands for administering your Bluesky PDS.
# It reads the admin password from sops-managed secrets and makes API calls.
#
# Usage: ./pds-admin.sh <command> [args...]
# Commands:
# create-invite [useCount] - Generate a new invite code
# list-dids [limit] - List account DIDs
# check-health - Check PDS health endpoint
# get-account <did> - Get details for specific account
#
# Exit on any command failure (-e), on use of an unset variable (-u), and when any stage of a pipeline fails (pipefail)
set -euo pipefail
# ============================================================================
# Configuration
# ============================================================================
# The PDS admin password is stored in sops and decrypted at activation time.
# We read it from the secrets directory. This file is only readable by root
# and is located in /run/secrets (tmpfs, never touches disk).
# An already-exported PDS_ADMIN_PASSWORD_FILE takes precedence over the default.
PDS_ADMIN_PASSWORD_FILE="${PDS_ADMIN_PASSWORD_FILE:-/run/secrets/PDS_ADMIN_PASSWORD}"
# Base URL for the PDS. Export PDS_BASE_URL (or change the default here) if
# your PDS is at a different domain.
PDS_BASE_URL="${PDS_BASE_URL:-https://pds.snek.cc}"
# ============================================================================
# Helper Functions
# ============================================================================
# Verify that the admin password file can be read before doing any work.
# Globals: PDS_ADMIN_PASSWORD_FILE (read)
# Returns: 0 when the file is readable; otherwise prints a hint to stderr
#          and terminates the script with exit status 1.
check_prerequisites() {
  [[ -r "$PDS_ADMIN_PASSWORD_FILE" ]] && return 0

  {
    printf '%s\n' "ERROR: Cannot read admin password from $PDS_ADMIN_PASSWORD_FILE"
    printf '%s\n' "Make sure you're running as root and the secret is properly configured"
  } >&2
  exit 1
}
# Print the admin password stored in $PDS_ADMIN_PASSWORD_FILE on stdout.
# tr deletes every newline character in the file (not only a trailing one),
# so the result can be embedded directly in a credential string.
get_admin_password() {
  local secret_path="$PDS_ADMIN_PASSWORD_FILE"
  tr -d '\n' < "$secret_path"
}
# Make an authenticated request to the PDS admin API
# Usage: pds_api_call <method> <endpoint> [data]
#   method   - HTTP method passed to curl -X (e.g. GET, POST)
#   endpoint - path appended to PDS_BASE_URL
#   data     - optional JSON request body
# Output: the response, pretty-printed by jq, on stdout.
# Returns: non-zero if the password cannot be read; otherwise the status of
#          the curl | jq pipeline.
pds_api_call() {
  local method="$1"
  local endpoint="$2"
  local data="${3:-}"

  # Read the secret first so a failure is reported instead of silently
  # sending an empty password.
  local password
  password="$(get_admin_password)" || return

  # The credential is handed to curl through a config file on stdin rather
  # than with -u on the command line, so it never shows up in the process
  # list. Inside a quoted curl config value, backslash and double quote must
  # be escaped (backslashes first, otherwise the second step would be undone).
  local escaped
  escaped=${password//\\/\\\\}
  escaped=${escaped//\"/\\\"}

  # -s hides the progress meter, -S still prints transport errors.
  # "--config -" makes curl read additional options from stdin.
  local curl_cmd=(curl -sS -X "$method" --config -)

  # Add content type header and body for requests with data
  if [[ -n "$data" ]]; then
    curl_cmd+=(-H "Content-Type: application/json")
    curl_cmd+=(-d "$data")
  fi

  # Basic auth uses "admin" as the username and the password from secrets.
  # jq . pretty-prints the JSON response for readability.
  printf 'user = "admin:%s"\n' "$escaped" \
    | "${curl_cmd[@]}" "${PDS_BASE_URL}${endpoint}" \
    | jq .
}
# ============================================================================
# Command Implementations
# ============================================================================
# Generate a new invite code.
# Arguments: $1 - number of times the code may be used (default: 1)
# Exits with status 1 when the count is not a plain non-negative integer;
# the value is spliced into the JSON body, so anything else would produce
# malformed (or attacker-shaped) JSON.
cmd_create_invite() {
  # Default to 1 use if not specified
  local use_count="${1:-1}"
  local int_re='^(0|[1-9][0-9]*)$'

  if [[ ! "$use_count" =~ $int_re ]]; then
    echo "ERROR: useCount must be a non-negative integer, got: $use_count" >&2
    exit 1
  fi

  echo "Creating invite code with $use_count uses..."
  # Call the createInviteCode endpoint with the use count as the JSON body
  pds_api_call POST "/xrpc/com.atproto.server.createInviteCode" \
    "{\"useCount\": $use_count}"
}
# List accounts known to the PDS.
# Arguments: $1 - maximum number of accounts to return (default: 100)
# Exits with status 1 when the limit is not a positive integer; the value is
# placed in the query string, so it must not contain URL metacharacters.
cmd_list_dids() {
  # Default to 100 accounts if not specified
  local limit="${1:-100}"
  local int_re='^[1-9][0-9]*$'

  if [[ ! "$limit" =~ $int_re ]]; then
    echo "ERROR: limit must be a positive integer, got: $limit" >&2
    exit 1
  fi

  echo "Listing up to $limit accounts..."
  # Call the searchAccounts endpoint
  pds_api_call GET "/xrpc/com.atproto.admin.searchAccounts?limit=$limit"
}
# Query the PDS health endpoint and pretty-print the JSON reply.
# No credentials are sent: this is a plain curl request without auth.
cmd_check_health() {
  local health_url="${PDS_BASE_URL}/xrpc/_health"

  printf '%s\n' "Checking PDS health..."
  curl -s "$health_url" | jq .
}
# Show the admin view of a single account.
# Arguments: $1 - DID of the account (required)
# Exits with status 1 and a usage hint when the DID is missing.
cmd_get_account() {
  # ${1:-} instead of $1: under "set -u" a missing argument would otherwise
  # abort with "unbound variable" before the friendly message below.
  local did="${1:-}"

  if [[ -z "$did" ]]; then
    echo "ERROR: DID required" >&2
    echo "Usage: $0 get-account <did>" >&2
    exit 1
  fi

  echo "Getting account details for $did..."
  pds_api_call GET "/xrpc/com.atproto.admin.getAccount?did=$did"
}
# ============================================================================
# Main Script Logic
# ============================================================================
# Check that we have a command
if [[ $# -eq 0 ]]; then
  echo "Usage: $0 <command> [args...]" >&2
  echo "" >&2
  echo "Commands:" >&2
  echo " create-invite [useCount] - Generate a new invite code" >&2
  echo " list-dids [limit] - List account DIDs" >&2
  echo " check-health - Check PDS health" >&2
  echo " get-account <did> - Get account details" >&2
  exit 1
fi

# Parse the command and dispatch to the appropriate handler.
# The password-file check runs only for commands that authenticate, so
# check-health keeps working for users who cannot read the secret.
case "$1" in
  create-invite)
    check_prerequisites
    cmd_create_invite "${2:-1}"
    ;;
  list-dids)
    check_prerequisites
    cmd_list_dids "${2:-100}"
    ;;
  check-health)
    cmd_check_health
    ;;
  get-account)
    check_prerequisites
    # ${2:-} keeps "set -u" from aborting here, so the handler can print
    # its own "DID required" message.
    cmd_get_account "${2:-}"
    ;;
  *)
    echo "ERROR: Unknown command: $1" >&2
    echo "Run $0 without arguments for usage" >&2
    exit 1
    ;;
esac
}}}
---
== Backup Operations ==
=== backup-critical.sh ===
This script creates compressed (but not encrypted) backups of critical service data. Run it daily via cron or systemd timer.
#!/usr/bin/env bash
#
# Critical Data Backup Script
#
# Backs up essential service data to local storage with optional remote sync.
# Should be run as root to access all service data directories.
#
# Usage: ./backup-critical.sh [--remote user@host:/path]
#
set -euo pipefail
# ============================================================================
# Configuration
# ============================================================================

# Root directory for local backups
BACKUPBASEDIR="/backup"

# Retention period for local backups, in days
LOCALRETENTIONDAYS=7

# Services to back up: key is the service name, value is its data directory
declare -A SERVICES
SERVICES["pds"]="/var/lib/pds"
SERVICES["tangled-knot"]="/var/lib/tangled-knot"
SERVICES["tangled-spindle"]="/var/lib/tangled-spindle"
SERVICES["lycan"]="/var/lib/lycan"
SERVICES["grafana"]="/var/lib/grafana"
SERVICES["caddy"]="/var/lib/caddy"

# Whether to stop services during backup (safer but causes downtime)
# NOTE(review): nothing in the visible script reads this flag yet.
STOP_SERVICES=false
# ============================================================================
# Utility Functions
# ============================================================================
# Write a message to stdout, prefixed with the current local time in
# "[YYYY-MM-DD HH:MM:SS]" form. All arguments are joined into one message.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*"
}
# Log a message through log() with an "ERROR: " prefix, sent to stderr
# instead of stdout.
error() {
  local text="ERROR: $*"
  { log "$text"; } >&2
}
# Create backup directory for today
# Globals: BACKUPBASEDIR (read) - root of all local backups
#          BACKUPDIR (written)  - today's directory, <base>/<YYYYMMDD>;
#                                 the backup functions write into it
setupbackupdir() {
  local today
  today=$(date +%Y%m%d)
  BACKUPDIR="${BACKUPBASEDIR}/${today}"

  # Create directory if it doesn't exist
  # -p creates parent directories as needed
  mkdir -p "$BACKUPDIR"
  log "Backup directory: $BACKUPDIR"
}
# Backup a service directory
# Arguments: $1 - service name (used for the archive file name)
#            $2 - directory to archive
# Globals:   BACKUPDIR (read) - destination directory for the archive
# Returns:   0 on success, 1 if the directory is missing or tar fails
backup_service() {
  local service_name="$1"
  local source_dir="$2"
  local backup_file="${BACKUPDIR}/${service_name}.tar.gz"

  log "Backing up $service_name from $source_dir..."

  # Check if source directory exists
  if [[ ! -d "$source_dir" ]]; then
    error "Directory $source_dir does not exist, skipping"
    return 1
  fi

  # Create tar.gz archive
  # -c = create
  # -z = gzip compression
  # -p = preserve permissions
  # -f = file (specifies output file)
  # -C = change to directory before adding files, so the archive holds the
  #      directory's base name rather than its absolute path
  # The 2>/dev/null hides permission errors for files we can't read
  if tar -czpf "$backup_file" \
    -C "$(dirname -- "$source_dir")" "$(basename -- "$source_dir")" 2>/dev/null; then
    # Get file size for logging
    local size
    size=$(du -h "$backup_file" | cut -f1)
    log "✓ $service_name backed up ($size)"
  else
    error "Failed to backup $service_name"
    return 1
  fi
}
# Backup PostgreSQL databases
# Globals: BACKUPDIR (read) - destination directory for the dump
# Returns: 0 on success, 1 if the dump pipeline fails
backup_postgresql() {
  log "Backing up PostgreSQL databases..."
  local backup_file="${BACKUPDIR}/postgresql.sql.gz"

  # Use pg_dumpall to dump all databases and pipe through gzip to compress.
  # 2>/dev/null hides warnings.
  # Detecting a pg_dumpall failure relies on "set -o pipefail" (enabled at
  # the top of this script); without it only gzip's status would count.
  if pg_dumpall 2>/dev/null | gzip > "$backup_file"; then
    local size
    size=$(du -h "$backup_file" | cut -f1)
    log "✓ PostgreSQL backed up ($size)"
  else
    # Do not leave a truncated dump that could be mistaken for a good one
    rm -f -- "$backup_file"
    error "Failed to backup PostgreSQL"
    return 1
  fi
}
# Backup NixOS configuration
# Arguments: $1 - configuration directory to archive (default: /etc/nixos)
# Globals:   BACKUPDIR (read) - destination directory for the archive
# Returns:   0 on success, 1 if tar fails (e.g. the directory is missing)
backupnixosconfig() {
  local config_dir="${1:-/etc/nixos}"
  local backup_file="${BACKUPDIR}/nixos-config.tar.gz"

  log "Backing up NixOS configuration..."

  # Archive the directory relative to its parent so the tarball contains
  # "nixos/..." rather than an absolute path
  if tar -czpf "$backup_file" \
    -C "$(dirname -- "$config_dir")" "$(basename -- "$config_dir")" 2>/dev/null; then
    local size
    size=$(du -h "$backup_file" | cut -f1)
    log "✓ NixOS config backed up ($size)"
  else
    error "Failed to backup NixOS config"
    return 1
  fi
}
# Clean up old local backups
# Globals: BACKUPBASEDIR (read)      - directory holding the dated backups
#          LOCALRETENTIONDAYS (read) - age limit in days
cleanupoldbackups() {
  log "Cleaning up backups older than $LOCALRETENTIONDAYS days..."

  # Find and delete directories older than retention period
  # -mindepth 1 keeps the base directory itself out of the match, so an old
  #             mtime on it can never cause the whole backup tree to be removed
  # -mtime +N   means "modified more than N days ago"
  # -type d     means "directories only"
  # ${VAR:?} aborts instead of scanning from an empty path
  local count
  count=$(find "${BACKUPBASEDIR:?}" -mindepth 1 -maxdepth 1 -type d \
    -mtime "+${LOCALRETENTIONDAYS}" | wc -l)

  if (( count > 0 )); then
    find "${BACKUPBASEDIR:?}" -mindepth 1 -maxdepth 1 -type d \
      -mtime "+${LOCALRETENTIONDAYS}" -exec rm -rf -- {} +
    log "Removed $count old backup directories"
  else
    log "No old backups to clean up"
  fi
}
# Sync to remote location
# Arguments: $1 - rsync destination (e.g. user@host:/path)
# Globals:   BACKUPDIR (read) - today's backup directory; its contents are sent
# Returns:   0 on success, 1 if rsync fails
synctoremote() {
  local remote_path="$1"

  log "Syncing to remote: $remote_path"

  # Use rsync for efficient transfer
  # -a = archive mode (preserves permissions, times, etc.)
  # -z = compress during transfer
  # --delete = delete remote files not present locally
  # NOTE(review): the source is today's directory only, so --delete removes
  # anything else under the destination - confirm that is intended.
  if rsync -az --delete "$BACKUPDIR/" "$remote_path"; then
    log "✓ Remote sync complete"
  else
    error "Remote sync failed"
    return 1
  fi
}
# Calculate total backup size
# Globals: BACKUPDIR (read) - directory whose size and file count are logged
calculate_totals() {
  local total_size
  total_size=$(du -sh "$BACKUPDIR" | cut -f1)
  log "Total backup size: $total_size"

  # Count number of files
  local file_count
  file_count=$(find "$BACKUPDIR" -type f | wc -l)
  log "Total files: $file_count"
}
# ============================================================================
# Main Script
# ============================================================================

# Run the full backup: every configured service, PostgreSQL, the NixOS
# configuration, an optional remote sync, then local retention cleanup.
# Arguments: [--remote <destination>] - rsync target for today's backup
# Exit status: 0 if every step succeeded, 1 otherwise.
main() {
  log "Starting backup process..."

  # Check if running as root (needed for most service data)
  if [[ $EUID -ne 0 ]]; then
    error "This script must be run as root"
    exit 1
  fi

  # Validate the arguments before any work is done, so a malformed
  # --remote does not surface only after the backups have already run
  local remote_dest=""
  if [[ $# -gt 0 && "$1" == "--remote" ]]; then
    if [[ -z "${2:-}" ]]; then
      error "--remote requires a destination path"
      exit 1
    fi
    remote_dest="$2"
  fi

  # Setup backup directory
  setupbackupdir

  # Track if any backups failed
  local failed=0

  # Backup each service
  local service_name service_dir
  for service_name in "${!SERVICES[@]}"; do
    service_dir="${SERVICES[$service_name]}"
    if ! backup_service "$service_name" "$service_dir"; then
      failed=1
    fi
  done

  # Backup PostgreSQL
  if ! backup_postgresql; then
    failed=1
  fi

  # Backup NixOS configuration
  if ! backupnixosconfig; then
    failed=1
  fi

  # Calculate and log totals
  calculate_totals

  # Sync to remote if specified. A failed sync is recorded rather than left
  # to "set -e", which would end the script before cleanup and the summary.
  if [[ -n "$remote_dest" ]]; then
    if ! synctoremote "$remote_dest"; then
      failed=1
    fi
  fi

  # Clean up old backups
  cleanupoldbackups

  if [[ $failed -eq 0 ]]; then
    log "Backup completed successfully"
    exit 0
  else
    log "Backup completed with some failures"
    exit 1
  fi
}

# Run main function with all arguments
main "$@"
}}}
---
A comprehensive health check script that monitors service status, resource usage, and connectivity.
#!/usr/bin/env bash
#
# System Health Check Script
#
# Performs various health checks and reports status.
# Can be run manually or via systemd timer for continuous monitoring.
#
# Usage: ./health-check.sh [--detailed]
#
set -euo pipefail
# ============================================================================
# Configuration
# ============================================================================

# Service names passed to "systemctl is-active"; each must be running
CRITICAL_SERVICES=(caddy bluesky-pds tangled-knot tangled-spindle prometheus grafana)

# URLs probed with curl; each should answer 200 OK
HEALTH_ENDPOINTS=(
  https://snek.cc
  https://pds.snek.cc/xrpc/_health
  https://knot.snek.cc
  https://grafana.snek.cc
)

# Disk usage limits for the root filesystem, in percent
DISK_WARNING_THRESHOLD=80
DISK_CRITICAL_THRESHOLD=90

# Memory usage limits, in percent
MEMORY_WARNING_THRESHOLD=80
MEMORY_CRITICAL_THRESHOLD=95

# Load average limits, expressed as load divided by CPU count
LOAD_WARNING_THRESHOLD=1.0
LOAD_CRITICAL_THRESHOLD=2.0
# ============================================================================
# Status Tracking
# ============================================================================

# Integer counters, one per outcome, updated by record_check
declare -i CHECKS_PASSED=0 CHECKS_FAILED=0 CHECKS_WARNING=0

# One "<STATUS>: <message>" entry per recorded check
declare -a CHECK_RESULTS=()
# ============================================================================
# Output Functions
# ============================================================================

# Color codes for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Print one status line, colored only when stdout is a terminal.
# Arguments: $1 - PASS, FAIL, WARN or INFO (anything else prints nothing)
#            $2 - message text
# When output is redirected or piped (log file, systemd journal) the symbol
# is printed without escape sequences so the captured text stays readable.
print_status() {
  local status="$1"
  local message="$2"
  local color symbol

  case "$status" in
    PASS)
      color="$GREEN"
      symbol="✓"
      ;;
    FAIL)
      color="$RED"
      symbol="✗"
      ;;
    WARN)
      color="$YELLOW"
      symbol="⚠"
      ;;
    INFO)
      echo " $message"
      return 0
      ;;
    *)
      return 0
      ;;
  esac

  # -t 1 is true only when file descriptor 1 (stdout) is a terminal
  if [[ -t 1 ]]; then
    echo -e "${color}${symbol}${NC} $message"
  else
    echo -e "${symbol} $message"
  fi
}
# Record a check result
# Arguments: $1 - PASS, FAIL or WARN (other values are stored but not counted)
#            $2 - message text
# Globals:   CHECK_RESULTS (appended), CHECKS_PASSED / CHECKS_FAILED /
#            CHECKS_WARNING (incremented)
record_check() {
  local status="$1"
  local message="$2"

  CHECK_RESULTS+=("$status: $message")

  # Plain assignments are used instead of ((VAR++)): the arithmetic command
  # returns status 1 when the expression evaluates to 0, which is exactly
  # what a post-increment from 0 does, and "set -e" would then terminate the
  # script on the very first recorded check.
  case "$status" in
    PASS)
      CHECKS_PASSED=$((CHECKS_PASSED + 1))
      ;;
    FAIL)
      CHECKS_FAILED=$((CHECKS_FAILED + 1))
      ;;
    WARN)
      CHECKS_WARNING=$((CHECKS_WARNING + 1))
      ;;
  esac
}
# ============================================================================
# Check Functions
# ============================================================================
# Report whether each entry of CRITICAL_SERVICES is active according to
# systemctl, recording and printing one result per service.
# Arguments: $1 - pass "--detailed" to also print the last 5 journal lines
#                 of every service that is not running
check_service_status() {
  local mode="${1:-}"

  printf '%s\n' "=== Service Status ==="

  for service in "${CRITICAL_SERVICES[@]}"; do
    if ! systemctl is-active --quiet "$service"; then
      record_check FAIL "Service $service is not running"
      print_status FAIL "$service is not running"

      # Show recent log entries for failed services
      if [[ "$mode" == "--detailed" ]]; then
        printf '%s\n' "Recent log entries:"
        journalctl -u "$service" --no-pager -n 5 | sed 's/^/ /'
      fi
      continue
    fi

    record_check PASS "Service $service is running"
    print_status PASS "$service is running"
  done

  printf '\n'
}
# Check root filesystem usage against the disk thresholds.
# Arguments: $1 - pass "--detailed" to also list the largest /var/lib entries
# Globals:   DISK_WARNING_THRESHOLD, DISK_CRITICAL_THRESHOLD (read)
check_disk_usage() {
  echo "=== Disk Usage ==="

  # Get disk usage for root filesystem
  # -P selects the POSIX output format: one line per filesystem, so the
  # percentage is always field 5 of line 2
  local usage
  usage=$(df -P / | awk 'NR==2 {print $5}' | tr -d '%') || usage=""

  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    # An empty or garbled value must not be compared numerically: it would
    # evaluate as 0 and be reported as healthy
    record_check FAIL "Disk usage could not be determined"
    print_status FAIL "Disk usage: unknown"
  elif [[ "$usage" -ge "$DISK_CRITICAL_THRESHOLD" ]]; then
    record_check FAIL "Disk usage is ${usage}% (critical)"
    print_status FAIL "Disk usage: ${usage}% (critical)"
  elif [[ "$usage" -ge "$DISK_WARNING_THRESHOLD" ]]; then
    record_check WARN "Disk usage is ${usage}% (warning)"
    print_status WARN "Disk usage: ${usage}% (warning)"
  else
    record_check PASS "Disk usage is ${usage}% (healthy)"
    print_status PASS "Disk usage: ${usage}%"
  fi

  # Show top disk consumers if detailed mode
  if [[ "${1:-}" == "--detailed" ]]; then
    echo "Top disk consumers:"
    # Informational only: a du error must not end the whole health check
    du -h /var/lib/*/ 2>/dev/null | sort -h | tail -5 | sed 's/^/ /' || true
  fi
  echo
}
# Check memory usage (used / total from "free") against the memory thresholds.
# Arguments: $1 - pass "--detailed" to also list the top 5 processes by memory
# Globals:   MEMORY_WARNING_THRESHOLD, MEMORY_CRITICAL_THRESHOLD (read)
check_memory_usage() {
  echo "=== Memory Usage ==="

  # Get memory usage percentage
  # LC_ALL=C keeps the "Mem:" row label untranslated; the $2 > 0 guard
  # avoids a division by zero in awk
  local mem_info
  mem_info=$(LC_ALL=C free | awk '/^Mem:/ && $2 > 0 {printf "%.0f", $3/$2 * 100.0}') || mem_info=""

  if [[ ! "$mem_info" =~ ^[0-9]+$ ]]; then
    # Never compare an empty value numerically: it would count as 0
    record_check FAIL "Memory usage could not be determined"
    print_status FAIL "Memory usage: unknown"
  elif [[ "$mem_info" -ge "$MEMORY_CRITICAL_THRESHOLD" ]]; then
    record_check FAIL "Memory usage is ${mem_info}% (critical)"
    print_status FAIL "Memory usage: ${mem_info}% (critical)"
  elif [[ "$mem_info" -ge "$MEMORY_WARNING_THRESHOLD" ]]; then
    record_check WARN "Memory usage is ${mem_info}% (warning)"
    print_status WARN "Memory usage: ${mem_info}% (warning)"
  else
    record_check PASS "Memory usage is ${mem_info}% (healthy)"
    print_status PASS "Memory usage: ${mem_info}%"
  fi

  # Show top memory consumers if detailed mode
  if [[ "${1:-}" == "--detailed" ]]; then
    echo "Top memory consumers:"
    # awk reads ps's whole output, so ps is never killed by SIGPIPE the way
    # it can be behind "head"; under pipefail that would abort the script.
    # Lines 2-6 are the five largest processes (line 1 is the header).
    ps aux --sort=-%mem | awk 'NR >= 2 && NR <= 6 { print " " $0 }' || true
  fi
  echo
}
# Check the 1-minute load average, relative to the CPU count, against the
# load thresholds.
# Globals: LOAD_WARNING_THRESHOLD, LOAD_CRITICAL_THRESHOLD (read)
check_load_average() {
  echo "=== Load Average ==="

  # Get 1-minute load average
  # LC_ALL=C pins the "load average:" label and the decimal point, both of
  # which the parsing below depends on
  local load
  load=$(LC_ALL=C uptime | awk -F'load average:' '{print $2}' | awk '{print $1}' | tr -d ',') || load=""

  # Get number of CPUs
  local cpus
  cpus=$(nproc) || cpus=""

  local number_re='^[0-9]+([.][0-9]+)?$'
  local count_re='^[1-9][0-9]*$'
  if [[ ! "$load" =~ $number_re || ! "$cpus" =~ $count_re ]]; then
    record_check FAIL "Load average could not be determined"
    print_status FAIL "Load average: unknown"
    echo
    return 0
  fi

  # Classify load relative to CPU count. awk does the floating-point math,
  # so the script does not need bc to be installed.
  local level
  level=$(awk -v load="$load" -v cpus="$cpus" \
    -v warn="$LOAD_WARNING_THRESHOLD" -v crit="$LOAD_CRITICAL_THRESHOLD" \
    'BEGIN {
      ratio = load / cpus
      if (ratio >= crit) print "FAIL"
      else if (ratio >= warn) print "WARN"
      else print "PASS"
    }')

  case "$level" in
    FAIL)
      record_check FAIL "Load average is $load (critical, ${cpus} CPUs)"
      print_status FAIL "Load average: $load (on ${cpus} CPUs)"
      ;;
    WARN)
      record_check WARN "Load average is $load (warning, ${cpus} CPUs)"
      print_status WARN "Load average: $load (on ${cpus} CPUs)"
      ;;
    *)
      record_check PASS "Load average is $load (healthy, ${cpus} CPUs)"
      print_status PASS "Load average: $load (on ${cpus} CPUs)"
      ;;
  esac
  echo
}
# Probe every URL in HEALTH_ENDPOINTS and record whether it answers 200.
# Globals: HEALTH_ENDPOINTS (read)
check_http_endpoints() {
  echo "=== HTTP Endpoint Health ==="

  local url status
  for url in "${HEALTH_ENDPOINTS[@]}"; do
    # Use curl with timeouts and follow redirects
    # -s = silent
    # -L = follow redirects, so the final response's status is reported
    # -o /dev/null = don't output body
    # -w "%{http_code}" = write HTTP status code to stdout
    # --max-time 10 = timeout after 10 seconds
    #
    # On failure curl has already written "000" through -w, so the fallback
    # must replace the captured value; appending a second "000" would yield
    # "000000" and skip the "unreachable" branch below.
    status=$(curl -sL -o /dev/null -w "%{http_code}" --max-time 10 "$url") || status="000"

    if [[ "$status" == "200" ]]; then
      record_check PASS "Endpoint $url is healthy (HTTP $status)"
      print_status PASS "$url (HTTP $status)"
    elif [[ "$status" == "000" ]]; then
      record_check FAIL "Endpoint $url is unreachable"
      print_status FAIL "$url (unreachable)"
    else
      record_check FAIL "Endpoint $url returned HTTP $status"
      print_status FAIL "$url (HTTP $status)"
    fi
  done
  echo
}
# ============================================================================
# Main Script
# ============================================================================

# Run every check, print a summary, and exit with a status that reflects the
# worst result: 2 if any check failed, 1 if there were only warnings, 0
# otherwise.
# Arguments: $1 - optional "--detailed" flag forwarded to the service, disk
#                 and memory checks
main() {
  local rule="================================"

  printf '%s\n' "$rule" "System Health Check" "$rule"
  echo "Timestamp: $(date)"
  echo "Hostname: $(hostname)"
  echo

  local detailed_mode=""
  case "${1:-}" in
    --detailed) detailed_mode="--detailed" ;;
  esac

  # Run all checks
  check_service_status "$detailed_mode"
  check_disk_usage "$detailed_mode"
  check_memory_usage "$detailed_mode"
  check_load_average
  check_http_endpoints

  # Print summary
  printf '%s\n' "$rule" "Summary" "$rule"
  print_status PASS "$CHECKS_PASSED checks passed"
  print_status WARN "$CHECKS_WARNING warnings"
  print_status FAIL "$CHECKS_FAILED failures"
  echo

  # Pick the verdict and matching exit code, failures taking precedence
  local verdict="HEALTHY"
  local code=0
  if (( CHECKS_FAILED > 0 )); then
    verdict="CRITICAL"
    code=2
  elif (( CHECKS_WARNING > 0 )); then
    verdict="WARNING"
    code=1
  fi

  echo "Status: $verdict"
  exit "$code"
}

main "$@"
}}}
---
== Service Debugging ==
=== debug-service.sh ===
A script to help debug failing services by gathering relevant information.
#!/usr/bin/env bash
#
# Service Debugging Helper
#
# Gathers diagnostic information about a service to help troubleshoot issues.
#
# Usage: ./debug-service.sh <service-name>
#
set -euo pipefail
# ============================================================================
# Main Script
# ============================================================================

if [[ $# -eq 0 ]]; then
  echo "Usage: $0 <service-name>"
  echo "Example: $0 bluesky-pds"
  exit 1
fi

SERVICE="$1"

# Ask systemd for the unit's main PID. Matching command lines with
# "pgrep -f $SERVICE" is unreliable here because this script's own command
# line contains the service name. A value that is not a positive number
# (systemd prints MainPID=0 when there is no main process) is treated as
# "not running".
MAIN_PID="$(systemctl show "$SERVICE" --property=MainPID 2>/dev/null || true)"
MAIN_PID="${MAIN_PID#MainPID=}"
if [[ ! "$MAIN_PID" =~ ^[1-9][0-9]*$ ]]; then
  MAIN_PID=""
fi

echo "================================"
echo "Debug Report for: $SERVICE"
echo "================================"
echo "Generated: $(date)"
echo

# ---------------------------------------------------------------------------
# 1. Service Status
# ---------------------------------------------------------------------------
echo "=== Service Status ==="
systemctl status "$SERVICE" --no-pager || true
echo

# ---------------------------------------------------------------------------
# 2. Process Information
# ---------------------------------------------------------------------------
echo "=== Process Information ==="
if [[ -n "$MAIN_PID" ]]; then
  echo "Main process:"
  ps -f -p "$MAIN_PID" || true
  echo
  echo "All related processes:"
  # Command-line pattern match; this script's own PID is filtered out
  pgrep -f -- "$SERVICE" | grep -vx -- "$$" \
    | xargs -I {} ps -o pid,ppid,cmd,%mem,%cpu -p {} || true
  echo
  echo "Open files by service:"
  ls -la "/proc/${MAIN_PID}/fd" 2>/dev/null | head -20 || true
else
  echo "No running processes found for $SERVICE"
fi
echo

# ---------------------------------------------------------------------------
# 3. Recent Logs
# ---------------------------------------------------------------------------
echo "=== Recent Logs (last 50 lines) ==="
journalctl -u "$SERVICE" --no-pager -n 50 || true
echo

# ---------------------------------------------------------------------------
# 4. Error Logs Only
# ---------------------------------------------------------------------------
echo "=== Error Logs Only (last 20 errors) ==="
journalctl -u "$SERVICE" --no-pager -p err -n 20 || true
echo

# ---------------------------------------------------------------------------
# 5. Network Connections
# ---------------------------------------------------------------------------
echo "=== Network Connections ==="
echo "Listening ports:"
ss -tlnp | grep -i "$SERVICE" || echo "No listening ports found"
echo
echo "Active connections:"
ss -tnp | grep -i "$SERVICE" | head -10 || echo "No active connections"
echo

# ---------------------------------------------------------------------------
# 6. Resource Usage
# ---------------------------------------------------------------------------
echo "=== Resource Usage ==="
echo "Memory:"
systemctl show "$SERVICE" --property=MemoryCurrent,MemoryMax || true
echo
echo "CPU:"
systemctl show "$SERVICE" --property=CPUUsageNSec || true
echo

# ---------------------------------------------------------------------------
# 7. File Descriptors and Limits
# ---------------------------------------------------------------------------
echo "=== File Descriptor Usage ==="
if [[ -n "$MAIN_PID" ]]; then
  echo "Open file descriptors: $(ls "/proc/${MAIN_PID}/fd" 2>/dev/null | wc -l)"
  echo "FD limit: $(grep "Max open files" "/proc/${MAIN_PID}/limits" 2>/dev/null || echo "Unknown")"
fi
echo

# ---------------------------------------------------------------------------
# 8. Configuration
# ---------------------------------------------------------------------------
echo "=== Systemd Configuration ==="
echo "Service file location:"
systemctl cat "$SERVICE" 2>/dev/null | head -5 || echo "Could not find service file"
echo
echo "Environment:"
systemctl show "$SERVICE" --property=Environment 2>/dev/null || true
echo

# ---------------------------------------------------------------------------
# 9. Data Directory
# ---------------------------------------------------------------------------
echo "=== Data Directory ==="
# Try to find data directory from common locations: the unit name as-is,
# with dashes replaced by underscores, and the part after the last dash
DATA_DIR_FOUND=false
for dir in "/var/lib/$SERVICE" "/var/lib/${SERVICE//-/_}" "/var/lib/${SERVICE##*-}"; do
  if [[ -d "$dir" ]]; then
    DATA_DIR_FOUND=true
    echo "Found: $dir"
    echo "Size: $(du -sh "$dir" 2>/dev/null | cut -f1 || echo "Unknown")"
    echo "Permissions: $(ls -ld "$dir" 2>/dev/null || echo "Unknown")"
    echo "Free space: $(df -h "$dir" 2>/dev/null | tail -1 | awk '{print $4}' || echo "Unknown")"
    break
  fi
done
if [[ "$DATA_DIR_FOUND" == false ]]; then
  echo "No data directory found under /var/lib for $SERVICE"
fi
echo

# ---------------------------------------------------------------------------
# 10. Restart History
# ---------------------------------------------------------------------------
echo "=== Restart History ==="
journalctl -u "$SERVICE" --no-pager --since "24 hours ago" | grep -E "(Started|Stopped|Failed)" | tail -10 || echo "No recent restarts"
echo
echo "================================"
echo "Debug report complete"
echo "================================"
}}}
---
Script to manually rotate and clean up logs.
#!/usr/bin/env bash
#
# Log Rotation and Cleanup
#
# Manually rotates logs and cleans up old entries.
# NixOS handles most log rotation automatically, but this can be useful
# for manual cleanup or when disk space is critically low.
#
set -euo pipefail
# ============================================================================
# Functions
# ============================================================================
# Rotate the systemd journal, then drop archived entries older than 30 days.
rotate_systemd_logs() {
  echo "Rotating systemd journal..."
  # --rotate archives the currently active journal files first; vacuuming
  # only ever removes archived files, so without it the active files are
  # left untouched
  journalctl --rotate --vacuum-time=30d
  echo
}
# Delete system generations older than 30 days and garbage-collect the store.
cleanup_nix_store() {
  echo "Cleaning up Nix store..."

  # Remove generations older than 30 days
  # "30d" is an age; the former "+30" meant "keep the newest 30 generations"
  # Best effort: errors are ignored so the collection below still runs
  nix-env --delete-generations 30d --profile /nix/var/nix/profiles/system 2>/dev/null || true

  # Garbage collect, limited to the same 30-day window. Plain "-d" would
  # delete every old generation of every profile, discarding the retention
  # applied above and all rollback targets.
  nix-collect-garbage --delete-older-than 30d
  echo
}
# Delete regular files that have not been accessed recently: older than 7
# days under /tmp and older than 30 days under /var/tmp. Errors from find
# are ignored so one unreadable entry does not stop the maintenance run.
cleanup_tmp_files() {
  printf '%s\n' "Cleaning up temporary files..."

  # Each entry is "<directory>:<minimum age in days>"
  local spec dir days
  for spec in "/tmp:7" "/var/tmp:30"; do
    dir="${spec%%:*}"
    days="${spec##*:}"
    find "$dir" -type f -atime "+${days}" -delete 2>/dev/null || true
  done

  printf '\n'
}
# Print root filesystem usage followed by the 20 largest directories on it.
show_disk_usage() {
  echo "=== Current Disk Usage ==="
  df -h /
  echo
  echo "=== Largest Directories ==="
  # -x keeps du on the root filesystem, matching the "df /" figure above
  #    and skipping /proc, /sys and other mounts.
  # awk (rather than head) consumes all of sort's output, so sort is not
  # killed by SIGPIPE; under pipefail that would print the fallback message
  # even though nothing was inaccessible.
  du -xh / 2>/dev/null | sort -hr | awk 'NR <= 20' || echo "Some directories not accessible"
  echo
}
# ============================================================================
# Main
# ============================================================================

printf '%s\n' "Starting maintenance tasks..." "==============================" ""

# Root is required for most of the operations below
if (( EUID != 0 )); then
  printf '%s\n' "This script must be run as root"
  exit 1
fi

# Disk usage before cleanup
show_disk_usage

# Cleanup steps, run in order: journal, Nix store, temporary files
rotate_systemd_logs
cleanup_nix_store
cleanup_tmp_files

# Disk usage after cleanup
printf '%s\n' "==============================" "Cleanup complete" "=============================="
show_disk_usage
}}}
---
== Usage Examples ==
=== Running the Scripts ===
**PDS Admin:**
# Make executable and run
chmod +x pds-admin.sh
sudo ./pds-admin.sh create-invite
# Create invite with 5 uses
sudo ./pds-admin.sh create-invite 5
# List accounts
sudo ./pds-admin.sh list-dids
}}}
**Backup:**
# Run backup locally
sudo ./backup-critical.sh
# Backup and sync to remote
sudo ./backup-critical.sh --remote user@backup-server:/backups/snek
# Add to cron for daily runs
# Edit crontab: sudo crontab -e
# Add line: 0 3 * * * /etc/nixos/scripts/backup-critical.sh
}}}
**Health Check:**
# Quick health check
./health-check.sh
# Detailed check with extra info
./health-check.sh --detailed
# Use in monitoring (returns exit codes: 0=healthy, 1=warning, 2=critical)
./health-check.sh || echo "System has issues"
}}}
**Debug Service:**
# Debug a failing service
./debug-service.sh bluesky-pds
# Save output to file for analysis
./debug-service.sh tangled-knot > /tmp/debug-knot.txt 2>&1
}}}
**Maintenance:**
# Run all maintenance tasks
sudo ./rotate-logs.sh
}}}
---
1. *Always run as root:* Most of these scripts need root access to read service data and system state.
2. *Protect secret paths:* Scripts that read secrets use paths in /run/secrets/ which are only accessible to root.
3. *Audit logging:* Consider adding audit logging for administrative commands, especially PDS operations.
4. *Remote backup security:* When syncing backups remotely, use SSH keys (not passwords) and consider encrypting backups before transfer.
5. *Script permissions:* Store scripts in /etc/nixos/ or similar, owned by root with 755 permissions (executable by anyone, writable only by root).
---
These scripts can be integrated into your NixOS configuration in several ways:
1. *System packages:* Add scripts to environment.systemPackages so they're in PATH
2. *Systemd services:* Run health checks or backups via systemd timers
3. *Activation scripts:* Run checks during system activation
4. *Documentation:* Reference them in your operational runbooks
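The scripts are designed to be self-contained and to work with the existing service configuration. Site-specific values (domains, service names, data paths) are gathered in the configuration section at the top of each script, so adapt those before use.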