From 325b5954af6e223a57b88ef307e1f6f917dfc238 Mon Sep 17 00:00:00 2001 From: Anthony Date: Fri, 3 Oct 2025 00:45:54 +0800 Subject: [PATCH] Fix code issues for backup retention --- FIX_REPOSITORY.md | 208 +++++++++++++++++++++++++++ scripts/imports/backup_core_files.sh | 8 +- scripts/install-restic.sh | 68 +++++++-- 3 files changed, 269 insertions(+), 15 deletions(-) create mode 100644 FIX_REPOSITORY.md diff --git a/FIX_REPOSITORY.md b/FIX_REPOSITORY.md new file mode 100644 index 0000000..2e2f61c --- /dev/null +++ b/FIX_REPOSITORY.md @@ -0,0 +1,208 @@ +# Repository Initialization Fix + +## Problem + +The installation completed but the repository initialization failed with: +``` +[INSTALL] WARNING: Repository initialization failed +``` + +When you try to back up, you get: +``` +ERROR: Unable to access the Restic repository. Aborting backup. +``` + +## Root Cause + +The `/data` directory exists but either: +1. It's not mounted to Shared Storage +2. It has permission issues +3. Restic cannot create the repository structure + +## Quick Fix - Manual Repository Initialization + +### Step 1: SSH into your environment +```bash +# Connect to your compute node (replace NODE_ID with your node's ID) +ssh root@node-NODE_ID +``` + +### Step 2: Check /data mount status +```bash +# Check if /data is a mount point +df -h /data +mount | grep /data + +# Expected: Should show it's mounted from shared storage +# If it shows just your local filesystem, /data is NOT mounted to shared storage +``` + +### Step 3: Check current repository status +```bash +# Set the password +export RESTIC_PASSWORD=$(cat /etc/restic-password) + +# Try to access repository +restic -r /data snapshots + +# If it shows "Fatal: unable to open config file" - repository needs initialization +# If it shows "wrong password" - password mismatch issue +# If it shows a list of snapshots - repository is working (you should not be seeing the error) +``` + +### Step 4: Initialize the repository manually +```bash +# Make sure /data is writable +chmod 755 /data +chown root:root /data + 
+# Set password +export RESTIC_PASSWORD=$(cat /etc/restic-password) + +# Initialize repository +restic -r /data init + +# Expected output: +# created restic repository 1234abcd at /data +# Please note that knowledge of your password is required to access +# the repository. Losing your password means that your data is +# irrecoverably lost. +``` + +### Step 5: Verify repository is accessible +```bash +export RESTIC_PASSWORD=$(cat /etc/restic-password) +restic -r /data snapshots + +# Should show an empty list (no snapshots yet) with no errors +``` + +### Step 6: Test backup +```bash +# Try running a manual backup from the addon dashboard +# Or via command line: +bash /home/litespeed/mb-backups/backup_all.sh manual +``` + +## If /data is NOT on Shared Storage + +If Step 2 shows `/data` is just a local directory (not mounted), you need to mount shared storage: + +### Option 1: Mount Shared Storage via Dashboard + +1. Go to your environment in the dashboard +2. Click on "Settings" → "Endpoints" +3. Look for the "Shared Storage" node +4. Ensure it's mounted to `/data` on your compute nodes + +### Option 2: Manual Mount (temporary) + +```bash +# Find your shared storage node IP +# This is shown in your dashboard under an entry such as "Shared Storage 2.0-10.5 Node ID: 9457" + +# Mount shared storage (replace STORAGE_IP with the actual IP) +mount -t nfs STORAGE_IP:/data /data + +# Verify mount +df -h /data +``` + +### Option 3: Create Mount in Manifest + +Add this to your environment topology (not the addon): + +```yaml +nodes: + - nodeType: storage + cloudlets: 8 + nodeGroup: storage + +# Then mount to compute nodes +- nodeGroup: cp + volumes: + - /data + volumeMounts: + storage: + path: /data + sourcePath: /data +``` + +## Verification Script + +Run this to diagnose the issue: + +```bash +#!/bin/bash +echo "=== Backup Repository Diagnostics ===" +echo "" +echo "1. /data Mount Status:" +df -h /data +echo "" +echo "2. /data Permissions:" +ls -ld /data +echo "" +echo "3. /data Contents:" +ls -la /data +echo "" +echo "4. 
Password File:" +ls -l /etc/restic-password /data/.restic-password 2>/dev/null +echo "" +echo "5. Repository Check:" +export RESTIC_PASSWORD=$(cat /etc/restic-password) +restic -r /data snapshots 2>&1 +echo "" +echo "6. Repository Structure:" +ls -la /data/ 2>/dev/null +echo "" +echo "=== End Diagnostics ===" +``` + +Save this as `diagnose.sh`, run `chmod +x diagnose.sh && ./diagnose.sh` and share the output. + +## Common Issues & Solutions + +### Issue 1: "Fatal: unable to open config file" +**Solution:** Repository not initialized - Run `restic -r /data init` + +### Issue 2: "wrong password" +**Solution:** Password mismatch +```bash +# Check if passwords are different +diff /etc/restic-password /data/.restic-password + +# Use the one from shared storage +cp /data/.restic-password /etc/restic-password +``` + +### Issue 3: "permission denied" +**Solution:** Fix permissions +```bash +chown -R root:root /data +chmod 755 /data +chmod 644 /etc/restic-password +``` + +### Issue 4: "/data is not on shared storage" +**Solution:** Mount shared storage properly (see above) + +## After Fixing + +Once the repository is initialized: + +1. ✅ Test backup: Run "Backup Now" from dashboard +2. ✅ Verify: Check "View All Backups" shows your backup +3. ✅ The issue should be permanently fixed + +## Prevention for Next Installation + +Before reinstalling the addon: +1. ✅ Ensure Shared Storage is mounted to `/data` +2. ✅ Verify: `df /data` shows shared storage +3. ✅ Test: `touch /data/test && rm /data/test` works +4. ✅ Then install the addon + +--- + +**Need help?** Share the output of the diagnostics script above. + diff --git a/scripts/imports/backup_core_files.sh b/scripts/imports/backup_core_files.sh index 56b78fd..50d4c15 100644 --- a/scripts/imports/backup_core_files.sh +++ b/scripts/imports/backup_core_files.sh @@ -45,10 +45,16 @@ export RESTIC_PASSWORD=$(cat "$PASSWORD_FILE") # Verify repository access log_message "Verifying repository access..." -if ! 
restic -r "$BACKUP_PATH" snapshots > /dev/null 2>&1; then +REPO_CHECK=$(restic -r "$BACKUP_PATH" snapshots 2>&1) +REPO_RESULT=$? +if [ $REPO_RESULT -ne 0 ]; then log_message "ERROR: Unable to access the Restic repository. Aborting backup." + log_message "ERROR Details: $REPO_CHECK" + log_message "Repository: $BACKUP_PATH" + log_message "Password file: $PASSWORD_FILE" exit 1 fi +log_message "✓ Repository is accessible" # Acquire a global lock to serialize Restic operations log_message "Acquiring global lock for Restic operations..." diff --git a/scripts/install-restic.sh b/scripts/install-restic.sh index 3f1d095..7d7ce7e 100644 --- a/scripts/install-restic.sh +++ b/scripts/install-restic.sh @@ -53,24 +53,64 @@ echo "[INSTALL] Initializing repository..." export RESTIC_PASSWORD=$(cat /etc/restic-password) export RESTIC_REPOSITORY=/data +# Verify /data exists and check if it's mounted storage +echo "[INSTALL] Checking /data directory..." +if [ ! -d "/data" ]; then + echo "[INSTALL] Creating /data directory..." + mkdir -p /data +fi + +# Check if /data is on a mount point (shared storage) +MOUNT_INFO=$(df /data | tail -1) +echo "[INSTALL] Storage info: $MOUNT_INFO" + +# Verify /data has proper permissions +echo "[INSTALL] Checking /data permissions..." +if [ ! -w "/data" ]; then + echo "[INSTALL] /data is not writable, fixing permissions..." + chown -R root:root /data + chmod 755 /data +fi + +# Test write access +echo "[INSTALL] Testing write access to /data..." +if ! touch /data/.write_test 2>/dev/null; then + echo "[INSTALL] ERROR: Cannot write to /data!" + echo "[INSTALL] Please ensure /data is mounted to Shared Storage" + exit 1 +fi +rm -f /data/.write_test +echo "[INSTALL] ✓ /data is writable" + # Check if repository is accessible with current password +echo "[INSTALL] Checking for existing repository..." if restic snapshots >/dev/null 2>&1; then - echo "[INSTALL] Repository already exists and is accessible" - SNAPSHOT_COUNT=$(restic snapshots --json | jq '. 
| length' 2>/dev/null || echo "0") - echo "[INSTALL] Found $SNAPSHOT_COUNT existing snapshot(s)" + echo "[INSTALL] ✓ Repository already exists and is accessible" + SNAPSHOT_COUNT=$(restic snapshots --json 2>/dev/null | jq '. | length' 2>/dev/null || echo "0") + echo "[INSTALL] ✓ Found $SNAPSHOT_COUNT existing snapshot(s)" else - # Try to initialize - only works on empty/new repositories - echo "[INSTALL] Attempting to initialize repository..." - if restic init 2>/dev/null; then - echo "[INSTALL] New repository initialized successfully" + # Try to initialize - show errors if it fails + echo "[INSTALL] No existing repository found, initializing new repository..." + INIT_OUTPUT=$(restic init 2>&1) + INIT_RESULT=$? + + if [ $INIT_RESULT -eq 0 ]; then + echo "[INSTALL] ✓ New repository initialized successfully" + # Verify it works + if restic snapshots >/dev/null 2>&1; then + echo "[INSTALL] ✓ Repository verified and accessible" + else + echo "[INSTALL] ERROR: Repository created but not accessible!" + exit 1 + fi else - # Repository might exist but with different password or corrupted - echo "[INSTALL] WARNING: Repository initialization failed" - echo "[INSTALL] This could mean:" - echo "[INSTALL] 1. Repository already exists (safe to ignore)" - echo "[INSTALL] 2. Password mismatch with existing repository" - echo "[INSTALL] 3. Permission issues" - echo "[INSTALL] Please check repository manually if backups are missing" + echo "[INSTALL] ERROR: Repository initialization failed!" + echo "[INSTALL] Error details: $INIT_OUTPUT" + echo "[INSTALL] This usually means:" + echo "[INSTALL] - /data is not persistent storage" + echo "[INSTALL] - Permission issues" + echo "[INSTALL] - Existing repository with different password" + exit 1 fi fi