aboutsummaryrefslogtreecommitdiff
path: root/custom/zfs-snap-prune
blob: 762ff9983f9199042056c47d980310ee45775e29 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/bin/bash
# zfs-snap-prune - Prune old ZFS snapshots with hybrid retention policy
#
# Retention Policy:
#   - Always keep the N most recent snapshots (default: 20)
#   - Delete snapshots beyond N only if older than MAX_AGE (default: 180 days)
#   - Never delete genesis snapshot
#
# Usage:
#   zfs-snap-prune [OPTIONS]
#
# Options:
#   --dry-run       Show what would be deleted without deleting
#   --verbose       Show decision for every snapshot
#   --quiet         Suppress non-error output
#   --test          Use mock data from stdin instead of real ZFS
#   --help          Show this help message
#
# Environment variables:
#   POOL_NAME       - ZFS pool name (default: zroot)
#   ROOT_DATASET    - Root dataset path (default: ROOT/default)
#   KEEP_COUNT      - Number of recent snapshots to always keep (default: 20)
#   MAX_AGE_DAYS    - Age in days after which snapshots beyond KEEP_COUNT are deleted (default: 180)
#   NOW_OVERRIDE    - Override current timestamp for testing (epoch seconds)

set -e

# Tunables: a value supplied through the environment wins; when the variable
# is unset or empty the built-in default is assigned instead.
: "${POOL_NAME:=zroot}"
: "${ROOT_DATASET:=ROOT/default}"
: "${KEEP_COUNT:=20}"
: "${MAX_AGE_DAYS:=180}"

# Dataset whose snapshots are examined: <pool>/<root dataset>
FULL_DATASET="${POOL_NAME}/${ROOT_DATASET}"

# Command-line switches, all off until the argument parser turns them on
DRY_RUN=false VERBOSE=false QUIET=false TEST_MODE=false

# Escape sequences for the message tags; they are stored as literal
# backslash text and only become real escapes when a helper prints them.
RED='\033[0;31m'      # [ERROR]
GREEN='\033[0;32m'    # [INFO]
YELLOW='\033[1;33m'   # [WARN]
BLUE='\033[0;34m'     # [VERBOSE]
NC='\033[0m'          # reset to default color

# usage
# Print this script's header comment (from line 2 up to and including the
# first blank line) with the leading "#" and one optional space removed,
# then exit with status 0.
usage() {
    # One sed pass: select the header range, strip the comment prefix, print.
    # A read failure must not change the exit status of --help, hence "|| :".
    sed -n '2,/^$/{s/^# \?//;p;}' "$0" || :
    exit 0
}

# info MESSAGE
# Write MESSAGE to stdout behind a green "[INFO]" tag. Prints nothing when
# QUIET is "true". Backslash escapes in MESSAGE are interpreted (echo -e).
info() {
    if [[ "$QUIET" != "true" ]]; then
        echo -e "${GREEN}[INFO]${NC} $1"
    fi
}

# verbose MESSAGE
# Write MESSAGE to stdout behind a blue "[VERBOSE]" tag, but only when
# VERBOSE is "true". The QUIET flag is not consulted here.
verbose() {
    if [[ "$VERBOSE" == "true" ]]; then
        echo -e "${BLUE}[VERBOSE]${NC} $1"
    fi
}

# warn MESSAGE
# Write MESSAGE behind a yellow "[WARN]" tag. Prints nothing when QUIET is
# "true".
#
# Warnings are diagnostics, so they go to stderr: that keeps them out of the
# regular stdout stream (info lines, the test-mode RESULT line) and keeps
# them visible when stdout is redirected or discarded.
warn() {
    [[ "$QUIET" == "true" ]] && return 0
    echo -e "${YELLOW}[WARN]${NC} $1" >&2
}

# error MESSAGE
# Write MESSAGE to stderr behind a red "[ERROR]" tag and terminate the
# script with exit status 1.
#
# MESSAGE may contain user-supplied text (for example an unrecognised
# command-line option), so it is printed literally with %s; only the color
# variables, which hold backslash escapes, are expanded with %b.
error() {
    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
    exit 1
}

# Walk the command line. Every recognised option is a bare switch (none
# takes a value), so one argument is consumed per iteration. --help prints
# the usage text and exits; anything unrecognised aborts via error().
while (( $# )); do
    case "$1" in
        --dry-run) DRY_RUN=true ;;
        --verbose) VERBOSE=true ;;
        --quiet)   QUIET=true ;;
        --test)    TEST_MODE=true ;;
        --help|-h) usage ;;
        *)         error "Unknown option: $1" ;;
    esac
    shift
done

# Check if running as root (skip in test mode)
if [[ "$TEST_MODE" != "true" ]] && [[ $EUID -ne 0 ]]; then
    error "This script must be run as root"
fi

# require_uint NAME VALUE
# Abort through error() unless VALUE is a plain non-negative decimal integer.
#
# The settings checked with this come from the environment and are used in
# shell arithmetic. Arithmetic treats a non-numeric word as a variable name
# (usually evaluating to 0) and a leading zero as octal, so a typo such as
# KEEP_COUNT=abc would silently turn into "keep 0 snapshots" instead of
# failing. NAME is only used in the error message.
require_uint() {
    local name="$1" value="$2"
    local uint_re='^(0|[1-9][0-9]*)$'

    if [[ ! "$value" =~ $uint_re ]]; then
        error "${name} must be a non-negative integer without leading zeros (got '${value}')"
    fi
}

# Get current timestamp (can be overridden for testing)
NOW="${NOW_OVERRIDE:-$(date +%s)}"

# Refuse to run with settings that arithmetic would misinterpret
require_uint KEEP_COUNT "$KEEP_COUNT"
require_uint MAX_AGE_DAYS "$MAX_AGE_DAYS"
require_uint NOW_OVERRIDE "$NOW"

# Snapshots created before CUTOFF_TIME are older than MAX_AGE_DAYS
MAX_AGE_SECONDS=$((MAX_AGE_DAYS * 24 * 60 * 60))
CUTOFF_TIME=$((NOW - MAX_AGE_SECONDS))

info "Pruning snapshots for ${FULL_DATASET}"
info "Policy: Keep ${KEEP_COUNT} recent, delete if older than ${MAX_AGE_DAYS} days"
if [[ "$DRY_RUN" == "true" ]]; then
    info "DRY RUN - no changes will be made"
fi

# Collect the snapshot list, newest first, one "name<TAB>creation" per line.
#   test mode: lines are read from stdin (expected oldest first) and the
#              creation field is epoch seconds, e.g.
#              zroot/ROOT/default@pre-pacman_2025-01-15<TAB>1736937000
#   real mode: lines come from zfs and the creation field is a date string,
#              e.g. zroot/ROOT/default@pre-pacman_2025-01-15<TAB>Wed Jan 15 10:30 2025
case "$TEST_MODE" in
    true)
        # Mock data: reverse stdin so the last line given comes first
        SNAPSHOTS=$(tac)
        ;;
    *)
        # zfs sorts by creation (oldest first); keep only snapshots of the
        # dataset itself, then reverse. zfs errors are discarded and a failed
        # pipeline is tolerated, which leaves the list empty.
        SNAPSHOTS=$(zfs list -H -t snapshot -o name,creation -s creation -r "$FULL_DATASET" 2>/dev/null \
            | grep "^${FULL_DATASET}@" \
            | tac) || true
        ;;
esac

# Nothing to do without snapshots
if [[ -z "$SNAPSHOTS" ]]; then
    info "No snapshots found"
    exit 0
fi

# One snapshot per line, so the line count is the snapshot count
TOTAL=$(wc -l <<< "$SNAPSHOTS")
info "Found ${TOTAL} snapshots"

# Track results
DELETED=0
KEPT=0
POSITION=0

# snapshot_epoch CREATION
# Print the creation time in CREATION as epoch seconds and return 0, or
# print nothing and return 1 when it cannot be determined.
#
# Accepted input:
#   - a string of digits, taken as epoch seconds (the test-mode format)
#   - outside test mode, any date string that `date -d` can parse
#
# Callers must treat a non-zero return as "age unknown" and keep the
# snapshot. Substituting 0 for an unparseable date would make the snapshot
# look decades old and get it deleted.
snapshot_epoch() {
    local raw="$1" parsed

    if [[ "$raw" =~ ^[0-9]+$ ]]; then
        # Force base 10 so a leading zero is not read as octal later on
        printf '%s\n' "$((10#$raw))"
        return 0
    fi

    # Test mode only understands epoch seconds. An empty string is rejected
    # explicitly because GNU date parses it as the start of today.
    if [[ "$TEST_MODE" == "true" || -z "$raw" ]]; then
        return 1
    fi

    parsed=$(date -d "$raw" +%s 2>/dev/null) || return 1
    [[ "$parsed" =~ ^-?[0-9]+$ ]] || return 1
    printf '%s\n' "$parsed"
}

# Process each snapshot (SNAPSHOTS is ordered newest first). Rules, checked
# in this order:
#   1. the first KEEP_COUNT snapshots are always kept
#   2. a snapshot named "genesis" is always kept
#   3. snapshots created at or after CUTOFF_TIME are kept
#   4. everything else is deleted (or only reported in dry-run/test mode)
while IFS=$'\t' read -r snapshot creation_str; do
    if [[ -z "$snapshot" ]]; then
        continue
    fi

    POSITION=$((POSITION + 1))
    SNAP_NAME="${snapshot##*@}"

    # Fail safe: a snapshot whose age is unknown is never deleted
    if ! SNAP_TIME=$(snapshot_epoch "$creation_str"); then
        warn "Cannot determine creation time of ${snapshot} ('${creation_str}') - keeping it"
        KEPT=$((KEPT + 1))
        continue
    fi

    AGE_DAYS=$(( (NOW - SNAP_TIME) / 86400 ))

    # Decision logic
    if [[ $POSITION -le $KEEP_COUNT ]]; then
        # Always keep the first KEEP_COUNT snapshots (most recent)
        verbose "KEEP: ${SNAP_NAME} (position ${POSITION}/${KEEP_COUNT}, ${AGE_DAYS} days old) - within keep count"
        KEPT=$((KEPT + 1))
    elif [[ "$SNAP_NAME" == "genesis" ]]; then
        # Never delete genesis
        verbose "KEEP: ${SNAP_NAME} (position ${POSITION}, ${AGE_DAYS} days old) - genesis protected"
        KEPT=$((KEPT + 1))
    elif [[ $SNAP_TIME -ge $CUTOFF_TIME ]]; then
        # Not old enough to delete
        verbose "KEEP: ${SNAP_NAME} (position ${POSITION}, ${AGE_DAYS} days old) - younger than ${MAX_AGE_DAYS} days"
        KEPT=$((KEPT + 1))
    else
        # Delete: beyond keep count AND older than max age
        if [[ "$DRY_RUN" == "true" ]]; then
            info "WOULD DELETE: ${SNAP_NAME} (position ${POSITION}, ${AGE_DAYS} days old)"
            DELETED=$((DELETED + 1))
        elif [[ "$TEST_MODE" == "true" ]]; then
            # Test mode: simulate deletion (don't actually call zfs)
            verbose "DELETE: ${SNAP_NAME} (position ${POSITION}, ${AGE_DAYS} days old)"
            DELETED=$((DELETED + 1))
        else
            verbose "DELETE: ${SNAP_NAME} (position ${POSITION}, ${AGE_DAYS} days old)"
            # Capture zfs's own message so a failure can be diagnosed
            if destroy_err=$(zfs destroy "$snapshot" 2>&1); then
                DELETED=$((DELETED + 1))
            else
                warn "Failed to delete ${snapshot}${destroy_err:+: ${destroy_err}}"
            fi
        fi
    fi
done <<< "$SNAPSHOTS"

# Report the totals for this run
info "Summary: ${KEPT} kept, ${DELETED} deleted"

# Refresh the GRUB menu only after a real run (neither dry-run nor test
# mode) that removed at least one snapshot, and only when the helper
# script is present and executable.
if [[ $DELETED -gt 0 && "$DRY_RUN" != "true" && "$TEST_MODE" != "true" && -x /usr/local/bin/grub-zfs-snap ]]; then
    info "Regenerating GRUB menu..."
    /usr/local/bin/grub-zfs-snap
fi

# Test mode: print a machine-readable result line on stdout.
# The exit status is not altered by this.
case "$TEST_MODE" in
    true) echo "RESULT:kept=${KEPT},deleted=${DELETED}" ;;
esac