summaryrefslogtreecommitdiff
path: root/scripts/testing/lib/vm-utils.sh
blob: 47bd391bbbf487e73ec79141c2730c9d070848b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/bin/bash
# VM management utilities for archsetup testing (direct QEMU)
# Author: Craig Jennings <craigmartinjennings@gmail.com>
# License: GNU GPLv3
#
# Manages QEMU VMs directly without libvirt. Uses user-mode networking
# with port forwarding for SSH access and qemu-img for snapshots.

# Note: logging.sh should already be sourced by the calling script

# Guest sizing defaults; a non-empty value already present in the
# environment takes precedence over the default given here.
: "${VM_CPUS:=4}"
: "${VM_RAM:=4096}"          # MB
: "${VM_DISK_SIZE:=50}"      # GB

# SSH access to the guest
: "${SSH_PORT:=2222}"
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10"
: "${ROOT_PASSWORD:=archsetup}"

# OVMF firmware locations on the host
OVMF_CODE="/usr/share/edk2/x64/OVMF_CODE.4m.fd"
OVMF_VARS_TEMPLATE="/usr/share/edk2/x64/OVMF_VARS.4m.fd"

# Runtime paths: always reset to empty here, filled in by init_vm_paths
VM_IMAGES_DIR=""
DISK_PATH=""
OVMF_VARS=""
PID_FILE=""
MONITOR_SOCK=""
SERIAL_LOG=""

# Derive every runtime path from the images directory and create that
# directory. Call this before using any other function in this file.
# Args: $1 = images directory (default: current value of VM_IMAGES_DIR)
# Sets: VM_IMAGES_DIR, DISK_PATH, OVMF_VARS, PID_FILE, MONITOR_SOCK, SERIAL_LOG
init_vm_paths() {
    local base="${1:-$VM_IMAGES_DIR}"
    if [ -z "$base" ]; then
        fatal "VM_IMAGES_DIR not set"
    fi

    VM_IMAGES_DIR="$base"
    SERIAL_LOG="$base/qemu-serial.log"
    MONITOR_SOCK="$base/qemu-monitor.sock"
    PID_FILE="$base/qemu.pid"
    OVMF_VARS="$base/OVMF_VARS.fd"
    DISK_PATH="$base/archsetup-base.qcow2"

    # Last command on purpose: its status is the function's return value
    mkdir -p "$VM_IMAGES_DIR"
}

# ─── Prerequisite Checks ─────────────────────────────────────────────

# Verify that the KVM device node exists on the host.
# Returns: 0 if /dev/kvm exists, 1 (after logging hints) otherwise
check_kvm() {
    [ -e /dev/kvm ] && return 0

    error "KVM is not available"
    info "Check if virtualization is enabled in BIOS"
    info "Load kvm module: sudo modprobe kvm-intel (or kvm-amd)"
    return 1
}

# Verify that the qemu-system-x86_64 command can be found.
# Returns: 0 if found, 1 (after logging an install hint) otherwise
check_qemu() {
    command -v qemu-system-x86_64 &>/dev/null && return 0

    error "qemu-system-x86_64 not found"
    info "Install with: sudo pacman -S qemu-full"
    return 1
}

# Verify that the OVMF firmware image named by $OVMF_CODE is a regular file.
# Returns: 0 if present, 1 (after logging an install hint) otherwise
check_ovmf() {
    [ -f "$OVMF_CODE" ] && return 0

    error "OVMF firmware not found: $OVMF_CODE"
    info "Install with: sudo pacman -S edk2-ovmf"
    return 1
}

# Verify that the sshpass command can be found.
# Returns: 0 if found, 1 (after logging an install hint) otherwise
check_sshpass() {
    command -v sshpass &>/dev/null && return 0

    error "sshpass not found"
    info "Install with: sudo pacman -S sshpass"
    return 1
}

# Verify that the socat command can be found.
# Returns: 0 if found, 1 (after logging an install hint) otherwise
check_socat() {
    command -v socat &>/dev/null && return 0

    error "socat not found"
    info "Install with: sudo pacman -S socat"
    return 1
}

# Run every prerequisite check, even after one has failed, so that all
# missing pieces are reported in a single pass.
# Returns: 0 if every check passed, 1 if at least one failed
check_prerequisites() {
    local rc=0
    local probe

    for probe in check_kvm check_qemu check_ovmf check_sshpass check_socat; do
        "$probe" || rc=1
    done

    return "$rc"
}

# ─── VM Lifecycle ─────────────────────────────────────────────────────

# Report whether the process recorded in $PID_FILE is a live QEMU process.
# A PID file whose process is gone, or whose command line does not contain
# "qemu", is treated as stale and removed.
# Returns: 0 if running, 1 otherwise
vm_is_running() {
    local qemu_pid

    if [ ! -f "$PID_FILE" ]; then
        return 1
    fi
    qemu_pid=$(cat "$PID_FILE" 2>/dev/null) || return 1

    # Stale when the PID is dead or belongs to something other than QEMU
    if ! kill -0 "$qemu_pid" 2>/dev/null \
        || ! grep -q "qemu" "/proc/$qemu_pid/cmdline" 2>/dev/null; then
        rm -f "$PID_FILE"
        return 1
    fi

    return 0
}

# Start a QEMU VM in the background and wait for it to come up.
# Args: $1 = disk path
#        $2 = boot mode: "iso" or "disk" (default: disk)
#        $3 = ISO path (required if mode=iso)
#        $4 = display: "none" (headless) or "gtk" (graphical, default: none)
# Returns: 0 once vm_is_running confirms the VM; 1 if the SSH port is in use,
#          the OVMF vars file cannot be created, or QEMU does not start
start_qemu() {
    local disk="$1"
    local mode="${2:-disk}"
    local iso_path="${3:-}"
    local display="${4:-none}"

    # Reject a bad invocation before stopping anything that is running
    if [ "$mode" = "iso" ] && [ -z "$iso_path" ]; then
        fatal "ISO path required for iso boot mode"
    fi

    # Stop any existing instance
    stop_qemu 2>/dev/null || true

    # Check port availability
    if ss -tln 2>/dev/null | grep -q ":${SSH_PORT} "; then
        error "Port $SSH_PORT is already in use"
        info "Another QEMU instance or service may be running"
        return 1
    fi

    # Ensure OVMF_VARS exists; without it the second pflash drive is unusable
    if [ ! -f "$OVMF_VARS" ]; then
        if ! cp "$OVMF_VARS_TEMPLATE" "$OVMF_VARS"; then
            error "Failed to create OVMF vars file: $OVMF_VARS"
            return 1
        fi
    fi

    # Truncate serial log
    : > "$SERIAL_LOG"

    # Build QEMU command
    local qemu_cmd=(
        qemu-system-x86_64
        -name "archsetup-test"
        -machine "q35,accel=kvm"
        -cpu host
        -smp "$VM_CPUS"
        -m "$VM_RAM"
        -drive "if=pflash,format=raw,readonly=on,file=$OVMF_CODE"
        -drive "if=pflash,format=raw,file=$OVMF_VARS"
        -drive "file=$disk,format=qcow2,if=virtio"
        # Forward from the host loopback address only: the SSH helpers in this
        # file all connect to localhost, and an empty host address would
        # expose the guest's SSH port on every host interface
        -netdev "user,id=net0,hostfwd=tcp:127.0.0.1:${SSH_PORT}-:22"
        -device "virtio-net-pci,netdev=net0"
        -monitor "unix:$MONITOR_SOCK,server,nowait"
        -pidfile "$PID_FILE"
        -serial "file:$SERIAL_LOG"
        -usb
        -device usb-tablet
    )

    # Boot mode
    if [ "$mode" = "iso" ]; then
        qemu_cmd+=(-cdrom "$iso_path" -boot d)
    else
        qemu_cmd+=(-boot c)
    fi

    # Display mode
    if [ "$display" = "gtk" ]; then
        qemu_cmd+=(-device virtio-vga-gl -display "gtk,gl=on")
    else
        qemu_cmd+=(-display none)
    fi

    step "Starting QEMU (mode=$mode, display=$display)"

    # Launch in background
    "${qemu_cmd[@]}" &>> "$LOGFILE" &
    local qemu_pid=$!

    # Wait (up to 10s) for the PID file to appear
    local waited=0
    while [ ! -f "$PID_FILE" ] && [ "$waited" -lt 10 ]; do
        # Give up early when the QEMU process itself has already exited
        kill -0 "$qemu_pid" 2>/dev/null || break
        sleep 1
        waited=$((waited + 1))
    done

    if ! vm_is_running; then
        error "QEMU failed to start"
        return 1
    fi

    success "QEMU started (PID: $(cat "$PID_FILE"))"
    return 0
}

# Shut the VM down: request an ACPI powerdown through the monitor socket,
# poll every 2 seconds until it exits, and force-kill it once the timeout
# has passed.
# Args: $1 = seconds to wait for a graceful shutdown (default: 60)
# Returns: always 0
stop_qemu() {
    local grace="${1:-60}"
    local waited=0

    # Nothing to do when no VM is up
    vm_is_running || return 0

    step "Sending shutdown signal to VM"

    # Best effort: a failed monitor write simply leads to the force-kill path
    if [ -S "$MONITOR_SOCK" ]; then
        printf '%s\n' "system_powerdown" | socat - "UNIX-CONNECT:$MONITOR_SOCK" >> "$LOGFILE" 2>&1 || true
    fi

    while [ "$waited" -lt "$grace" ]; do
        if vm_is_running; then
            sleep 2
            waited=$((waited + 2))
            continue
        fi
        success "VM stopped gracefully"
        _cleanup_qemu_files
        return 0
    done

    warn "VM did not stop gracefully after ${grace}s, force killing"
    kill_qemu
    return 0
}

# Kill the VM without a grace period: SIGKILL the PID recorded in
# $PID_FILE (if any), then remove the runtime files.
kill_qemu() {
    local target=""

    if [ -f "$PID_FILE" ]; then
        target=$(cat "$PID_FILE" 2>/dev/null)
    fi

    # The process may already be gone; a failed kill is not an error
    if [ -n "$target" ]; then
        kill -9 "$target" 2>/dev/null || true
    fi

    _cleanup_qemu_files
}

# Remove the files a QEMU instance leaves behind: the PID file and the
# monitor socket. Files that are already gone are ignored.
_cleanup_qemu_files() {
    local -a leftovers=("$PID_FILE" "$MONITOR_SOCK")
    rm -f "${leftovers[@]}"
}

# ─── Snapshot Operations (qemu-img) ──────────────────────────────────
# All snapshot operations require the VM to be stopped.

# Create an internal snapshot in a disk image with qemu-img.
# Args: $1 = disk image (default: $DISK_PATH)
#       $2 = snapshot name (default: clean-install)
# Returns: 0 on success, 1 if the VM is running or qemu-img fails
create_snapshot() {
    local image="${1:-$DISK_PATH}"
    local tag="${2:-clean-install}"

    if vm_is_running; then
        error "Cannot create snapshot while VM is running"
        return 1
    fi

    step "Creating snapshot: $tag"
    if ! qemu-img snapshot -c "$tag" "$image" >> "$LOGFILE" 2>&1; then
        error "Failed to create snapshot '$tag'"
        return 1
    fi

    success "Snapshot '$tag' created"
    return 0
}

# Revert a disk image to one of its internal snapshots with qemu-img.
# Args: $1 = disk image (default: $DISK_PATH)
#       $2 = snapshot name (default: clean-install)
# Returns: 0 on success, 1 if the VM is running or qemu-img fails
restore_snapshot() {
    local image="${1:-$DISK_PATH}"
    local tag="${2:-clean-install}"

    if vm_is_running; then
        error "Cannot restore snapshot while VM is running"
        return 1
    fi

    step "Restoring snapshot: $tag"
    if ! qemu-img snapshot -a "$tag" "$image" >> "$LOGFILE" 2>&1; then
        error "Failed to restore snapshot '$tag'"
        return 1
    fi

    success "Snapshot '$tag' restored"
    return 0
}

# Delete an internal snapshot from a disk image with qemu-img.
# Like the create and restore operations, this refuses to touch the image
# while the VM is running.
# Args: $1 = disk image (default: $DISK_PATH)
#       $2 = snapshot name (default: clean-install)
# Returns: 0 on success, 1 if the VM is running or qemu-img fails
delete_snapshot() {
    local disk="${1:-$DISK_PATH}"
    local snapshot_name="${2:-clean-install}"

    if vm_is_running; then
        error "Cannot delete snapshot while VM is running"
        return 1
    fi

    step "Deleting snapshot: $snapshot_name"
    if qemu-img snapshot -d "$snapshot_name" "$disk" >> "$LOGFILE" 2>&1; then
        success "Snapshot '$snapshot_name' deleted"
        return 0
    else
        error "Failed to delete snapshot '$snapshot_name'"
        return 1
    fi
}

# Print qemu-img's snapshot listing for a disk image; qemu-img's error
# output is discarded.
# Args: $1 = disk image (default: $DISK_PATH)
list_snapshots() {
    qemu-img snapshot -l "${1:-$DISK_PATH}" 2>/dev/null
}

# Test whether a disk image contains a snapshot with the given name.
# The name is compared literally against the TAG column of
# `qemu-img snapshot -l`, so a substring of another tag, a date fragment or
# regex metacharacters in the name cannot produce a false match.
# Args: $1 = disk image (default: $DISK_PATH)
#       $2 = snapshot name (default: clean-install)
# Returns: 0 if the snapshot exists, non-zero otherwise
snapshot_exists() {
    local disk="${1:-$DISK_PATH}"
    local snapshot_name="${2:-clean-install}"

    # The first two output lines are the "Snapshot list:" banner and the
    # column header. On data rows, strip the ID column and require the rest
    # to start with the tag followed by a space. The name travels through the
    # environment so awk does not interpret backslashes in it.
    qemu-img snapshot -l "$disk" 2>/dev/null \
        | SNAPSHOT_TAG="$snapshot_name" awk '
            NR > 2 {
                row = $0
                sub(/^[ \t]*[^ \t]+[ \t]+/, "", row)
                want = ENVIRON["SNAPSHOT_TAG"] " "
                if (substr(row, 1, length(want)) == want) found = 1
            }
            END { exit !found }
        '
}

# ─── SSH Operations ───────────────────────────────────────────────────

# Poll localhost:$SSH_PORT every 5 seconds until a root SSH login succeeds.
# Args: $1 = root password (default: $ROOT_PASSWORD)
#       $2 = timeout in seconds (default: 120)
# Returns: 0 once a login works, 1 when the timeout is reached
wait_for_ssh() {
    local pw="${1:-$ROOT_PASSWORD}"
    local limit="${2:-120}"
    local waited=0

    progress "Waiting for SSH on localhost:$SSH_PORT..."
    while [ "$waited" -lt "$limit" ]; do
        # $SSH_OPTS is deliberately unquoted so it splits into separate options
        sshpass -p "$pw" ssh $SSH_OPTS -p "$SSH_PORT" root@localhost true 2>/dev/null && {
            success "SSH is available"
            return 0
        }
        sleep 5
        waited=$((waited + 5))
    done

    error "SSH timeout after ${limit}s"
    return 1
}

# Run a command inside the VM as root over SSH. The command's stderr is
# appended to $LOGFILE; its stdout and exit status pass through.
# Args: $1 = root password (default: $ROOT_PASSWORD when empty)
#       $2... = command and arguments to run in the guest
vm_exec() {
    local pw="${1:-$ROOT_PASSWORD}"
    shift

    # $SSH_OPTS is deliberately unquoted so it splits into separate options
    local -a ssh_argv=(
        $SSH_OPTS
        -o ServerAliveInterval=30
        -o ServerAliveCountMax=10
        -p "$SSH_PORT"
        root@localhost
    )

    sshpass -p "$pw" ssh "${ssh_argv[@]}" "$@" 2>> "$LOGFILE"
}

# Upload a file to the VM with scp; scp output is appended to $LOGFILE.
# Args: $1 = local file
#       $2 = destination path inside the VM
#       $3 = root password (default: $ROOT_PASSWORD)
# Returns: 0 on success, 1 on failure
copy_to_vm() {
    local src="$1"
    local dest="$2"
    local pw="${3:-$ROOT_PASSWORD}"

    step "Copying $(basename "$src") to VM:$dest"

    # $SSH_OPTS is deliberately unquoted so it splits into separate options
    if ! sshpass -p "$pw" scp $SSH_OPTS -P "$SSH_PORT" \
        "$src" "root@localhost:$dest" >> "$LOGFILE" 2>&1; then
        error "Failed to copy file to VM"
        return 1
    fi

    success "File copied to VM"
    return 0
}

# Download a file from the VM with scp; scp output is appended to $LOGFILE.
# Args: $1 = file path inside the VM
#       $2 = local destination path
#       $3 = root password (default: $ROOT_PASSWORD)
# Returns: 0 on success, 1 on failure
copy_from_vm() {
    local src="$1"
    local dest="$2"
    local pw="${3:-$ROOT_PASSWORD}"

    step "Copying $src from VM"

    # $SSH_OPTS is deliberately unquoted so it splits into separate options
    if ! sshpass -p "$pw" scp $SSH_OPTS -P "$SSH_PORT" \
        "root@localhost:$src" "$dest" >> "$LOGFILE" 2>&1; then
        error "Failed to copy file from VM"
        return 1
    fi

    success "File copied from VM"
    return 0
}