From 8c1dd8189eb6e4d8f0cd0c42b54bd840bd2dbd59 Mon Sep 17 00:00:00 2001 From: TerryM Date: Sun, 7 Jun 2026 19:59:18 +0800 Subject: [PATCH] ci(deploy): make runner cleanup more aggressive to prevent ENOSPC A previous deploy failed at the vite chunk-writing stage with "ENOSPC: no space left on device". The cleanup step ran at the start of the job but left enough stale data behind that the runner filled up before `npm run build` could finish. - Drop the act workspace retention from 60min to 10min. Closely-spaced pushes used to keep multiple stale jobs around; 10min is meant to still spare a currently-running job, but only while that job keeps changing entries directly inside its top-level workspace dir (`find -mmin` tests that directory's own mtime, which does not advance on writes deeper in the tree). - Drop _work / setup-node / npm cacache retention from 24h to 60min. - Drop the `until=24h` filter on docker prune so dangling images, containers, and builder cache get reclaimed every run. - Add a second "Ensure free space before build" guard right before the Build step. If <3GB is free, aggressively prune act caches, npm cacache, and docker volumes before vite starts writing chunks. --- .gitea/workflows/deploy.yml | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index 445e957..f1ae4a9 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -15,22 +15,27 @@ jobs: set +e echo "=== Disk before cleanup ===" df -h - # Stale act runner workspaces from previous jobs (older than 60 min). + # Stale act runner workspaces. Closely-spaced pushes (e.g. 3 commits + # within 30min) used to leak workspaces because the old 60min + # threshold left them in place. 10min is tight: a running job's dir is + # kept only while its own mtime advances (nested writes do not bump it). 
if [ -d "$HOME/.cache/act" ]; then du -sh "$HOME/.cache/act" 2>/dev/null - find "$HOME/.cache/act" -mindepth 1 -maxdepth 1 -type d -mmin +60 -exec rm -rf {} + 2>/dev/null + find "$HOME/.cache/act" -mindepth 1 -maxdepth 1 -type d -mmin +10 -exec rm -rf {} + 2>/dev/null fi - # Stale runner workspaces under common locations. + # Stale runner workspaces and node setup/npm caches: 60min is plenty + # since each job re-fetches deps via `npm ci`. for dir in "$HOME/actions-runner/_work" "$HOME/.cache/setup-node" "$HOME/.npm/_cacache"; do if [ -d "$dir" ]; then - find "$dir" -mindepth 1 -maxdepth 2 -mmin +1440 -exec rm -rf {} + 2>/dev/null + find "$dir" -mindepth 1 -maxdepth 2 -mmin +60 -exec rm -rf {} + 2>/dev/null fi done - # Docker leftovers if docker is available. + # Docker leftovers: drop the `until=24h` filter so any dangling images + # / containers / builder cache get reclaimed every run. if command -v docker >/dev/null 2>&1; then - docker image prune -af --filter "until=24h" 2>/dev/null - docker container prune -f --filter "until=24h" 2>/dev/null - docker builder prune -af --filter "until=24h" 2>/dev/null + docker image prune -af 2>/dev/null + docker container prune -f 2>/dev/null + docker builder prune -af 2>/dev/null fi # Stale /tmp files older than 2h, keep currently-running runner files. find /tmp -mindepth 1 -maxdepth 1 -mmin +120 \ @@ -61,6 +66,26 @@ jobs: - name: Test run: npm test + - name: Ensure free space before build + run: | + # Second-pass guard right before the heaviest step. `npm ci` plus + # tsc/test pull in tons of files since the first cleanup ran, and + # vite chunk writes need at least a few GB free — ENOSPC here is what + # killed past runs. + set +e + AVAIL_MB=$(df -Pm . | awk 'NR==2 {print $4}') + echo "Available on workspace volume: ${AVAIL_MB} MB" + if [ "${AVAIL_MB:-0}" -lt 3072 ]; then + echo "<3GB free — running an aggressive cleanup before build." 
+ rm -rf "$HOME/.cache/act"/*/hostexecutor/node_modules/.cache 2>/dev/null + rm -rf "$HOME/.npm/_cacache" 2>/dev/null + if command -v docker >/dev/null 2>&1; then + docker system prune -af --volumes 2>/dev/null + fi + df -h + fi + exit 0 + - name: Build run: npm run build env: