# Patch for .gitea/workflows/deploy.yml: reworks the runner disk-cleanup step.
#
# What the hunks below change:
#   * act workspaces: the `find ... -mmin +10` sweep of $HOME/.cache/act is
#     replaced by a loop that removes every directory there except the one
#     matching CURRENT_ACT_DIR. CURRENT_ACT_DIR is taken from
#     `dirname "$ACT_TOOLCACHE_PATH"` when that variable is set, otherwise
#     from the `.cache/act/<name>` prefix extracted from `pwd` with sed.
#   * npm / setup-node caches: $HOME/.npm/_cacache, $HOME/.npm/_logs and
#     $HOME/.cache/setup-node are now removed outright instead of by a
#     60-minute age filter.
#   * $HOME/actions-runner/_work: age filter lowered from 60 to 30 minutes.
#   * Docker: the separate image / container / builder prune commands are
#     replaced by a single `docker system prune -af --volumes`.
#   * New guard after cleanup: reads the available space for `.` from
#     `df -Pm` (row 2, column 4) and exits 1 with an `::error::` message
#     when it is below 3500 MB.
#
# NOTE(review): if CURRENT_ACT_DIR ends up empty (variable unset and `pwd`
#   not under .cache/act), the case pattern "$CURRENT_ACT_DIR"/* collapses to
#   `/*`, which matches any path starting with `/`. Every directory would
#   then hit the "skip current" arm and nothing is removed -- confirm that
#   this is the intended fallback.
# NOTE(review): if dirname of ACT_TOOLCACHE_PATH resolves to a directory
#   outside $HOME/.cache/act, no entry matches the skip arm and the loop
#   removes every workspace, presumably including the running job's --
#   verify what this variable holds on the runner.
# NOTE(review): `--volumes` extends pruning to volumes, which the three
#   replaced prune commands did not touch -- confirm nothing on the runner
#   depends on them.
# NOTE(review): the step runs with `set +e` (first context line), so a
#   non-numeric AVAIL_MB would make the `[ ... -lt 3500 ]` test error out and
#   fall through to `exit 0` -- TODO confirm that is acceptable.
diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index f1ae4a9..835c381 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -15,27 +15,41 @@ jobs: set +e echo "=== Disk before cleanup ===" df -h - # Stale act runner workspaces. Closely-spaced pushes (e.g. 3 commits - # within 30min) used to leak workspaces because the old 60min - # threshold left them in place. 10min is tight but still keeps any - # currently-running job's dir (its mtime updates as it writes). + # Identify the directory holding the currently-running act job so we + # never touch it. Everything else under ~/.cache/act/ is fair game. + CURRENT_ACT_DIR="" + if [ -n "${ACT_TOOLCACHE_PATH:-}" ]; then + CURRENT_ACT_DIR=$(dirname "${ACT_TOOLCACHE_PATH}" 2>/dev/null) + fi + if [ -z "$CURRENT_ACT_DIR" ]; then + CURRENT_ACT_DIR=$(pwd | sed -n 's|\(.*/.cache/act/[^/]*\).*|\1|p') + fi + echo "Current act dir (preserved): ${CURRENT_ACT_DIR:-}" + # Wipe every other act workspace immediately (no mtime threshold). + # The old 10min threshold still left the previous failed job around, + # which then ate the disk before `npm ci` could finish. if [ -d "$HOME/.cache/act" ]; then du -sh "$HOME/.cache/act" 2>/dev/null - find "$HOME/.cache/act" -mindepth 1 -maxdepth 1 -type d -mmin +10 -exec rm -rf {} + 2>/dev/null + for d in "$HOME/.cache/act"/*/; do + [ -d "$d" ] || continue + case "$d" in + "$CURRENT_ACT_DIR"/*|"$CURRENT_ACT_DIR/") echo "skip current: $d" ;; + *) rm -rf "$d" && echo "removed: $d" ;; + esac + done fi - # Stale runner workspaces and node setup/npm caches: 60min is plenty - # since each job re-fetches deps via `npm ci`. - for dir in "$HOME/actions-runner/_work" "$HOME/.cache/setup-node" "$HOME/.npm/_cacache"; do - if [ -d "$dir" ]; then - find "$dir" -mindepth 1 -maxdepth 2 -mmin +60 -exec rm -rf {} + 2>/dev/null - fi - done - # Docker leftovers: drop the `until=24h` filter so any dangling images - # / containers / builder cache get reclaimed every run. 
+ # npm cache + setup-node cache: blow them away entirely. `npm ci` + # re-populates what it needs; the cache is a nice-to-have, not a + # requirement, and on a tight runner it's the easiest GB to reclaim. + rm -rf "$HOME/.npm/_cacache" "$HOME/.npm/_logs" 2>/dev/null + rm -rf "$HOME/.cache/setup-node" 2>/dev/null + # Stale actions-runner workspaces older than 30min. + if [ -d "$HOME/actions-runner/_work" ]; then + find "$HOME/actions-runner/_work" -mindepth 1 -maxdepth 2 -mmin +30 -exec rm -rf {} + 2>/dev/null + fi + # Docker: drop everything reclaimable (no `until` filter). if command -v docker >/dev/null 2>&1; then - docker image prune -af 2>/dev/null - docker container prune -f 2>/dev/null - docker builder prune -af 2>/dev/null + docker system prune -af --volumes 2>/dev/null fi # Stale /tmp files older than 2h, keep currently-running runner files. find /tmp -mindepth 1 -maxdepth 1 -mmin +120 \ @@ -43,6 +57,17 @@ jobs: -exec rm -rf {} + 2>/dev/null echo "=== Disk after cleanup ===" df -h + # Hard fail early if there still isn't enough room for `npm ci`, + # which needs ~3GB for this codebase's hoisted node_modules. + AVAIL_MB=$(df -Pm . | awk 'NR==2 {print $4}') + echo "Available on workspace volume: ${AVAIL_MB} MB" + if [ "${AVAIL_MB:-0}" -lt 3500 ]; then + echo "::error::Less than 3.5GB free after cleanup (${AVAIL_MB}MB)." + echo "The runner's EBS volume is too small for this codebase \ + — ask devops to expand it. Failing fast so the next steps don't \ + half-write an unusable build." + exit 1 + fi exit 0 - name: Checkout code