Some checks failed
Deploy to Frontend Servers / deploy (push) Failing after 2s
The previous run still hit ENOSPC, this time during `npm ci` while extracting node_modules. The earlier cleanup left the just-failed act workspace on disk (mtime < 10min threshold), and its half-extracted node_modules took the runner past the limit before `npm ci` finished.

- Drop the mtime threshold for act workspaces; instead detect the currently-running job's directory and rm -rf every sibling. The current job is preserved by path comparison so we never delete files the running step needs.
- Blow away ~/.npm/_cacache, ~/.npm/_logs, ~/.cache/setup-node entirely. `npm ci` re-populates what it needs and the cache is the easiest GB to reclaim on a tight runner.
- Tighten actions-runner workspace retention from 24h to 30min.
- Drop the docker prune --filter; use `docker system prune -af --volumes` to reclaim builder cache and volumes too.
- Hard-fail with a clear error if <3.5GB free after cleanup, instead of letting `npm ci` half-write an unusable node_modules and failing obscurely. Codebase needs ~3GB for hoisted deps.
177 lines
6.7 KiB
YAML
177 lines
6.7 KiB
YAML
# Build the frontend on a self-hosted runner and deploy it to the frontend
# servers. Runs on every push to the main branch.
name: Deploy to Frontend Servers

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: self-hosted

    steps:
|
|
- name: Free disk space
  run: |
    # Best-effort disk reclamation before checkout/install. Individual
    # cleanup commands are allowed to fail (set +e); the only hard failure
    # is the free-space gate at the end of this script.
    set +e

    echo "=== Disk before cleanup ==="
    df -h

    # Physical path of the act cache root (empty when it does not exist).
    # Physical paths are used throughout so a symlinked $HOME cannot make
    # the "is this the current job?" comparison miss.
    ACT_ROOT=$(cd "$HOME/.cache/act" 2>/dev/null && pwd -P)

    # Identify the directory holding the currently-running act job so we
    # never touch it. Everything else under ~/.cache/act/ is fair game.
    # The job dir must be a *direct child* of the cache root: accepting the
    # root itself (or an empty value) would make every entry look like the
    # current job, and the cleanup would silently remove nothing.
    CURRENT_ACT_DIR=""
    if [ -n "$ACT_ROOT" ]; then
      # Primary signal: the working directory of this step.
      CWD_REAL=$(pwd -P)
      case "$CWD_REAL/" in
        "$ACT_ROOT"/?*/*)
          REST=${CWD_REAL#"$ACT_ROOT"/}
          CURRENT_ACT_DIR="$ACT_ROOT/${REST%%/*}"
          ;;
      esac

      # Fallback: parent of ACT_TOOLCACHE_PATH, accepted only when it is a
      # direct child of the cache root.
      if [ -z "$CURRENT_ACT_DIR" ] && [ -n "${ACT_TOOLCACHE_PATH:-}" ]; then
        CANDIDATE=$(dirname "${ACT_TOOLCACHE_PATH}" 2>/dev/null)
        case "$CANDIDATE" in
          "$ACT_ROOT"/*/*|"$ACT_ROOT"/) ;;
          "$ACT_ROOT"/*) [ -d "$CANDIDATE" ] && CURRENT_ACT_DIR=$CANDIDATE ;;
        esac
      fi
    fi
    echo "Current act dir (preserved): ${CURRENT_ACT_DIR:-<unknown>}"

    # Wipe every other act workspace immediately (no mtime threshold).
    # The old 10min threshold still left the previous failed job around,
    # which then ate the disk before `npm ci` could finish.
    if [ -n "$ACT_ROOT" ]; then
      du -sh "$ACT_ROOT" 2>/dev/null
      if [ -z "$CURRENT_ACT_DIR" ]; then
        # Fail safe: without knowing which directory is ours, deleting
        # siblings could remove the files this job is running from.
        echo "Could not identify the current act job dir; leaving $ACT_ROOT untouched."
      else
        for d in "$ACT_ROOT"/*/; do
          [ -d "$d" ] || continue
          if [ "${d%/}" = "$CURRENT_ACT_DIR" ]; then
            echo "skip current: $d"
          else
            rm -rf -- "${d%/}" && echo "removed: $d"
          fi
        done
      fi
    fi

    # npm cache + setup-node cache: blow them away entirely. `npm ci`
    # re-populates what it needs; the cache is a nice-to-have, not a
    # requirement, and on a tight runner it's the easiest GB to reclaim.
    rm -rf -- "$HOME/.npm/_cacache" "$HOME/.npm/_logs" 2>/dev/null
    rm -rf -- "$HOME/.cache/setup-node" 2>/dev/null

    # Stale actions-runner workspaces older than 30min. Directory mtimes
    # only change when entries are added or removed, so the directory this
    # job is running from can look "stale"; never remove the current
    # working directory or any of its ancestors ($0 is the job's cwd).
    if [ -d "$HOME/actions-runner/_work" ]; then
      find "$HOME/actions-runner/_work" -mindepth 1 -maxdepth 2 -mmin +30 \
        -exec sh -c '
          for p in "$@"; do
            case "$0/" in
              "$p"/*) ;;
              *) rm -rf -- "$p" ;;
            esac
          done' "$PWD" {} + 2>/dev/null
    fi

    # Docker: drop everything reclaimable (no `until` filter).
    # NOTE: -a and --volumes remove ALL unused images and volumes on this
    # host, not only the ones this workflow created.
    if command -v docker >/dev/null 2>&1; then
      docker system prune -af --volumes 2>/dev/null
    fi

    # Stale /tmp files older than 2h, keep currently-running runner files.
    find /tmp -mindepth 1 -maxdepth 1 -mmin +120 \
      -not -name 'runner*' -not -name 'act*' \
      -exec rm -rf {} + 2>/dev/null

    echo "=== Disk after cleanup ==="
    df -h

    # Hard fail early if there still isn't enough room for `npm ci`,
    # which needs ~3GB for this codebase's hoisted node_modules.
    # `df -P` guarantees one line per filesystem, so row 2 / column 4 is
    # the available space (in MB because of -m) for the workspace volume.
    AVAIL_MB=$(df -Pm . | awk 'NR==2 {print $4}')
    echo "Available on workspace volume: ${AVAIL_MB} MB"
    if [ "${AVAIL_MB:-0}" -lt 3500 ]; then
      echo "::error::Less than 3.5GB free after cleanup (${AVAIL_MB}MB)."
      # Single line on purpose: a backslash-continued string would embed
      # the script's indentation in the middle of the message.
      echo "The runner's EBS volume is too small for this codebase — ask devops to expand it. Failing fast so the next steps don't half-write an unusable build."
      exit 1
    fi
    exit 0
|
|
|
|
# Fetch the repository contents for the commit that triggered the run.
- name: Checkout code
  uses: actions/checkout@v4

# Install Node.js 22; `cache: npm` asks setup-node to manage the npm cache.
- name: Setup Node.js
  uses: actions/setup-node@v4
  with:
    node-version: "22"
    cache: npm

# Clean install from the lockfile.
- name: Install dependencies
  run: npm ci

# Quality gates: each of the following steps fails the job on a non-zero
# exit, so nothing is built or deployed unless all three pass.
- name: Type check
  run: npx tsc --noEmit

- name: Format check
  run: npm run format:check

- name: Test
  run: npm test
|
|
|
|
- name: Ensure free space before build
  run: |
    # Pre-build disk guard. Installing dependencies, type-checking and
    # testing have all written files since the first cleanup, and the
    # bundler needs a few GB of headroom for its chunk output; running out
    # of space at this point is what broke earlier runs. This step never
    # fails the job: it only tries to reclaim space when below 3GB.
    set +e

    free_mb=$(df -Pm . | awk 'NR==2 {print $4}')
    echo "Available on workspace volume: ${free_mb} MB"

    # Enough room (or the value could not be compared): nothing to do.
    [ "${free_mb:-0}" -lt 3072 ] || exit 0

    echo "<3GB free — running an aggressive cleanup before build."
    rm -rf "$HOME/.cache/act"/*/hostexecutor/node_modules/.cache 2>/dev/null
    rm -rf "$HOME/.npm/_cacache" 2>/dev/null
    command -v docker >/dev/null 2>&1 \
      && docker system prune -af --volumes 2>/dev/null
    df -h
    exit 0
|
|
|
|
# Produce the production bundle. The VITE_* variables below are set in the
# environment of `npm run build` for this step only.
- name: Build
  run: npm run build
  env:
    VITE_API_URL: ""
    VITE_API_PREFIX: "/apnew"
    VITE_DISABLE_ADMIN: "true"
|
|
|
|
- name: Setup SSH key
  # Secrets are passed through the environment instead of being pasted into
  # the script text, so their contents can never be parsed as shell syntax.
  env:
    DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }}
    FRONTEND_1_HOST: ${{ secrets.FRONTEND_1_HOST }}
    FRONTEND_2_HOST: ${{ secrets.FRONTEND_2_HOST }}
  run: |
    # Write the deploy key to ~/.ssh/deploy_key and record both servers'
    # host keys in ~/.ssh/known_hosts.

    # Owner-only permissions from the moment the files are created; the
    # key must not be readable by others even briefly on a shared runner.
    umask 077
    mkdir -p ~/.ssh

    # Drop any leftover key first so the new file is created under the
    # restrictive umask rather than inheriting an old file's mode.
    rm -f ~/.ssh/deploy_key
    printf '%s\n' "$DEPLOY_KEY" > ~/.ssh/deploy_key
    chmod 600 ~/.ssh/deploy_key

    # -H stores hashed host names; scan errors are intentionally silenced.
    ssh-keyscan -H "$FRONTEND_1_HOST" >> ~/.ssh/known_hosts 2>/dev/null
    ssh-keyscan -H "$FRONTEND_2_HOST" >> ~/.ssh/known_hosts 2>/dev/null
|
|
|
|
- name: Deploy to both servers
  # Hosts come in through the environment rather than being interpolated
  # into the script text.
  env:
    FRONTEND_1_HOST: ${{ secrets.FRONTEND_1_HOST }}
    FRONTEND_2_HOST: ${{ secrets.FRONTEND_2_HOST }}
  run: |
    # Mirror dist/ to both frontend servers in parallel and fail the step
    # if either transfer fails.
    set -euo pipefail

    # `rsync --delete` makes the remote directory an exact mirror of
    # dist/, so a missing or empty build output would wipe the live site.
    # Refuse to deploy unless the entry point exists and is non-empty.
    if [ ! -s dist/index.html ]; then
      echo "::error::dist/index.html is missing or empty; refusing to deploy with rsync --delete."
      exit 1
    fi

    # Sync dist/ to /var/www/ark-library/ on the given host.
    # Arguments: $1 - host name or address of the target server
    # Returns:   non-zero if rsync fails (set -e aborts the function)
    deploy_to() {
      local host=$1
      echo ">>> 部署到 $host"
      rsync -avz --delete \
        -e "ssh -i ~/.ssh/deploy_key -o StrictHostKeyChecking=no" \
        dist/ \
        "ec2-user@${host}:/var/www/ark-library/"
      echo ">>> $host 部署完成"
    }

    deploy_to "$FRONTEND_1_HOST" &
    PID1=$!
    deploy_to "$FRONTEND_2_HOST" &
    PID2=$!

    # Wait for both jobs before deciding, so a failure on one server does
    # not cut the other transfer short and both errors are reported.
    FAIL=0
    wait "$PID1" || { echo "ERROR: frontend-1 部署失败"; FAIL=1; }
    wait "$PID2" || { echo "ERROR: frontend-2 部署失败"; FAIL=1; }
    [ "$FAIL" -eq 0 ] || exit 1
    echo "=== 两台都部署完成 ==="
|
|
|
|
- name: Verify both servers match local build
  run: |
    # Compare the SHA-256 of the locally built index.html with the copy
    # served from each frontend; any mismatch (or ssh failure) fails the
    # step.
    set -euo pipefail

    # Print the digest of the deployed index.html on host $1.
    remote_digest() {
      ssh -i ~/.ssh/deploy_key -o StrictHostKeyChecking=no \
        "ec2-user@$1" \
        "sha256sum /var/www/ark-library/index.html | awk '{print \$1}'"
    }

    # Abort with an error naming server $1 when digest $2 is not the
    # local one.
    require_current() {
      if [ "$2" != "$LOCAL" ]; then
        echo "ERROR: $1 不是本次构建的版本"
        exit 1
      fi
    }

    LOCAL=$(sha256sum dist/index.html | awk '{print $1}')
    SUM1=$(remote_digest "${{ secrets.FRONTEND_1_HOST }}")
    SUM2=$(remote_digest "${{ secrets.FRONTEND_2_HOST }}")

    echo "local:      $LOCAL"
    echo "frontend-1: $SUM1"
    echo "frontend-2: $SUM2"

    require_current frontend-1 "$SUM1"
    require_current frontend-2 "$SUM2"
    echo "✓ 两台都已经更新到本次构建的版本。"
|
|
|
|
- name: Cleanup SSH key
  # always(): run even when an earlier step failed, so the private key is
  # not left behind on the runner.
  if: always()
  run: |
    rm -f -- ~/.ssh/deploy_key
|