# Builds the frontend on a self-hosted runner and rsyncs `dist/` to two
# frontend servers, then verifies both serve the index.html that was just
# built. Disk-space cleanup steps run first because the runner's volume is
# tight and ENOSPC during install/build has killed past runs.
name: Deploy to Frontend Servers

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: self-hosted
    steps:
      # Best-effort cleanup (set +e): individual failures are ignored, only
      # the final free-space check can fail the step.
      - name: Free disk space
        run: |
          set +e
          echo "=== Disk before cleanup ==="
          df -h

          # Identify the directory holding the currently-running act job so we
          # never touch it. Everything else under ~/.cache/act/ is fair game.
          CURRENT_ACT_DIR=""
          if [ -n "${ACT_TOOLCACHE_PATH:-}" ]; then
            CURRENT_ACT_DIR=$(dirname "${ACT_TOOLCACHE_PATH}" 2>/dev/null)
          fi
          if [ -z "$CURRENT_ACT_DIR" ]; then
            CURRENT_ACT_DIR=$(pwd | sed -n 's|\(.*/.cache/act/[^/]*\).*|\1|p')
          fi
          echo "Current act dir (preserved): ${CURRENT_ACT_DIR:-}"

          # Wipe every other act workspace immediately (no mtime threshold).
          # The old 10min threshold still left the previous failed job around,
          # which then ate the disk before `npm ci` could finish.
          # If CURRENT_ACT_DIR is empty the first pattern below degrades to
          # `/*`, which matches every absolute path, so nothing is removed.
          if [ -d "$HOME/.cache/act" ]; then
            du -sh "$HOME/.cache/act" 2>/dev/null
            for d in "$HOME/.cache/act"/*/; do
              [ -d "$d" ] || continue
              case "$d" in
                "$CURRENT_ACT_DIR"/*|"$CURRENT_ACT_DIR/")
                  echo "skip current: $d"
                  ;;
                *)
                  rm -rf "$d" && echo "removed: $d"
                  ;;
              esac
            done
          fi

          # npm cache + setup-node cache: blow them away entirely. `npm ci`
          # re-populates what it needs; the cache is a nice-to-have, not a
          # requirement, and on a tight runner it's the easiest GB to reclaim.
          rm -rf "$HOME/.npm/_cacache" "$HOME/.npm/_logs" 2>/dev/null
          rm -rf "$HOME/.cache/setup-node" 2>/dev/null

          # Stale actions-runner workspaces older than 30min.
          if [ -d "$HOME/actions-runner/_work" ]; then
            find "$HOME/actions-runner/_work" -mindepth 1 -maxdepth 2 -mmin +30 -exec rm -rf {} + 2>/dev/null
          fi

          # Docker: drop everything reclaimable (no `until` filter).
          if command -v docker >/dev/null 2>&1; then
            docker system prune -af --volumes 2>/dev/null
          fi

          # Stale /tmp files older than 2h, keep currently-running runner files.
          find /tmp -mindepth 1 -maxdepth 1 -mmin +120 \
            -not -name 'runner*' -not -name 'act*' \
            -exec rm -rf {} + 2>/dev/null

          echo "=== Disk after cleanup ==="
          df -h

          # Hard fail early if there still isn't enough room for `npm ci`,
          # which needs ~3GB for this codebase's hoisted node_modules.
          AVAIL_MB=$(df -Pm . | awk 'NR==2 {print $4}')
          echo "Available on workspace volume: ${AVAIL_MB} MB"
          if [ "${AVAIL_MB:-0}" -lt 3500 ]; then
            echo "::error::Less than 3.5GB free after cleanup (${AVAIL_MB}MB)."
            # Continuation lines are flush with the script's base indent so
            # no extra whitespace ends up inside the quoted message.
            echo "The runner's EBS volume is too small for this codebase \
          — ask devops to expand it. Failing fast so the next steps don't \
          half-write an unusable build."
            exit 1
          fi
          exit 0

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Type check
        run: npx tsc --noEmit

      - name: Format check
        run: npm run format:check

      - name: Test
        run: npm test

      # Never fails the job (always exits 0); it only tries to reclaim space.
      - name: Ensure free space before build
        run: |
          # Second-pass guard right before the heaviest step. `npm ci` plus
          # tsc/test pull in tons of files since the first cleanup ran, and
          # vite chunk writes need at least a few GB free — ENOSPC here is what
          # killed past runs.
          set +e
          AVAIL_MB=$(df -Pm . | awk 'NR==2 {print $4}')
          echo "Available on workspace volume: ${AVAIL_MB} MB"
          if [ "${AVAIL_MB:-0}" -lt 3072 ]; then
            echo "<3GB free — running an aggressive cleanup before build."
            rm -rf "$HOME/.cache/act"/*/hostexecutor/node_modules/.cache 2>/dev/null
            rm -rf "$HOME/.npm/_cacache" 2>/dev/null
            if command -v docker >/dev/null 2>&1; then
              docker system prune -af --volumes 2>/dev/null
            fi
            df -h
          fi
          exit 0

      - name: Build
        run: npm run build
        env:
          VITE_API_URL: ""
          VITE_API_PREFIX: "/apnew"
          VITE_DISABLE_ADMIN: "true"

      # Secrets are handed to the script through environment variables rather
      # than being spliced into the script text, so their contents are never
      # parsed as shell source.
      - name: Setup SSH key
        env:
          DEPLOY_KEY: ${{ secrets.DEPLOY_KEY }}
          FRONTEND_1_HOST: ${{ secrets.FRONTEND_1_HOST }}
          FRONTEND_2_HOST: ${{ secrets.FRONTEND_2_HOST }}
        run: |
          # Restrictive umask first: the key file must never exist with
          # group/other read bits, not even briefly before the chmod below.
          umask 077
          mkdir -p ~/.ssh
          printf '%s\n' "$DEPLOY_KEY" > ~/.ssh/deploy_key
          chmod 600 ~/.ssh/deploy_key
          ssh-keyscan -H "$FRONTEND_1_HOST" >> ~/.ssh/known_hosts 2>/dev/null
          ssh-keyscan -H "$FRONTEND_2_HOST" >> ~/.ssh/known_hosts 2>/dev/null

      # Both rsyncs run in parallel; the step fails if either one fails.
      # NOTE(review): StrictHostKeyChecking=no means a changed host key does
      # not abort the connection, despite the ssh-keyscan entries collected
      # above. Kept as-is to preserve behavior — consider tightening.
      - name: Deploy to both servers
        env:
          FRONTEND_1_HOST: ${{ secrets.FRONTEND_1_HOST }}
          FRONTEND_2_HOST: ${{ secrets.FRONTEND_2_HOST }}
        run: |
          set -euo pipefail

          deploy_to() {
            local HOST="$1"
            echo ">>> 部署到 $HOST"
            rsync -avz --delete \
              -e "ssh -i ~/.ssh/deploy_key -o StrictHostKeyChecking=no" \
              dist/ \
              "ec2-user@${HOST}:/var/www/ark-library/"
            echo ">>> $HOST 部署完成"
          }

          deploy_to "$FRONTEND_1_HOST" &
          PID1=$!
          deploy_to "$FRONTEND_2_HOST" &
          PID2=$!

          # Wait on each background job separately so both failures are
          # reported before the step exits.
          FAIL=0
          wait "$PID1" || { echo "ERROR: frontend-1 部署失败"; FAIL=1; }
          wait "$PID2" || { echo "ERROR: frontend-2 部署失败"; FAIL=1; }
          [ "$FAIL" -eq 0 ] || exit 1
          echo "=== 两台都部署完成 ==="

      # Compares the SHA-256 of the local dist/index.html with the copy on
      # each server; a mismatch on either server fails the job.
      - name: Verify both servers match local build
        env:
          FRONTEND_1_HOST: ${{ secrets.FRONTEND_1_HOST }}
          FRONTEND_2_HOST: ${{ secrets.FRONTEND_2_HOST }}
        run: |
          set -euo pipefail
          LOCAL=$(sha256sum dist/index.html | awk '{print $1}')
          SUM1=$(ssh -i ~/.ssh/deploy_key -o StrictHostKeyChecking=no \
            "ec2-user@${FRONTEND_1_HOST}" \
            "sha256sum /var/www/ark-library/index.html | awk '{print \$1}'")
          SUM2=$(ssh -i ~/.ssh/deploy_key -o StrictHostKeyChecking=no \
            "ec2-user@${FRONTEND_2_HOST}" \
            "sha256sum /var/www/ark-library/index.html | awk '{print \$1}'")
          echo "local:      $LOCAL"
          echo "frontend-1: $SUM1"
          echo "frontend-2: $SUM2"
          if [ "$SUM1" != "$LOCAL" ]; then
            echo "ERROR: frontend-1 不是本次构建的版本"
            exit 1
          fi
          if [ "$SUM2" != "$LOCAL" ]; then
            echo "ERROR: frontend-2 不是本次构建的版本"
            exit 1
          fi
          echo "✓ 两台都已经更新到本次构建的版本。"

      # Runs even when earlier steps fail so the private key never lingers on
      # the self-hosted runner.
      - name: Cleanup SSH key
        if: always()
        run: rm -f ~/.ssh/deploy_key