# Source: SwiftLM/.github/workflows/ci.yml at 08412516fb9577c2be78b49953848dc14f6a7c3c (SharpAI/SwiftLM, GitHub)
# Continuous-integration workflow: builds SwiftLM in release mode and runs the
# unit, integration and end-to-end test steps on a macOS runner.
name: CI Pipeline

# Run for every push to main and for every pull request that targets main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the job only checks out, builds and tests the repository,
# so the GITHUB_TOKEN needs nothing beyond read access to repository contents.
permissions:
  contents: read

# One active run per ref: a newer run for the same branch or pull request
# cancels the run that is still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Single job: build, unit tests, integration tests and E2E tests run
  # sequentially on one macOS runner.
  ci:
    runs-on: macos-15
    # Upper bound for the whole job, including the E2E retry loop.
    timeout-minutes: 40
    steps:
      # Fetch the repository together with all nested git submodules.
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      # Best effort: `|| true` keeps the job going when the component download
      # fails. NOTE(review): presumably the flag is unavailable on some Xcode
      # versions — confirm before making this step mandatory.
      - name: Install Metal Toolchain
        run: xcodebuild -downloadComponent MetalToolchain || true

      # Persist SwiftPM's .build directory between runs, keyed on the contents
      # of Package.resolved.
      - name: Cache Swift packages
        uses: actions/cache@v4
        with:
          path: .build
          # Key includes product name so any rename (e.g. mlx-server→SwiftLM)
          # automatically busts the cache and prevents stale PCH errors.
          key: ${{ runner.os }}-spm-SwiftLM-v2-${{ hashFiles('Package.resolved') }}
          # Fallback prefix: when Package.resolved changes, restore the most
          # recent cache for this product instead of starting from nothing.
          restore-keys: |
            ${{ runner.os }}-spm-SwiftLM-v2-

      - name: Clear stale module cache
        # Prevents: "PCH was compiled with module cache path '…mlx-server…'
        # but the path is currently '…SwiftLM…'" after repo rename.
        #
        # The directory guard covers a cache miss (no .build yet), and -prune
        # stops find from descending into directories that are about to be
        # deleted, so stderr no longer has to be discarded. Cleanup stays best
        # effort: a failure is reported as a warning instead of being hidden.
        run: |
          if [ -d .build ]; then
            find .build -type d -name ModuleCache -prune -exec rm -rf {} + \
              || echo "::warning::Could not remove every stale ModuleCache directory"
          fi

      # Resolve package dependencies as a separate step so resolution failures
      # are reported apart from compile failures.
      - name: Resolve dependencies
        run: swift package resolve

      # Release-configuration build; later steps use .build/release/SwiftLM.
      - name: Build (Release)
        run: swift build -c release

      - name: Verify binary
        # Confirms the release product exists (ls fails the step otherwise)
        # and logs its size and file type.
        run: |
          binary=.build/release/SwiftLM
          ls -lh "$binary"
          file "$binary"

      # Standalone C++ unit tests for the TurboQuant KV cache compression
      # algorithm (ported from TheTom/llama-cpp-turboquant).
      # Covered: centroids, WHT self-inverse, rotation orthogonality,
      #          3-bit pack/unpack, V-cache SNR, K-cache IP SNR, fp16 round-trip.
      - name: TurboQuant unit tests
        run: |
          test_bin=/tmp/tq_test
          clang++ -std=c++17 -O2 -o "$test_bin" tests/test_turbo_quant.cpp
          "$test_bin"

      # Compile the test targets now so the later `swift test --skip-build`
      # step can run them without rebuilding.
      - name: Build Test Harness
        run: swift build --build-tests

      - name: Install MLX Metal library
        # Installs the mlx Python wheel into a throwaway venv only to obtain
        # its prebuilt mlx.metallib, then places that file next to the
        # binaries that load it.
        run: |
          python3 -m venv /tmp/mlx_venv
          /tmp/mlx_venv/bin/pip install --quiet mlx

          # Resolve the wheel's metallib once and fail with a clear message if
          # the wheel layout ever changes.
          candidates=(/tmp/mlx_venv/lib/python*/site-packages/mlx/lib/mlx.metallib)
          metallib="${candidates[0]}"
          if [ ! -f "$metallib" ]; then
            echo "::error::mlx.metallib not found in the mlx wheel ($metallib)"
            exit 1
          fi

          # Inject metallib for production e2e runner
          cp "$metallib" .build/release/

          # Distribute metallib to every MacOS directory under .build (the
          # XCTest bundles) so it satisfies memory.cpp current_binary_dir()
          # constraints natively. An explicit loop is used because
          # `find -exec ... \;` ignores the exit status of cp; here a failed
          # copy fails the step.
          copied=0
          while IFS= read -r -d '' bundle_dir; do
            cp "$metallib" "$bundle_dir/"
            copied=$((copied + 1))
          done < <(find .build -type d -name "MacOS" -print0)
          echo "Copied mlx.metallib into $copied MacOS directories"

      # --skip-build reuses the products of the "Build Test Harness" step;
      # --filter limits the run to tests matching SwiftBuddyTests;
      # --disable-swift-testing turns off the Swift Testing library for this run.
      - name: SwiftBuddy Tests (MemPalace & Lifecycle)
        run: swift test --skip-build --filter SwiftBuddyTests --disable-swift-testing

      # Keep the Hugging Face cache directory between runs. The key is a fixed
      # string, so it never changes with the repository contents.
      # NOTE(review): presumably the E2E script downloads the model into this
      # directory — confirm in tests/test-server.sh.
      - name: Cache MLX model
        uses: actions/cache@v4
        with:
          path: ~/.cache/huggingface
          key: mlx-model-qwen2.5-0.5b-4bit

      - name: Run E2E tests
        # Runs tests/test-server.sh with the release binary and 15413 as
        # arguments. The first passing attempt ends the step successfully;
        # the step fails only after every attempt has failed.
        env:
          HF_HUB_DOWNLOAD_TIMEOUT: "600"
        run: |
          chmod +x tests/test-server.sh
          # Three attempts in total (two retries) to ride out transient
          # HuggingFace download failures.
          max_attempts=3
          attempt=1
          while [ "$attempt" -le "$max_attempts" ]; do
            echo "Attempt $attempt of $max_attempts..."
            if tests/test-server.sh .build/release/SwiftLM 15413; then
              exit 0
            fi
            # Pause before the next attempt, but not after the final one.
            if [ "$attempt" -lt "$max_attempts" ]; then
              echo "Test failed, retrying in 10s..."
              sleep 10
            fi
            attempt=$((attempt + 1))
          done
          echo "All attempts failed"
          exit 1

      # Runs only when an earlier step has failed; stores the matching /tmp
      # log files as a workflow artifact for 7 days.
      - name: Upload test logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: ci-test-logs
          path: /tmp/SwiftLM-test-*.log
          retention-days: 7