Skip to content

Commit dd6a4d5

Browse files
authored
ci: run Iceberg Spark tests on all PRs and commits to main branch (#3792)
1 parent cfd8a1c commit dd6a4d5

7 files changed

Lines changed: 22 additions & 62 deletions

File tree

.github/actions/setup-iceberg-rust-builder/action.yaml

Lines changed: 0 additions & 39 deletions
This file was deleted.

.github/workflows/iceberg_spark_test.yml

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ env:
5353
jobs:
5454
# Build native library once and share with all test jobs
5555
build-native:
56-
if: contains(github.event.pull_request.title, '[iceberg]')
5756
name: Build Native Library
5857
runs-on: ubuntu-24.04
5958
container:
@@ -102,9 +101,8 @@ jobs:
102101
path: native/target/ci/libcomet.so
103102
retention-days: 1
104103

105-
iceberg-spark-rust:
104+
iceberg-spark:
106105
needs: build-native
107-
if: contains(github.event.pull_request.title, '[iceberg]')
108106
strategy:
109107
matrix:
110108
os: [ubuntu-24.04]
@@ -113,7 +111,7 @@ jobs:
113111
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
114112
scala-version: ['2.13']
115113
fail-fast: false
116-
name: iceberg-spark-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
114+
name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
117115
runs-on: ${{ matrix.os }}
118116
container:
119117
image: amd64/rust
@@ -135,20 +133,19 @@ jobs:
135133
run: |
136134
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
137135
- name: Setup Iceberg
138-
uses: ./.github/actions/setup-iceberg-rust-builder
136+
uses: ./.github/actions/setup-iceberg-builder
139137
with:
140138
iceberg-version: ${{ matrix.iceberg-version.full }}
141-
- name: Run Iceberg Spark tests (Rust)
139+
- name: Run Iceberg Spark tests
142140
run: |
143141
cd apache-iceberg
144142
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
145143
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
146144
:iceberg-spark:iceberg-spark-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
147145
-Pquick=true -x javadoc
148146
149-
iceberg-spark-extensions-rust:
147+
iceberg-spark-extensions:
150148
needs: build-native
151-
if: contains(github.event.pull_request.title, '[iceberg]')
152149
strategy:
153150
matrix:
154151
os: [ubuntu-24.04]
@@ -157,7 +154,7 @@ jobs:
157154
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
158155
scala-version: ['2.13']
159156
fail-fast: false
160-
name: iceberg-spark-extensions-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
157+
name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
161158
runs-on: ${{ matrix.os }}
162159
container:
163160
image: amd64/rust
@@ -179,20 +176,19 @@ jobs:
179176
run: |
180177
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
181178
- name: Setup Iceberg
182-
uses: ./.github/actions/setup-iceberg-rust-builder
179+
uses: ./.github/actions/setup-iceberg-builder
183180
with:
184181
iceberg-version: ${{ matrix.iceberg-version.full }}
185-
- name: Run Iceberg Spark extensions tests (Rust)
182+
- name: Run Iceberg Spark extensions tests
186183
run: |
187184
cd apache-iceberg
188185
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
189186
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
190187
:iceberg-spark:iceberg-spark-extensions-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:test \
191188
-Pquick=true -x javadoc
192189
193-
iceberg-spark-runtime-rust:
190+
iceberg-spark-runtime:
194191
needs: build-native
195-
if: contains(github.event.pull_request.title, '[iceberg]')
196192
strategy:
197193
matrix:
198194
os: [ubuntu-24.04]
@@ -201,7 +197,7 @@ jobs:
201197
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
202198
scala-version: ['2.13']
203199
fail-fast: false
204-
name: iceberg-spark-runtime-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
200+
name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
205201
runs-on: ${{ matrix.os }}
206202
container:
207203
image: amd64/rust
@@ -223,10 +219,10 @@ jobs:
223219
run: |
224220
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
225221
- name: Setup Iceberg
226-
uses: ./.github/actions/setup-iceberg-rust-builder
222+
uses: ./.github/actions/setup-iceberg-builder
227223
with:
228224
iceberg-version: ${{ matrix.iceberg-version.full }}
229-
- name: Run Iceberg Spark runtime tests (Rust)
225+
- name: Run Iceberg Spark runtime tests
230226
run: |
231227
cd apache-iceberg
232228
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups

docs/source/contributor-guide/iceberg-spark-tests.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Clone Apache Iceberg locally and apply the diff file from Comet against the matc
5151
git clone git@github.com:apache/iceberg.git apache-iceberg
5252
cd apache-iceberg
5353
git checkout apache-iceberg-1.8.1
54-
git apply ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
54+
git apply ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
5555
```
5656

5757
## 3. Run Iceberg Spark Tests
@@ -64,9 +64,11 @@ ENABLE_COMET=true ./gradlew -DsparkVersions=3.5 -DscalaVersion=2.13 -DflinkVersi
6464

6565
The three Gradle targets tested in CI are:
6666

67-
- `:iceberg-spark:iceberg-spark-<sparkVersion>_<scalaVersion>:test`
68-
- `:iceberg-spark:iceberg-spark-extensions-<sparkVersion>_<scalaVersion>:test`
69-
- `:iceberg-spark:iceberg-spark-runtime-<sparkVersion>_<scalaVersion>:integrationTest`
67+
| Gradle Target | What It Covers |
68+
| --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
69+
| `iceberg-spark-<ver>:test` | Core read/write paths (Parquet, Avro, ORC, vectorized), scan operations, filtering, bloom filters, runtime filtering, deletion handling, structured streaming, DDL/DML (create/alter/drop, writes, deletes), filter and aggregate pushdown, actions (snapshot expiration, file rewriting, orphan cleanup, table migration), serialization, and data format conversions. |
70+
| `iceberg-spark-extensions-<ver>:test` | SQL extensions: stored procedures (migrate, snapshot, cherrypick, rollback, rewrite-data-files, rewrite-manifests, expire-snapshots, remove-orphan-files, etc.), row-level operations (copy-on-write and merge-on-read update/delete/merge), DDL extensions (branches, tags, alter schema, partition fields), changelog tables/views, metadata tables, and views. |
71+
| `iceberg-spark-runtime-<ver>:integrationTest` | A single smoke test (`SmokeTest.java`) that validates the shaded runtime JAR. The `spark-runtime` module has no main source — it packages Iceberg and all dependencies into a shaded uber-JAR. The smoke test exercises basic create, insert, merge, query, partition field, and sort order operations to confirm the shaded JAR works end-to-end. |
7072

7173
## Updating Diffs
7274

@@ -76,14 +78,14 @@ regenerate:
7678
```shell
7779
cd apache-iceberg
7880
git reset --hard apache-iceberg-1.8.1 && git clean -fd
79-
git apply ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
81+
git apply ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
8082

8183
# Make changes, then run spotless to fix formatting
8284
./gradlew spotlessApply
8385

8486
# Stage any new or deleted files, then generate the diff
8587
git add -A
86-
git diff apache-iceberg-1.8.1 > ../datafusion-comet/dev/diffs/iceberg-rust/1.8.1.diff
88+
git diff apache-iceberg-1.8.1 > ../datafusion-comet/dev/diffs/iceberg/1.8.1.diff
8789
```
8890

8991
Repeat for each Iceberg version (1.8.1, 1.9.1, 1.10.0). The file contents differ between versions, so each
@@ -93,4 +95,4 @@ diff must be generated against its own tag.
9395

9496
The `iceberg_spark_test.yml` workflow applies these diffs and runs the three Gradle targets above against
9597
each Iceberg version. The test matrix covers Spark 3.4 and 3.5 across Iceberg 1.8.1, 1.9.1, and 1.10.0
96-
with Java 11 and 17. The workflow only runs when the PR title contains `[iceberg]`.
98+
with Java 11 and 17. The workflow runs on all pull requests and pushes to the main branch.

docs/source/contributor-guide/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Adding a New Expression <adding_a_new_expression>
3737
Tracing <tracing>
3838
Profiling Native Code <profiling_native_code>
3939
Spark SQL Tests <spark-sql-tests.md>
40+
Iceberg Spark Tests <iceberg-spark-tests.md>
4041
SQL File Tests <sql-file-tests.md>
4142
Roadmap <roadmap.md>
4243
Release Process <release_process>

0 commit comments

Comments
 (0)