-
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathDockerfile
More file actions
295 lines (240 loc) · 11.5 KB
/
Dockerfile
File metadata and controls
295 lines (240 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# syntax=docker/dockerfile:1.20.0@sha256:26147acbda4f14c5add9946e2fd2ed543fc402884fd75146bd342a7f6271dc1d
# check=error=true;skip=InvalidDefaultArgInFrom,SecretsUsedInArgOrEnv

# NOTE: The `syntax` and `check` parser directives above must stay at the very top of the
# file with nothing between them. BuildKit stops looking for parser directives as soon as
# it has processed an ordinary comment, an empty line or an instruction; a directive placed
# after that point is silently treated as a plain comment and has no effect.
#
# Disabled error checks:
# - SecretsUsedInArgOrEnv : OPA_AUTH_MANAGER is a false positive and breaks the build.
# - InvalidDefaultArgInFrom : GIT_SYNC_VERSION (used in a FROM below) is declared without a
#   default value - presumably the reason this check is skipped; confirm before re-enabling.

# Tag of the git-sync image; has no default and must be passed as a build argument.
ARG GIT_SYNC_VERSION

# For updated versions check https://github.com/kubernetes/git-sync/releases
# which should contain an image location (e.g. registry.k8s.io/git-sync/git-sync:v3.6.8)
FROM oci.stackable.tech/sdp/git-sync/git-sync:${GIT_SYNC_VERSION} AS gitsync-image

# Source stage for the statsd_exporter binary and its SBOM, copied into the final image.
FROM local-image/shared/statsd-exporter AS statsd_exporter-builder
# Stage: test and package the OPA auth manager.
# The resulting wheel ends up in /tmp/opa-auth-manager/dist and is picked up by a later stage.
FROM local-image/vector AS opa-auth-manager-builder

# OPA_AUTH_MANAGER selects the sub-directory of airflow/opa-auth-manager to build;
# PYTHON_VERSION and UV_VERSION pin the interpreter packages and the uv release.
ARG OPA_AUTH_MANAGER \
    PYTHON_VERSION \
    UV_VERSION

COPY airflow/opa-auth-manager/${OPA_AUTH_MANAGER} /tmp/opa-auth-manager

WORKDIR /tmp/opa-auth-manager

RUN <<EOF
    # Bring the base image up to date, then add pip for the requested Python version
    microdnf update
    microdnf install python${PYTHON_VERSION}-pip
    microdnf clean all

    # uv drives both the test run and the wheel build
    pip${PYTHON_VERSION} install --no-cache-dir uv==${UV_VERSION}

    # The test suite sets up an sqlite database and needs this folder for it
    mkdir /root/airflow

    # Third party testing libraries emit warnings we have no control over,
    # hence they are switched off for the test run.
    uv run pytest --disable-warnings

    # Produce the distributable artifacts
    uv build
EOF
# Stage: build Airflow (including UI assets) into a virtual environment under /stackable/app.
FROM local-image/stackable-devel AS airflow-build-image

# Build parameters without defaults. TARGETARCH is one of BuildKit's automatic platform
# arguments; the remaining ones have to be supplied as build arguments.
ARG PRODUCT_VERSION \
    PYTHON_VERSION \
    TARGETARCH \
    STACKABLE_USER_UID \
    NODEJS_VERSION \
    S3FS_VERSION \
    CYCLONEDX_BOM_VERSION \
    UV_VERSION

# Airflow "extras" packages, see
# https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html
# The available extras change between Airflow releases, which is why they are
# version-specific arguments.
# The mysql provider is currently excluded; supporting it requires an implementation of
# https://github.com/apache/airflow/blob/main/scripts/docker/install_mysql.sh
# The extras are kept in separate lists so they are easier to manage (and to compare with
# the online reference). Every list defaults to empty for backwards compatibility.
ARG AIRFLOW_EXTRAS_CORE="" \
    AIRFLOW_EXTRAS_META="" \
    AIRFLOW_EXTRAS_PROVIDER_APACHE="" \
    AIRFLOW_EXTRAS_EXTERNAL_SERVICES="" \
    AIRFLOW_EXTRAS_LOCALLY_INSTALLED_SOFTWARE="" \
    AIRFLOW_EXTRAS_OTHER=""
# Install the OS packages needed to build Airflow, its Python dependencies and the UI assets.
# Update, install and cache clean-up happen in one layer so no package cache is left behind.
# Every continued line keeps a space before the trailing backslash: the continuation joins
# lines verbatim, so a missing space would glue a package name to the next one.
RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \
    microdnf update && \
    microdnf install \
        cyrus-sasl-devel \
        # Needed for kerberos
        cyrus-sasl-gssapi \
        krb5-devel \
        # Needed by ./configure to build gevent, see snippet [1] at the end of file
        diffutils \
        # Needed to build gevent, see snippet [1] at the end of file
        make \
        gcc \
        gcc-c++ \
        libpq-devel \
        openldap-devel \
        openssl-devel \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-devel \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel \
        # The airflow odbc provider can compile without the development files (headers and libraries) (see https://github.com/stackabletech/docker-images/pull/683)
        unixODBC \
        # Needed for Airflow UI assets
        npm \
        nodejs \
        # Needed to modify the SBOM
        jq \
        # Needed to create the source code snapshot
        tar && \
    microdnf clean all && \
    rm -rf /var/cache/yum
# Constraint files for this Airflow/Python combination. They are passed to `uv pip install`
# via --constraint and --build-constraints in the build step below.
COPY airflow/stackable/constraints/${PRODUCT_VERSION}/constraints-python${PYTHON_VERSION}.txt /tmp/constraints.txt
COPY airflow/stackable/constraints/${PRODUCT_VERSION}/build-constraints-python${PYTHON_VERSION}.txt /tmp/build-constraints.txt
# Wheel taken from the dist/ folder of the opa-auth-manager-builder stage.
# NOTE(review): the file name hard-codes version 0.1.0; it has to be kept in sync with the
# version the OPA auth manager project actually builds - confirm when bumping that project.
COPY --from=opa-auth-manager-builder /tmp/opa-auth-manager/dist/opa_auth_manager-0.1.0-py3-none-any.whl /tmp/
# Patch configuration and the patches for this product version, placed under /stackable/src
# where the `patchable ... --images-repo-root=src checkout` call of the build step reads them.
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/patchable.toml /stackable/src/airflow/stackable/patches/patchable.toml
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/${PRODUCT_VERSION} /stackable/src/airflow/stackable/patches/${PRODUCT_VERSION}
# All relative paths in the following build step (e.g. `src`, `./src`) resolve against /stackable
WORKDIR /stackable
# Build Airflow from the checked-out sources and install it into a virtual environment:
#   1. compose the list of extras, 2. create the venv and install pip/uv/hatch,
#   3. check out the sources and archive them, 4. build the UI assets and the wheel,
#   5. install Airflow plus additional packages, 6. generate the SBOM, 7. remove the sources.
# The statement order matters (venv activation, directory changes, SBOM after all installs).
RUN <<EOF
# Compose comma-delimited AIRFLOW_EXTRAS
AIRFLOW_EXTRAS="$AIRFLOW_EXTRAS_CORE,$AIRFLOW_EXTRAS_META,$AIRFLOW_EXTRAS_PROVIDER_APACHE,$AIRFLOW_EXTRAS_EXTERNAL_SERVICES,$AIRFLOW_EXTRAS_LOCALLY_INSTALLED_SOFTWARE,$AIRFLOW_EXTRAS_OTHER"
# Removing duplicates: split on commas, drop empty entries (NF > 0) and repeated entries
# (seen[]), join with commas again and strip the trailing comma
AIRFLOW_EXTRAS=$(echo "$AIRFLOW_EXTRAS" | tr ',' '\n' | awk 'NF > 0 {if (!seen[$0]++) print $0}' | tr '\n' ',' | sed 's/,$//')
# Create the virtual environment (with access to the system site-packages) and activate it
# so that the following pip/uv commands install into /stackable/app
python${PYTHON_VERSION} -m venv --system-site-packages /stackable/app
source /stackable/app/bin/activate
# Upgrade pip to the latest version
# Also install uv to get support for build constraints
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir uv==${UV_VERSION}
# hatch builds the wheels below; it is called via /root/.local/bin/hatch.
# NOTE(review): hatch is installed without a version pin, unlike uv - confirm this is intended.
uv tool install hatch
# Change into the directory printed by `patchable checkout`
# (presumably the checked-out, patched Airflow source tree - verify against patchable)
cd "$(/stackable/patchable --images-repo-root=src checkout airflow ${PRODUCT_VERSION})"
# Snapshot of the source tree as it is at this point, stored next to the installation
tar -czf /stackable/airflow-${PRODUCT_VERSION}-src.tar.gz .
# The presence of an airflow-core directory is used to tell Airflow 3.x from 2.x
if [ -d "./airflow-core" ]; then
# Airflow 3.x
cd airflow-core/src/airflow/ui
# build front-end assets
# NOTE(review): "[email protected]" is not a valid npm package spec; it looks like an
# e-mail-obfuscation artifact that replaced a pinned pnpm version - confirm the intended
# "pnpm@<version>" before building.
npm install -g [email protected]
pnpm install --frozen-lockfile
pnpm run build
# build airflow wheel from airflow root folder
# this picks up the UI assets from the pnpm build, and the dependencies from the root folder
cd ../../..
/root/.local/bin/hatch build -t wheel
# First install the full apache-airflow package to get all dependencies including database drivers
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
# Then install the locally built core wheel to override the core package
uv pip install --no-cache-dir dist/apache_airflow_core-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
else
# Airflow 2.x
# build front-end assets
cd airflow/www
# NOTE(review): "[email protected]" is not a valid npm package spec; it looks like an
# e-mail-obfuscation artifact that replaced a pinned yarn version - confirm the intended
# "yarn@<version>" before building.
npm install -g [email protected]
yarn install --frozen-lockfile
yarn run build
# build airflow wheel from airflow root folder
cd ../..
/root/.local/bin/hatch build -t wheel
# First install the full apache-airflow package to get all dependencies including database drivers
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
# Then install the locally built wheel to override with patched version
uv pip install --no-cache-dir dist/apache_airflow-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
fi
# Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3
# (cyclonedx-bom is only needed for the SBOM generation below and is uninstalled afterwards)
uv pip install --no-cache-dir s3fs==${S3FS_VERSION} cyclonedx-bom==${CYCLONEDX_BOM_VERSION}
# Needed for OIDC
uv pip install --no-cache-dir Flask_OIDC==2.2.0 Flask-OpenID==1.3.1
# OPA auth manager wheel copied from the opa-auth-manager-builder stage
# (the file name hard-codes version 0.1.0)
uv pip install --no-cache-dir /tmp/opa_auth_manager-0.1.0-py3-none-any.whl
# Create the SBOM for Airflow
# Important: All `pip install` commands must be above this line, otherwise the SBOM will be incomplete
cyclonedx-py environment --schema-version 1.5 --outfile /tmp/sbom.json
uv pip uninstall cyclonedx-bom
# Break circular dependencies by removing the apache-airflow dependency from the providers:
# for every dependency entry whose ref starts with "apache-airflow-providers-", the
# "apache-airflow==<PRODUCT_VERSION>" item is dropped from its dependsOn list.
jq '.dependencies |= map(if .ref | test("^apache-airflow-providers-") then
.dependsOn |= map(select(. != "apache-airflow=='${PRODUCT_VERSION}'"))
else
.
end)' /tmp/sbom.json > /stackable/app/airflow-${PRODUCT_VERSION}.cdx.json
# Clean up build artifacts and temporary files to reduce image size
# (the whole /stackable directory is copied into the final image later)
cd /stackable
rm -rf ./src
EOF
# Prepare the Airflow home directory layout and give the group the same
# permissions as the owning user on everything below /stackable.
RUN <<EOF
    mkdir -pv \
        /stackable/airflow \
        /stackable/airflow/dags \
        /stackable/airflow/logs
    chmod --recursive g=u /stackable
EOF
# Stage: the final runtime image. It only receives build results from the earlier stages.
FROM local-image/vector AS airflow-main-image

ARG PRODUCT_VERSION
ARG PYTHON_VERSION
ARG RELEASE_VERSION
ARG TINI_VERSION
ARG TARGETARCH
ARG SHARED_STATSD_EXPORTER_VERSION
ARG STACKABLE_USER_UID

# The maintainer address had been replaced by an "[email protected]" placeholder
# (e-mail obfuscation artifact); it is restored to the Stackable contact address here.
LABEL name="Apache Airflow" \
      maintainer="info@stackable.tech" \
      vendor="Stackable GmbH" \
      version="${PRODUCT_VERSION}" \
      release="${RELEASE_VERSION}" \
      summary="The Stackable image for Apache Airflow." \
      description="This image is deployed by the Stackable Operator for Apache Airflow."

# HOME is referenced by the PATH/AIRFLOW_HOME definitions and the COPY targets below,
# so it has to be set in its own ENV instruction first.
ENV HOME=/stackable
ENV AIRFLOW_USER_HOME_DIR=/stackable
# $HOME/app/bin is the bin directory of the virtual environment created in the build stage
ENV PATH=$PATH:/bin:$HOME/app/bin
ENV AIRFLOW_HOME=$HOME/airflow

# Everything the build stage left under /stackable (venv, source archive, SBOM, airflow dirs)
COPY --from=airflow-build-image --chown=${STACKABLE_USER_UID}:0 /stackable/ ${HOME}/
# Start-up scripts; /run-airflow.sh is what the ENTRYPOINT launches through tini
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/entrypoint.sh /entrypoint.sh
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/run-airflow.sh /run-airflow.sh

# statsd_exporter binary and its SBOM
COPY --from=statsd_exporter-builder --chown=${STACKABLE_USER_UID}:0 /statsd_exporter/statsd_exporter ${HOME}/statsd_exporter
COPY --from=statsd_exporter-builder --chown=${STACKABLE_USER_UID}:0 /statsd_exporter/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json ${HOME}/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json
# git-sync binary from the upstream git-sync image
COPY --from=gitsync-image --chown=${STACKABLE_USER_UID}:0 /git-sync ${HOME}/git-sync

COPY airflow/licenses /licenses
# Update image and install needed packages, fetch tini and fix up file permissions
RUN <<EOF
    microdnf update

    # git: Needed for the gitsync functionality
    # openldap: Needed for authentication of clients against LDAP servers
    # openssh-clients: We need the openssh libs for the gitsync functionality (the clone target could be e.g. git@github.com:org/repo.git)
    # python: Airflow needs Python
    microdnf install \
        ca-certificates \
        cyrus-sasl \
        git \
        libpq \
        openldap \
        openldap-clients \
        openssh-clients \
        openssl-libs \
        openssl-pkcs11 \
        python${PYTHON_VERSION} \
        socat \
        unixODBC
    microdnf clean all
    rm -rf /var/cache/yum

    # Get the correct `tini` binary for our architecture.
    # It is used as an init alternative in the entrypoint
    # --fail makes curl exit with an error on HTTP errors (e.g. a 404 for an unknown
    # version/architecture) instead of saving the server's error page as /usr/bin/tini.
    curl --fail -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI_VERSION}-${TARGETARCH}"

    # fix missing permissions
    chmod a+x /entrypoint.sh
    chmod a+x /run-airflow.sh
    chmod +x /usr/bin/tini
    # Give the group the same permissions as the user on the files copied from other stages
    chmod g=u /stackable/statsd_exporter ${HOME}/statsd_exporter-${SHARED_STATSD_EXPORTER_VERSION}.cdx.json ${HOME}/git-sync
EOF
# ----------------------------------------
# Checks
# Final checks that make sure the finished image meets a few minimal
# requirements, such as:
# - file permissions and ownerships
# ----------------------------------------

# Verify permissions and ownership below ${HOME} for the Stackable user (UID
# ${STACKABLE_USER_UID}) and group 0. Any mismatch makes the script fail, which stops the build.
RUN /bin/check-permissions-ownership.sh ${HOME} ${STACKABLE_USER_UID} 0
# ----------------------------------------
# Attention: Do not perform any file based actions (copying/creating etc.) below this comment because the permissions would not be checked.
# ----------------------------------------
# Switch to the Stackable user: it applies to any following instruction and to the running container.
USER ${STACKABLE_USER_UID}
# Default working directory of the container (same path as HOME)
WORKDIR /stackable
# tini is started first and launches /run-airflow.sh ("--" ends tini's own options)
ENTRYPOINT ["/usr/bin/tini", "--", "/run-airflow.sh"]
# No default arguments; arguments given when starting the container are appended to the ENTRYPOINT
CMD []
# SNIPPET 1
# 137.0 Running '(cd "/tmp/pip-install-cyuymnu6/gevent_0f8b4d282c464210b62acdf399e4a04c/deps/libev" && sh ./configure -C > configure-output.txt )' in /tmp/pip-install-cyuymnu6/gevent_0f8b4d282c464210b62acdf399e4a04c
# 137.0 ./configure: line 6350: cmp: command not found
# 137.0 ./configure: line 6350: cmp: command not found
# 137.0 ./configure: line 8279: diff: command not found
# 137.0 config.status: error: in `/tmp/pip-install-cyuymnu6/gevent_0f8b4d282c464210b62acdf399e4a04c/deps/libev':
# 137.0 config.status: error: Something went wrong bootstrapping makefile fragments
# 137.0 for automatic dependency tracking. Try re-running configure with the
# 137.0 '--disable-dependency-tracking' option to at least be able to build
# 137.0 the package (albeit without support for automatic dependency tracking).