-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjvm_image_layers.bzl
More file actions
443 lines (396 loc) · 16.7 KB
/
jvm_image_layers.bzl
File metadata and controls
443 lines (396 loc) · 16.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
"""Rules for converting JVM binaries into layered container tarballs.
Two strategies are provided:
- jvm_image_layers: Explodes a deploy jar into loose files (fast, but loses
duplicate resources like reference.conf).
- jvm_jar_layers: Keeps individual dependency JARs intact in the container
(preserves all resources, identical runtime behavior to bazel run).
"""
# Provider produced by _maven_deps_aspect. It carries the Maven artifact IDs
# found on a target together with those collected from the dependencies the
# aspect visited.
MavenDepsInfo = provider(
    doc = "Collects maven artifact IDs from jvm_import dependencies.",
    fields = {
        # Entries come from "maven_coordinates=" tags; only the first two
        # colon-separated parts (group and name) are kept.
        "artifacts": "depset of artifact ID strings (group:name)",
    },
)
def _maven_deps_aspect_impl(target, ctx):
    """Collects Maven artifact IDs for one target and its visited dependencies.

    Direct IDs are read from "maven_coordinates=<group>:<name>[:...]" tags on
    the rule; IDs already gathered on dependencies are merged in through their
    MavenDepsInfo providers.

    Args:
        target: the target the aspect is being applied to (not used).
        ctx: the aspect context; only ctx.rule.attr is read.

    Returns:
        A one-element list holding the MavenDepsInfo for this target.
    """
    tag_prefix = "maven_coordinates="
    rule_attrs = ctx.rule.attr

    # Artifact IDs declared directly on this rule through its tags.
    direct = []
    for tag in getattr(rule_attrs, "tags", []):
        if not tag.startswith(tag_prefix):
            continue
        fields = tag[len(tag_prefix):].split(":")

        # Coordinates need at least group and name; anything after is dropped.
        if len(fields) >= 2:
            direct.append(fields[0] + ":" + fields[1])

    # Depsets already computed for dependencies that carry the provider.
    inherited = []
    for attr_name in ("deps", "exports", "runtime_deps"):
        inherited.extend([
            dep[MavenDepsInfo].artifacts
            for dep in getattr(rule_attrs, attr_name, [])
            if MavenDepsInfo in dep
        ])

    return [MavenDepsInfo(
        artifacts = depset(direct = direct, transitive = inherited),
    )]
# Aspect that attaches MavenDepsInfo to a target. It propagates along the same
# three attributes that _maven_deps_aspect_impl reads when merging the
# artifact depsets of dependencies.
_maven_deps_aspect = aspect(
    implementation = _maven_deps_aspect_impl,
    attr_aspects = ["deps", "exports", "runtime_deps"],
)
def _sanitize_prefix(prefix):
    """Turns a path prefix into a string usable inside a file name.

    Every "/" becomes "_", then leading and trailing underscores are removed.

    Args:
        prefix: path prefix string such as "com/google/".

    Returns:
        The sanitized string, e.g. "com_google".
    """
    flattened = "_".join(prefix.split("/"))
    return flattened.strip("_")
def _sanitize_artifact_id(artifact_id):
    """Turns an artifact ID into a string usable inside a file name.

    Args:
        artifact_id: string such as "com.google.guava:guava".

    Returns:
        The ID with every ":" replaced by "_".
    """
    return "_".join(artifact_id.split(":"))
def _group_key(artifact_id, depth):
    """Derives the grouping key of an artifact ID for a given prefix depth.

    For artifact "com.google.guava:guava":
        depth=None -> "com.google.guava" (full group ID)
        depth=2    -> "com.google"
        depth=1    -> "com"

    Args:
        artifact_id: string like "com.google.guava:guava"
        depth: number of leading dot-segments of the group ID to keep, or
            None to keep the full group ID

    Returns:
        grouping key string
    """
    group_id = artifact_id.partition(":")[0]
    if depth != None:
        segments = group_id.split(".")

        # Only shorten when the group ID has more segments than requested.
        if depth < len(segments):
            return ".".join(segments[:depth])
    return group_id
def _group_artifacts(artifact_ids, max_groups):
    """Buckets artifacts by Maven group prefix so that at most max_groups remain.

    Args:
        artifact_ids: list of artifact ID strings
        max_groups: maximum number of groups allowed

    Returns:
        list of (group_name, [artifact_id, ...]) tuples. When the artifacts
        already fit, each artifact is its own group and input order is kept.
        Otherwise groups appear in the order their first (sorted) member is
        seen. If no prefix depth fits, a single "all" group is returned, or an
        empty list when max_groups is below 1.
    """
    if len(artifact_ids) <= max_groups:
        return [(artifact_id, [artifact_id]) for artifact_id in artifact_ids]

    ordered_ids = sorted(artifact_ids)

    # Try the full group ID first (depth None), then progressively coarser
    # prefixes of 3, 2 and 1 leading dot-segments.
    for depth in [None, 3, 2, 1]:
        buckets = {}
        for artifact_id in ordered_ids:
            buckets.setdefault(_group_key(artifact_id, depth), []).append(artifact_id)
        if len(buckets) <= max_groups:
            # Dict iteration follows insertion order, i.e. first-seen order.
            return list(buckets.items())

    # Nothing fit: collapse everything into one group when a slot exists,
    # otherwise produce no artifact layers at all.
    if max_groups < 1:
        return []
    return [("all", ordered_ids)]
def _deploy_jar_label(binary):
    """Returns the label of the implicit "<name>_deploy.jar" output of binary.

    Args:
        binary: label string of the binary target.

    Returns:
        Label string of the deploy jar.
    """
    if ":" not in binary and "//" in binary:
        # Shorthand absolute label: "//foo/bar" means "//foo/bar:bar", so the
        # jar is "//foo/bar:bar_deploy.jar". Appending the suffix directly
        # would point at a different package ("//foo/bar_deploy.jar").
        target_name = binary.rpartition("/")[2]
        if target_name:
            return binary + ":" + target_name + "_deploy.jar"

    # "pkg:name", ":name" and bare "name" all take the suffix on the name.
    return binary + "_deploy.jar"

def jvm_image_layers(
        name,
        binary,
        layers = [],
        maven_lock_file = None,
        max_layers = 121,
        layer_strategy = "group_by_prefix",
        app_prefix = "/app",
        path_prefix = "app/",
        **kwargs):
    """Creates layered tarballs from a java_binary or scala_binary deploy jar.

    Args:
        name: target name
        binary: label string of a java_binary or scala_binary target. The
            deploy jar label is derived from it, including for shorthand
            labels such as "//foo/bar" (resolved to "//foo/bar:bar_deploy.jar").
        layers: list of path prefix strings; entries matching a prefix go into
            a separate tar layer. Unmatched entries go to the fallback tar.
        maven_lock_file: optional label of a maven lock file JSON. When set,
            the aspect collects maven artifact IDs from deps and the tool
            creates per-artifact tar layers using package prefixes from the
            lock file.
        max_layers: layer budget for artifact layers (default 121). The rule
            implementation subtracts len(layers) from this value to get the
            number of artifact layers it may create; the fallback tar is not
            counted.
        layer_strategy: strategy when artifacts exceed the available budget.
            "truncate": keep first N artifacts alphabetically, rest go to fallback.
            "group_by_prefix": group artifacts by Maven group ID prefix (default).
        app_prefix: classpath prefix inside the container (default "/app").
        path_prefix: prefix prepended to tar entry paths (default "app/").
        **kwargs: additional arguments passed to the underlying rule
    """
    _jvm_image_layers(
        name = name,
        binary = binary,
        deploy_jar = _deploy_jar_label(binary),
        layers = layers,
        maven_lock_file = maven_lock_file,
        max_layers = max_layers,
        layer_strategy = layer_strategy,
        app_prefix = app_prefix,
        path_prefix = path_prefix,
        **kwargs
    )
def _jvm_image_layers_impl(ctx):
    """Implementation of the _jvm_image_layers rule.

    Declares the entrypoint script and every output tar, then runs the
    splitter tool once over the deploy jar to produce all of them.

    Args:
        ctx: the rule context.

    Returns:
        DefaultInfo containing the tar outputs (fallback first, then explicit
        layers, then artifact layers) and an OutputGroupInfo exposing the
        entrypoint script under the "entrypoint" group.
    """
    target_name = ctx.label.name
    deploy_jar = ctx.file.deploy_jar

    outputs = []
    inputs = [deploy_jar]

    args = ctx.actions.args()
    args.add("--input", deploy_jar)

    # Entrypoint shell script. Kept out of `outputs` so DefaultInfo holds
    # only tar files.
    entrypoint = ctx.actions.declare_file(target_name + "_entrypoint.sh")
    args.add("--entrypoint", entrypoint)
    args.add("--app_prefix", ctx.attr.app_prefix)
    args.add("--path_prefix", ctx.attr.path_prefix)

    # Fallback output tar (entries not matching any layer or artifact prefix).
    fallback = ctx.actions.declare_file(target_name + ".tar")
    args.add("--output", fallback)
    outputs.append(fallback)

    # Per-layer output tars (explicit prefix layers).
    for prefix in ctx.attr.layers:
        layer_out = ctx.actions.declare_file(
            target_name + "." + _sanitize_prefix(prefix) + ".tar",
        )
        args.add("--output_layer", prefix + "=" + layer_out.path)
        outputs.append(layer_out)

    # Maven artifact layers via aspect.
    if ctx.file.maven_lock_file:
        lock_file = ctx.file.maven_lock_file
        inputs.append(lock_file)
        args.add("--maven_lock_file", lock_file)

        artifact_ids = sorted(ctx.attr.binary[MavenDepsInfo].artifacts.to_list())

        # Explicit layers consume part of the budget. Clamp at zero: a
        # negative value used as a slice bound would drop only the last few
        # artifacts instead of creating no artifact layers.
        available_slots = max(0, ctx.attr.max_layers - len(ctx.attr.layers))
        strategy = ctx.attr.layer_strategy

        if len(artifact_ids) <= available_slots or strategy == "truncate":
            # One layer per artifact. Under the limit the slice keeps every
            # artifact; with "truncate" the remainder lands in the fallback.
            for artifact_id in artifact_ids[:available_slots]:
                artifact_out = ctx.actions.declare_file(
                    target_name + ".maven." + _sanitize_artifact_id(artifact_id) + ".tar",
                )
                args.add("--artifact", artifact_id + "=" + artifact_out.path)
                outputs.append(artifact_out)
        elif strategy == "group_by_prefix":
            # Over the limit: one layer per Maven group prefix.
            for group_name, group_ids in _group_artifacts(artifact_ids, available_slots):
                group_out = ctx.actions.declare_file(
                    target_name + ".maven." + _sanitize_artifact_id(group_name) + ".tar",
                )
                if len(group_ids) == 1:
                    args.add("--artifact", group_ids[0] + "=" + group_out.path)
                else:
                    args.add("--artifact_group", ",".join(group_ids) + "=" + group_out.path)
                outputs.append(group_out)

    ctx.actions.run(
        inputs = inputs,
        outputs = outputs + [entrypoint],
        executable = ctx.executable._tool,
        arguments = [args],
        mnemonic = "JvmImageLayers",
        progress_message = "Splitting deploy jar into layers: %s" % ctx.label,
    )

    return [
        DefaultInfo(files = depset(outputs)),
        OutputGroupInfo(
            entrypoint = depset([entrypoint]),
        ),
    ]
# Private rule behind the jvm_image_layers macro. The macro derives
# `deploy_jar` from `binary` and forwards every other attribute.
_jvm_image_layers = rule(
    implementation = _jvm_image_layers_impl,
    attrs = {
        # The aspect attaches MavenDepsInfo, which the implementation reads
        # when a maven_lock_file is set.
        "binary": attr.label(
            mandatory = True,
            aspects = [_maven_deps_aspect],
            doc = "The java_binary or scala_binary target.",
        ),
        "deploy_jar": attr.label(
            mandatory = True,
            allow_single_file = [".jar"],
            doc = "The _deploy.jar implicit output of the java_ or scala_binary.",
        ),
        "layers": attr.string_list(
            default = [],
            doc = "Path prefixes for layer splitting. Each prefix gets its own output tar.",
        ),
        # Optional; artifact layers are only generated when this is set.
        "maven_lock_file": attr.label(
            allow_single_file = [".json"],
            doc = "Maven lock file JSON for artifact-based layer splitting.",
        ),
        # NOTE(review): the implementation subtracts len(layers) from this
        # value to compute the artifact budget, so explicit layers do count
        # against it, unlike what the doc string says. Confirm which one is
        # intended.
        "max_layers": attr.int(
            default = 121,
            doc = "Maximum number of artifact layers. Does not count explicit layers or fallback.",
        ),
        "layer_strategy": attr.string(
            default = "group_by_prefix",
            values = ["truncate", "group_by_prefix"],
            doc = "Strategy when artifacts exceed max_layers: 'truncate' or 'group_by_prefix'.",
        ),
        # Mandatory with no default here; the jvm_image_layers macro always
        # supplies it (macro default "/app").
        "app_prefix": attr.string(
            doc = "Classpath prefix inside the container.",
            mandatory = True,
        ),
        "path_prefix": attr.string(
            default = "app/",
            doc = "Path prefix prepended to tar entry names.",
        ),
        # Tool run by the implementation to split the deploy jar.
        "_tool": attr.label(
            default = "//cmd/executable_jar_splitter",
            executable = True,
            cfg = "exec",
            doc = "The executable_jar_splitter Go binary.",
        ),
    },
)
# ---------------------------------------------------------------------------
# jvm_jar_layers: Keep individual JARs intact (preserves reference.conf etc.)
# ---------------------------------------------------------------------------
def jvm_jar_layers(
        name,
        binary,
        maven_lock_file = None,
        max_layers = 121,
        layer_strategy = "group_by_prefix",
        app_prefix = "/app/lib",
        path_prefix = "app/lib/",
        **kwargs):
    """Creates layered tarballs containing individual dependency JARs.

    Unlike jvm_image_layers which explodes the deploy jar into loose files,
    this rule preserves each dependency JAR intact. This avoids resource
    merge conflicts (reference.conf, META-INF/services/*) that occur when
    singlejar merges duplicate entries.

    The container classpath uses Java's @file syntax to reference a classpath
    file listing all JARs.

    This macro only forwards its arguments to the private _jvm_jar_layers
    rule; it adds no targets of its own.

    Args:
        name: target name
        binary: label of a java_binary or scala_binary target
        maven_lock_file: optional label of a maven lock file JSON for
            artifact-based layer grouping. Artifact layers are only created
            when it is set.
        max_layers: maximum number of artifact layers (default 121).
        layer_strategy: strategy when artifacts exceed max_layers; one of
            "truncate" or "group_by_prefix" (default).
        app_prefix: classpath prefix inside the container (default "/app/lib").
        path_prefix: prefix prepended to tar entry paths (default "app/lib/").
        **kwargs: additional arguments passed to the underlying rule
    """
    _jvm_jar_layers(
        name = name,
        binary = binary,
        maven_lock_file = maven_lock_file,
        max_layers = max_layers,
        layer_strategy = layer_strategy,
        app_prefix = app_prefix,
        path_prefix = path_prefix,
        **kwargs
    )
def _jvm_jar_layers_impl(ctx):
    """Implementation of the _jvm_jar_layers rule.

    Writes the list of runtime JAR paths to a file, declares the classpath
    file and the output tars, then runs the layering tool once to produce
    them.

    Args:
        ctx: the rule context.

    Returns:
        DefaultInfo containing only the tar outputs (fallback first, then
        artifact layers) and an OutputGroupInfo exposing the classpath file
        under the "classpath" group.
    """
    target_name = ctx.label.name

    # Collect all runtime JARs from the binary's JavaInfo.
    runtime_jars = ctx.attr.binary[JavaInfo].transitive_runtime_jars.to_list()

    # Write a file listing all JAR paths (one per line) for the tool to read.
    jar_list = ctx.actions.declare_file(target_name + "_jars.txt")
    ctx.actions.write(
        output = jar_list,
        content = "\n".join([jar.path for jar in runtime_jars]),
    )

    tar_outputs = []
    inputs = runtime_jars + [jar_list]

    args = ctx.actions.args()
    args.add("--jar_list", jar_list)
    args.add("--app_prefix", ctx.attr.app_prefix)
    args.add("--path_prefix", ctx.attr.path_prefix)

    # Classpath file (not a tar, so kept separate from the tar outputs).
    classpath_file = ctx.actions.declare_file(target_name + "_classpath")
    args.add("--classpath", classpath_file)

    # Fallback output tar.
    fallback = ctx.actions.declare_file(target_name + ".tar")
    args.add("--fallback", fallback)
    tar_outputs.append(fallback)

    # Maven artifact layers via aspect.
    if ctx.file.maven_lock_file:
        lock_file = ctx.file.maven_lock_file
        inputs.append(lock_file)
        args.add("--maven_lock_file", lock_file)

        artifact_ids = sorted(ctx.attr.binary[MavenDepsInfo].artifacts.to_list())

        # Clamp at zero: a negative max_layers used as a slice bound would
        # drop only the last few artifacts instead of creating no layers.
        available_slots = max(0, ctx.attr.max_layers)
        strategy = ctx.attr.layer_strategy

        if len(artifact_ids) <= available_slots or strategy == "truncate":
            # One layer per artifact. Under the limit the slice keeps every
            # artifact; with "truncate" the remainder gets no layer of its own.
            for artifact_id in artifact_ids[:available_slots]:
                artifact_out = ctx.actions.declare_file(
                    target_name + ".maven." + _sanitize_artifact_id(artifact_id) + ".tar",
                )
                args.add("--artifact_layer", artifact_id + "=" + artifact_out.path)
                tar_outputs.append(artifact_out)
        elif strategy == "group_by_prefix":
            # Over the limit: one layer per Maven group prefix.
            for group_name, group_ids in _group_artifacts(artifact_ids, available_slots):
                group_out = ctx.actions.declare_file(
                    target_name + ".maven." + _sanitize_artifact_id(group_name) + ".tar",
                )
                if len(group_ids) == 1:
                    args.add("--artifact_layer", group_ids[0] + "=" + group_out.path)
                else:
                    args.add("--artifact_group_layer", ",".join(group_ids) + "=" + group_out.path)
                tar_outputs.append(group_out)

    ctx.actions.run(
        inputs = inputs,
        outputs = tar_outputs + [classpath_file],
        executable = ctx.executable._tool,
        arguments = [args],
        mnemonic = "JvmJarLayers",
        progress_message = "Layering JARs: %s" % ctx.label,
    )

    # DefaultInfo only includes tar files: the classpath file is a plain text
    # file and must not be passed to container_image's tars attribute.
    return [
        DefaultInfo(files = depset(tar_outputs)),
        OutputGroupInfo(
            classpath = depset([classpath_file]),
        ),
    ]
# Private rule behind the jvm_jar_layers macro, which forwards all of its
# arguments to the attributes below.
_jvm_jar_layers = rule(
    implementation = _jvm_jar_layers_impl,
    attrs = {
        # The implementation reads JavaInfo from this target; the aspect adds
        # MavenDepsInfo, which is used when a maven_lock_file is set.
        "binary": attr.label(
            mandatory = True,
            aspects = [_maven_deps_aspect],
            doc = "The java_binary or scala_binary target.",
        ),
        # Optional; artifact layers are only generated when this is set.
        "maven_lock_file": attr.label(
            allow_single_file = [".json"],
            doc = "Maven lock file JSON for artifact-based layer grouping.",
        ),
        "max_layers": attr.int(
            default = 121,
            doc = "Maximum number of artifact layers.",
        ),
        "layer_strategy": attr.string(
            default = "group_by_prefix",
            values = ["truncate", "group_by_prefix"],
            doc = "Strategy when artifacts exceed max_layers.",
        ),
        "app_prefix": attr.string(
            default = "/app/lib",
            doc = "Classpath prefix inside the container.",
        ),
        "path_prefix": attr.string(
            default = "app/lib/",
            doc = "Path prefix prepended to tar entry names.",
        ),
        # Tool run by the implementation to build the layer tars and the
        # classpath file.
        "_tool": attr.label(
            default = "//cmd/jar_layerer",
            executable = True,
            cfg = "exec",
            doc = "The jar_layerer Go binary.",
        ),
    },
)