Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ defaults to the image used by the `gateway` stage in
`deploy/docker/Dockerfile.images`; set `OPENSHELL_E2E_GPU_PROBE_IMAGE` to
override it. Per-device checks run only for NVIDIA CDI device IDs reported by
the runtime's discovered devices list, so WSL2 hosts that expose only
`nvidia.com/gpu=all` skip the index-based cases.
`nvidia.com/gpu=all` skip the index-based cases. Exact CDI device selection is
passed through `--driver-config-json` with the active Docker or Podman driver
key.

Run the Docker-backed Rust CLI e2e suite:

Expand Down
8 changes: 0 additions & 8 deletions crates/openshell-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1215,12 +1215,6 @@ enum SandboxCommands {
#[arg(long)]
gpu: bool,

/// Target a driver-specific GPU device. Docker and Podman use CDI device IDs
/// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index.
/// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection.
#[arg(long, requires = "gpu")]
gpu_device: Option<String>,

/// CPU limit for the sandbox (for example: 500m, 1, 2.5).
#[arg(long)]
cpu: Option<String>,
Expand Down Expand Up @@ -2556,7 +2550,6 @@ async fn main() -> Result<()> {
no_keep,
editor,
gpu,
gpu_device,
cpu,
memory,
driver_config_json,
Expand Down Expand Up @@ -2641,7 +2634,6 @@ async fn main() -> Result<()> {
&upload_specs,
keep,
gpu,
gpu_device.as_deref(),
cpu.as_deref(),
memory.as_deref(),
driver_config_json.as_deref(),
Expand Down
2 changes: 0 additions & 2 deletions crates/openshell-cli/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1725,7 +1725,6 @@ pub async fn sandbox_create(
uploads: &[(String, Option<String>, bool)],
keep: bool,
gpu: bool,
gpu_device: Option<&str>,
cpu: Option<&str>,
memory: Option<&str>,
driver_config_json: Option<&str>,
Expand Down Expand Up @@ -1817,7 +1816,6 @@ pub async fn sandbox_create(
let request = CreateSandboxRequest {
spec: Some(SandboxSpec {
gpu: requested_gpu,
gpu_device: gpu_device.unwrap_or_default().to_string(),
environment: environment.clone(),
policy,
providers: configured_providers,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,6 @@ async fn sandbox_create_keeps_command_sessions_by_default() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -827,7 +826,6 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() {
&[],
true,
false,
None,
Some("500m"),
Some("2Gi"),
None,
Expand Down Expand Up @@ -907,7 +905,6 @@ async fn sandbox_create_sends_driver_config_json() {
false,
None,
None,
None,
Some(r#"{"kubernetes":{"pod":{"priority_class_name":"batch-low"}}}"#),
None,
&[],
Expand Down Expand Up @@ -984,7 +981,6 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1043,7 +1039,6 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1098,7 +1093,6 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1145,7 +1139,6 @@ async fn sandbox_create_times_out_when_only_logs_arrive() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1188,7 +1181,6 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1235,7 +1227,6 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1282,7 +1273,6 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1329,7 +1319,6 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
None,
None,
None,
None,
&[],
None,
Some(openshell_core::forward::ForwardSpec::new(forward_port)),
Expand Down Expand Up @@ -1372,7 +1361,6 @@ async fn sandbox_create_sends_environment_variables() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down
4 changes: 4 additions & 0 deletions crates/openshell-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ pub enum ComputeDriverError {
/// The requested sandbox already exists.
#[error("sandbox already exists")]
AlreadyExists,
/// The request contains an invalid argument.
#[error("{0}")]
InvalidArgument(String),
/// A precondition for the operation was not met.
#[error("{0}")]
Precondition(String),
Expand All @@ -125,6 +128,7 @@ impl From<ComputeDriverError> for tonic::Status {
fn from(err: ComputeDriverError) -> Self {
match err {
ComputeDriverError::AlreadyExists => Self::already_exists("sandbox already exists"),
ComputeDriverError::InvalidArgument(m) => Self::invalid_argument(m),
ComputeDriverError::Precondition(m) => Self::failed_precondition(m),
ComputeDriverError::Message(m) => Self::internal(m),
}
Expand Down
33 changes: 21 additions & 12 deletions crates/openshell-core/src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
use crate::config::CDI_GPU_DEVICE_ALL;

/// Resolve the existing GPU request fields into CDI device identifiers.
/// Resolve a GPU request into CDI device identifiers.
///
/// `None` means no GPU was requested. A GPU request with no explicit device
/// ID uses the CDI all-GPU request; otherwise the driver-native ID passes
/// through unchanged.
/// `None` means no GPU was requested. A GPU request with no explicit CDI
/// devices uses the CDI all-GPU request; otherwise the driver-configured CDI
/// devices pass through unchanged.
#[must_use]
pub fn cdi_gpu_device_ids(gpu: bool, gpu_device: &str) -> Option<Vec<String>> {
pub fn cdi_gpu_device_ids(gpu: bool, cdi_devices: &[String]) -> Option<Vec<String>> {
gpu.then(|| {
if gpu_device.is_empty() {
if cdi_devices.is_empty() {
vec![CDI_GPU_DEVICE_ALL.to_string()]
Comment thread
alangou marked this conversation as resolved.
} else {
vec![gpu_device.to_string()]
cdi_devices.to_vec()
}
})
}
Expand All @@ -27,22 +27,31 @@ mod tests {

#[test]
fn cdi_gpu_device_ids_returns_none_when_absent() {
assert_eq!(cdi_gpu_device_ids(false, ""), None);
assert_eq!(cdi_gpu_device_ids(false, &[]), None);
}

#[test]
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
assert_eq!(
cdi_gpu_device_ids(true, ""),
cdi_gpu_device_ids(true, &[]),
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
);
}

#[test]
fn cdi_gpu_device_ids_passes_explicit_device_id_through() {
fn cdi_gpu_device_ids_passes_explicit_device_ids_through() {
assert_eq!(
cdi_gpu_device_ids(true, "nvidia.com/gpu=0"),
Some(vec!["nvidia.com/gpu=0".to_string()])
cdi_gpu_device_ids(
true,
&[
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
]
),
Some(vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
])
);
}
}
75 changes: 75 additions & 0 deletions crates/openshell-core/src/proto_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

//! Helpers for decoding `google.protobuf.Struct` values.

use serde::{Deserialize, Deserializer, de::Error as _};

/// Convert a protobuf Struct into a JSON object for typed serde decoding.
#[must_use]
pub fn struct_to_json_object(
Expand Down Expand Up @@ -38,3 +40,76 @@ pub fn value_to_json(value: &prost_types::Value) -> serde_json::Value {
Some(prost_types::value::Kind::NullValue(_)) | None => serde_json::Value::Null,
}
}

/// Deserialize a present field as a non-empty list of non-empty strings.
///
/// Use with `#[serde(default, deserialize_with = "...")]` on
/// `Option<Vec<String>>` fields. Missing fields use the option default; present
/// fields must be arrays and cannot be empty.
pub fn deserialize_optional_non_empty_string_list<'de, D>(
deserializer: D,
) -> Result<Option<Vec<String>>, D::Error>
where
D: Deserializer<'de>,
{
let values = Vec::<String>::deserialize(deserializer)?;
if values.is_empty() {
return Err(D::Error::custom("must be a non-empty list of strings"));
}

for (idx, value) in values.iter().enumerate() {
if value.trim().is_empty() {
return Err(D::Error::custom(format!(
"[{idx}] must be a non-empty string"
)));
}
}

Ok(Some(values))
}

#[cfg(test)]
mod tests {
use super::*;

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct TestConfig {
#[serde(
default,
deserialize_with = "deserialize_optional_non_empty_string_list"
)]
devices: Option<Vec<String>>,
}

#[test]
fn optional_non_empty_string_list_defaults_when_absent() {
let config: TestConfig = serde_json::from_value(serde_json::json!({})).unwrap();

assert_eq!(config.devices, None);
}

#[test]
fn optional_non_empty_string_list_parses_present_list() {
let config: TestConfig =
serde_json::from_value(serde_json::json!({"devices": ["nvidia.com/gpu=0"]})).unwrap();

assert_eq!(config.devices, Some(vec!["nvidia.com/gpu=0".to_string()]));
}

#[test]
fn optional_non_empty_string_list_rejects_empty_list() {
let err =
serde_json::from_value::<TestConfig>(serde_json::json!({"devices": []})).unwrap_err();

assert!(err.to_string().contains("non-empty list"));
}

#[test]
fn optional_non_empty_string_list_rejects_empty_string() {
let err =
serde_json::from_value::<TestConfig>(serde_json::json!({"devices": [""]})).unwrap_err();

assert!(err.to_string().contains("non-empty string"));
}
}
1 change: 1 addition & 0 deletions crates/openshell-driver-docker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ tempfile = "3"
url = { workspace = true }

[dev-dependencies]
prost-types = { workspace = true }
temp-env = "0.3"

[lints]
Expand Down
2 changes: 1 addition & 1 deletion crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ contract:
| `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
| `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
| `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |

The agent child process does not retain these supervisor privileges.

Expand Down
Loading
Loading