Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 173 additions & 20 deletions rust/crates/runtime/src/permission_enforcer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,33 +173,119 @@ impl PermissionEnforcer {
}
}

/// Simple workspace boundary check via string prefix.
/// Workspace boundary check.
///
/// Resolves `.` and `..` components lexically *before* comparing against the
/// workspace root, so that traversal sequences like `/workspace/../../etc`
/// cannot escape the sandbox via a naive string prefix match. Normalization is
/// lexical (it does not touch the filesystem) because the target path may not
/// exist yet on a write, and we must not depend on CWD.
fn is_within_workspace(path: &str, workspace_root: &str) -> bool {
let normalized = if path.starts_with('/') {
let combined = if path.starts_with('/') {
path.to_owned()
} else {
format!("{workspace_root}/{path}")
};

let root = if workspace_root.ends_with('/') {
workspace_root.to_owned()
let normalized = lexically_normalize(&combined);
let root = lexically_normalize(workspace_root);
let root_with_slash = if root.ends_with('/') {
root.clone()
} else {
format!("{workspace_root}/")
format!("{root}/")
};

normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/')
normalized == root || normalized.starts_with(&root_with_slash)
}

/// Collapse `.` and `..` segments without consulting the filesystem.
/// `..` that would climb above an absolute root is clamped at `/`, so the
/// result can never be a prefix-match for a deeper workspace root.
fn lexically_normalize(path: &str) -> String {
let is_absolute = path.starts_with('/');
let mut stack: Vec<&str> = Vec::new();
for component in path.split('/') {
match component {
"" | "." => {}
".." => {
stack.pop();
}
other => stack.push(other),
}
}
let joined = stack.join("/");
if is_absolute {
format!("/{joined}")
} else {
joined
}
}

/// Conservative heuristic: is this bash command read-only?
///
/// Hardening notes:
/// - Any shell metacharacter that could chain, substitute, pipe, or redirect
/// into a state-changing command rejects the whole line. This blocks
/// `cat x; rm -rf y`, `cat x | sh`, `$(...)`, backticks, redirects, and
/// subshells regardless of the leading token.
/// - Language interpreters (`python`, `node`, `ruby`) and build drivers
/// (`cargo`, `rustc`) are NOT read-only: they execute arbitrary code, so they
/// are excluded from the allow-list.
/// - `git` is allowed only for a known set of non-mutating subcommands.
/// - `find` is rejected when it carries an action that can execute or delete.
///
/// Residual known gaps (documented, not yet closed): `sed`'s `w`/`e` script
/// commands and `awk`'s `system()` can still mutate — these require quoting or
/// metacharacters that the checks above usually catch, but a dedicated parser
/// would be more robust. Tracked as follow-up.
fn is_read_only_command(command: &str) -> bool {
let first_token = command
.split_whitespace()
// Shell metacharacters that enable command chaining, substitution,
// piping, redirection, or subshells. Presence of any of these means we
// cannot reason about the command from its leading token alone.
const SHELL_METACHARS: &[char] =
&[';', '|', '&', '$', '`', '>', '<', '(', ')', '{', '}', '\n'];
if command.contains(SHELL_METACHARS) {
return false;
}

let mut tokens = command.split_whitespace();
let first_token = tokens
.next()
.unwrap_or("")
.rsplit('/')
.next()
.unwrap_or("");

// `git` is only read-only for a curated set of subcommands.
if first_token == "git" {
let subcommand = tokens.next().unwrap_or("");
return matches!(
subcommand,
"status"
| "log"
| "diff"
| "show"
| "branch"
| "rev-parse"
| "ls-files"
| "blame"
| "describe"
| "tag"
| "remote"
);
}

// `find` can execute or delete via actions; reject those forms.
if first_token == "find"
&& (command.contains("-exec")
|| command.contains("-execdir")
|| command.contains("-delete")
|| command.contains("-ok")
|| command.contains("-fprintf"))
{
return false;
}

matches!(
first_token,
"cat"
Expand Down Expand Up @@ -237,8 +323,6 @@ fn is_read_only_command(command: &str) -> bool {
| "tr"
| "cut"
| "paste"
| "tee"
| "xargs"
| "test"
| "true"
| "false"
Expand All @@ -257,18 +341,8 @@ fn is_read_only_command(command: &str) -> bool {
| "tree"
| "jq"
| "yq"
| "python3"
| "python"
| "node"
| "ruby"
| "cargo"
| "rustc"
| "git"
| "gh"
) && !command.contains("-i ")
&& !command.contains("--in-place")
&& !command.contains(" > ")
&& !command.contains(" >> ")
}

#[cfg(test)]
Expand Down Expand Up @@ -375,6 +449,85 @@ mod tests {
assert!(!is_read_only_command("sed -i 's/a/b/' file"));
}

// --- Hardening regression tests (#2: read-only bypasses) ---

#[test]
fn read_only_rejects_command_chaining() {
// A leading read-only token must not launder a trailing destructive one.
assert!(!is_read_only_command("cat foo; rm -rf bar"));
assert!(!is_read_only_command("cat foo && rm -rf bar"));
assert!(!is_read_only_command("ls || rm bar"));
assert!(!is_read_only_command("cat foo | sh"));
assert!(!is_read_only_command("echo `rm bar`"));
assert!(!is_read_only_command("echo $(rm bar)"));
assert!(!is_read_only_command("echo x>file")); // redirect without spaces
}

#[test]
fn read_only_rejects_interpreters_and_build_drivers() {
// These execute arbitrary code and are no longer read-only.
assert!(!is_read_only_command(
"python3 -c \"import os; os.system('rm -rf .')\""
));
assert!(!is_read_only_command("python script.py"));
assert!(!is_read_only_command("node app.js"));
assert!(!is_read_only_command("ruby x.rb"));
assert!(!is_read_only_command("cargo run"));
assert!(!is_read_only_command("rustc evil.rs"));
}

#[test]
fn read_only_gates_git_subcommands() {
// Read-only git subcommands remain allowed...
assert!(is_read_only_command("git status"));
assert!(is_read_only_command("git diff HEAD~1"));
assert!(is_read_only_command("git show abc123"));
// ...but mutating/exfiltrating ones are rejected.
assert!(!is_read_only_command("git commit -m x"));
assert!(!is_read_only_command("git push origin main"));
assert!(!is_read_only_command("git reset --hard"));
assert!(!is_read_only_command("git clean -fd"));
assert!(!is_read_only_command("git config user.email a@b.c"));
}

#[test]
fn read_only_rejects_find_actions() {
assert!(is_read_only_command("find . -name Cargo.toml"));
assert!(!is_read_only_command("find . -delete"));
// -exec uses braces/semicolon which also trip the metachar guard,
// but the explicit action check is the primary defense.
assert!(!is_read_only_command("find . -execdir rm rf"));
}

// --- Hardening regression tests (#1: workspace path traversal) ---

#[test]
fn workspace_rejects_parent_traversal() {
assert!(!is_within_workspace("/workspace/../etc/passwd", "/workspace"));
assert!(!is_within_workspace(
"/workspace/../../etc/crontab",
"/workspace"
));
assert!(!is_within_workspace("../etc/passwd", "/workspace"));
assert!(!is_within_workspace(
"/workspace/sub/../../outside",
"/workspace"
));
// Legitimate paths still resolve inside.
assert!(is_within_workspace("/workspace/./src/main.rs", "/workspace"));
assert!(is_within_workspace(
"/workspace/src/../src/main.rs",
"/workspace"
));
}

#[test]
fn workspace_write_denies_traversal_escape() {
let enforcer = make_enforcer(PermissionMode::WorkspaceWrite);
let result = enforcer.check_file_write("/workspace/../../etc/crontab", "/workspace");
assert!(matches!(result, EnforcementResult::Denied { .. }));
}

#[test]
fn active_mode_returns_policy_mode() {
// given
Expand Down
34 changes: 34 additions & 0 deletions rust/crates/tools/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2571,6 +2571,20 @@ fn is_within_workspace(path: &str) -> bool {

let path = PathBuf::from(trimmed);

// Reject any parent-directory traversal. Callers never need `..` to refer
// to files inside the workspace, and `..` defeats both checks below: the
// relative branch only inspects the leading component, and the absolute
// branch's `canonicalize()` silently falls back to the literal `..` path
// when the target does not exist yet (e.g. a file about to be created).
// Returning false here is the safe direction: it classifies the command as
// requiring full-access permission rather than workspace-write.
if path
.components()
.any(|component| matches!(component, std::path::Component::ParentDir))
{
return false;
}

// If path is absolute, check if it starts with CWD
if path.is_absolute() {
if let Ok(cwd) = std::env::current_dir() {
Expand All @@ -2588,6 +2602,26 @@ fn run_powershell(input: PowerShellInput) -> Result<String, String> {
to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?)
}

#[cfg(test)]
mod workspace_traversal_guard_tests {
use super::is_within_workspace;

#[test]
fn rejects_parent_traversal_components() {
// Leading and embedded `..` must both be rejected (was previously a hole
// because only the leading component was inspected).
assert!(!is_within_workspace("../secrets"));
assert!(!is_within_workspace("src/../../etc/passwd"));
assert!(!is_within_workspace("a/b/../../../etc/crontab"));
}

#[test]
fn allows_plain_relative_paths() {
assert!(is_within_workspace("src/main.rs"));
assert!(is_within_workspace("Cargo.toml"));
}
}

fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
serde_json::to_string_pretty(&value).map_err(|error| error.to_string())
}
Expand Down