Skip to content

Commit dfc712d

Browse files
committed
Revert "shim: Fix running rootless on Linux"
This reverts commit c94e788.
1 parent 91b8685 commit dfc712d

3 files changed

Lines changed: 30 additions & 40 deletions

File tree

internal/shim/manager/manager_unix.go

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ import (
2525
"os"
2626
"os/exec"
2727
"path/filepath"
28+
goruntime "runtime"
2829
"strconv"
2930
"strings"
3031
"syscall"
@@ -183,12 +184,17 @@ func (manager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ shi
183184
cmd.ExtraFiles = append(cmd.ExtraFiles, s.f)
184185
}
185186

186-
cloneMntNs(cmd)
187+
goruntime.LockOSThread()
188+
if err := setupMntNs(); err != nil {
189+
return params, err
190+
}
187191

188192
if err := cmd.Start(); err != nil {
189193
return params, err
190194
}
191195

196+
goruntime.UnlockOSThread()
197+
192198
defer func() {
193199
if retErr != nil {
194200
cmd.Process.Kill()

internal/shim/manager/mount_linux.go

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -17,44 +17,28 @@
1717
package manager
1818

1919
import (
20-
"os"
21-
"os/exec"
22-
"syscall"
20+
"fmt"
21+
22+
"golang.org/x/sys/unix"
2323
)
2424

25-
// cloneMntNs configures the child command to start in a new user + mount
26-
// namespace. The user namespace provides mount isolation and grants the
27-
// child capabilities within it, without requiring or granting real host
28-
// capabilities. User namespaces are available unprivileged on many
29-
// distros (since Linux 3.8), but some may gate them via sysctl (e.g.
30-
// kernel.unprivileged_userns_clone).
31-
//
32-
// For a VM-based runtime like nerdbox, the shim does not need real host
33-
// root — it needs /dev/kvm access (checked against mapped host UID) and
34-
// file access (same user). The user namespace is defense-in-depth: it
35-
// limits the shim's host-level capabilities even when the daemon runs as
36-
// root.
37-
//
38-
// We use clone flags instead of unshare(2) because unshare(CLONE_NEWUSER)
39-
// requires the calling process to be single-threaded, which is not
40-
// possible in a Go program (the runtime uses multiple OS threads).
41-
//
42-
// The new mount namespace inherits copies of the parent's mounts with
43-
// the same propagation flags. The shim performs rootfs mounts (overlay /
44-
// bind) inside this namespace. On hosts where / is shared, those mounts
45-
// could in theory propagate back. Because the child also runs in a user
46-
// namespace, it cannot remount / as MS_SLAVE. In practice this is safe:
47-
// the mounts are into bundle-specific paths that are cleaned up on
48-
// container delete, and the VM itself performs all container-visible
49-
// filesystem setup.
50-
func cloneMntNs(cmd *exec.Cmd) {
51-
uid := os.Getuid()
52-
gid := os.Getgid()
53-
cmd.SysProcAttr.Cloneflags |= syscall.CLONE_NEWUSER | syscall.CLONE_NEWNS
54-
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
55-
{ContainerID: uid, HostID: uid, Size: 1},
25+
func setupMntNs() error {
26+
err := unix.Unshare(unix.CLONE_NEWNS)
27+
if err != nil {
28+
return err
5629
}
57-
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
58-
{ContainerID: gid, HostID: gid, Size: 1},
30+
31+
err = unix.Mount("", "/", "", unix.MS_REC|unix.MS_SLAVE, "")
32+
if err != nil {
33+
err = fmt.Errorf("failed to mount with slave: %v", err)
34+
return err
5935
}
36+
37+
err = unix.Mount("", "/", "", unix.MS_REC|unix.MS_SHARED, "")
38+
if err != nil {
39+
err = fmt.Errorf("failed to mount with shared: %v", err)
40+
return err
41+
}
42+
43+
return nil
6044
}

internal/shim/manager/mount_other.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@
1818

1919
package manager
2020

21-
import "os/exec"
22-
23-
func cloneMntNs(_ *exec.Cmd) {}
21+
func setupMntNs() error {
22+
return nil
23+
}

0 commit comments

Comments
 (0)