From b034f3f9800d6ba8957f5789d6194e63a82c576b Mon Sep 17 00:00:00 2001 From: Shreyansh Sancheti Date: Wed, 20 May 2026 21:17:37 +0530 Subject: [PATCH 1/2] test: add LCOWV2 feature flag and v2 LCOW test surface Introduce a `-feature LCOWV2` gate across the functional and cri-containerd test suites so the v2 LCOW controller can be exercised end-to-end without disturbing the existing v1 LCOW pipeline. Functional suite: * LCOWV2 implies LCOW in TestMain so featureLCOW-gated tests are reachable, then defaultLCOWOptions calls requireV1Only to short- circuit every v1 path cleanly. Net effect: only TestLCOW_V2_* runs. * Add helpers_v2_test.go and lcow_v2_test.go covering the v2 surface via internal/builder/vm/lcow + the v2 controller in-process. * Export LCOWBootFilesPath from test/pkg/uvm so v2 tests can resolve boot files without going through v1 *uvm.OptionsLCOW. cri-containerd suite: * Mirror the LCOWV2-implies-LCOW pattern; thread RuntimeHandler onto the CRI ImageSpec when pulling LCOW images so containerd selects the windows-lcow snapshotter and linux/amd64 platform (the sandbox- platform label alone is not honored by containerd >=2.0). * Add lcow_v2_test.go and the runhcs-lcow-v2 runtime handler constant. Flag plumbing: * Add IncludesExplicit and Include to IncludeExcludeStringSet so test TestMain hooks can implement feature implications safely after flag.Parse without breaking default-when-unset semantics. CI: * New `Build and run functional testing binary (LCOWV2)` step that invokes functional.test.exe -feature=LCOWV2. continue-on-error while the v2 surface is being grown. CRI v2 testing is intentionally deferred to a follow-up alongside the integration-tests v2 setup. * Build and upload containerd-shim-lcow-v2.exe as a test artifact. Signed-off-by: Shreyansh Sancheti --- .github/workflows/ci.yml | 51 +++ .../container_layers_packing_test.go | 3 + test/cri-containerd/disable_vpmem_test.go | 1 + test/cri-containerd/helper_sandbox_test.go | 36 ++ test/cri-containerd/lcow_v2_test.go | 104 +++++ test/cri-containerd/main_test.go | 30 +- test/cri-containerd/unmap_vpmem_test.go | 1 + test/functional/helpers_v2_test.go | 25 ++ test/functional/lcow_v2_test.go | 397 ++++++++++++++++++ test/functional/main_test.go | 18 + test/pkg/flag/flag.go | 22 + test/pkg/uvm/lcow.go | 11 + 12 files changed, 697 insertions(+), 2 deletions(-) create mode 100644 test/cri-containerd/lcow_v2_test.go create mode 100644 test/functional/helpers_v2_test.go create mode 100644 test/functional/lcow_v2_test.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c880d3123f..4796a52c79 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -383,6 +383,56 @@ jobs: exit $ec working-directory: test + # Run the v2 LCOW functional tests against the v2 vm.Controller + # end-to-end. The chokepoint in defaultLCOWOptions auto-skips every + # v1 LCOW test under -feature LCOWV2, so this invocation effectively + # runs only the TestLCOW_V2_* surface (and any future v2-aware tests). + # Marked continue-on-error: true while this surface is being grown so + # a v2-specific failure does not block the v1 pipeline. + # + # NOTE: Functional tests build HCS documents directly via + # internal/builder/vm/lcow and create v2 controllers in-process — they + # do NOT exercise the CRI -> containerd -> shim path. CRI v2 testing + # (Test_V2_LCOW_* in test/cri-containerd) requires a separate CI step + # that starts containerd with `snapshotter = "windows-lcow"` set on + # both `runhcs-lcow` AND `runhcs-lcow-v2` runtime blocks (the mapping + # logic lives in the containerd CRI plugin). That step is intentionally + # deferred to a follow-up PR alongside the integration-tests v2 setup. + - name: Build and run functional testing binary (LCOWV2) + continue-on-error: true + run: | + if ( -not (Test-Path './functional.test.exe') ) { + Write-Output '::warning::functional.test.exe missing; skipping LCOWV2 run' + exit 0 + } + + $gotestsum = Get-Command -Name 'gotestsum' -CommandType Application -ErrorAction 'Stop' | + Select-Object -First 1 -ExpandProperty Source + $go = Get-Command -Name 'go' -CommandType Application -ErrorAction Stop | + Select-Object -First 1 -ExpandProperty Source + + # LCOWV2 implies LCOW in TestMain so featureLCOW-gated tests would + # also be reachable, but defaultLCOWOptions calls requireV1Only so + # they skip cleanly. Net effect: only TestLCOW_V2_* tests actually + # execute, exercising the v2 controller end-to-end. + $cmd = '${{ env.GOTESTSUM_CMD_RAW }} ./functional.test.exe -feature=LCOWV2 -exclude=LCOWIntegrity -test.timeout=1h -test.v -log-level=info' + $cmd = $cmd -replace '\bgo\b', $go + $cmd = $cmd -replace '\bgotestsum\b', $gotestsum + Write-Host "gotestsum command: $cmd" + + psexec -nobanner -w (Get-Location) -s cmd /c "$cmd > v2-out.txt 2>&1" + $ec = $LASTEXITCODE + + Get-Content v2-out.txt + + exit $ec + working-directory: test + + # Build the v2 LCOW shim binary so it is included in test_binaries + # artifact uploads and available to anyone reproducing v2 test runs. + - name: Build containerd-shim-lcow-v2 binary + run: ${{ env.GO_BUILD_CMD }} -tags lcow -o test/containerd-shim-lcow-v2.exe ./cmd/containerd-shim-lcow-v2 + # build testing binaries - name: Build cri-containerd Testing Binary run: ${{ env.GO_BUILD_TEST_CMD }} ./cri-containerd @@ -400,6 +450,7 @@ jobs: name: test_binaries_${{ matrix.name }} path: | test/containerd-shim-runhcs-v1.test.exe + test/containerd-shim-lcow-v2.exe test/cri-containerd.test.exe test/functional.test.exe test/runhcs.test.exe diff --git a/test/cri-containerd/container_layers_packing_test.go b/test/cri-containerd/container_layers_packing_test.go index ace5872e24..9728d2fa28 100644 --- a/test/cri-containerd/container_layers_packing_test.go +++ b/test/cri-containerd/container_layers_packing_test.go @@ -46,6 +46,7 @@ func Test_Container_Layer_Packing_On_VPMem(t *testing.T) { defer cancel() requireFeatures(t, featureLCOW) + requireV1Only(t) // use ubuntu to make sure that multiple container layers will be mapped properly pullRequiredLCOWImages(t, []string{imageLcowK8sPause, ubuntu1804}) @@ -101,6 +102,7 @@ func Test_Many_Container_Layers_Supported_On_VPMem(t *testing.T) { defer cancel() requireFeatures(t, featureLCOW) + requireV1Only(t) pullRequiredLCOWImages(t, []string{imageLcowK8sPause, alpine70ExtraLayers, ubuntu70ExtraLayers}) @@ -132,6 +134,7 @@ func Test_Annotation_Disable_Multi_Mapping(t *testing.T) { defer cancel() requireFeatures(t, featureLCOW) + requireV1Only(t) pullRequiredLCOWImages(t, []string{imageLcowK8sPause, alpine70ExtraLayers}) diff --git a/test/cri-containerd/disable_vpmem_test.go b/test/cri-containerd/disable_vpmem_test.go index 4c5104f46c..be6c22aaaa 100644 --- a/test/cri-containerd/disable_vpmem_test.go +++ b/test/cri-containerd/disable_vpmem_test.go @@ -28,6 +28,7 @@ func uniqueRef() string { func Test_70LayerImagesWithNoVPmemForLayers(t *testing.T) { requireFeatures(t, featureLCOW) + requireV1Only(t) ubuntu70Image := "cplatpublic.azurecr.io/ubuntu70extra:18.04" alpine70Image := "cplatpublic.azurecr.io/alpine70extra:latest" diff --git a/test/cri-containerd/helper_sandbox_test.go b/test/cri-containerd/helper_sandbox_test.go index 984b53d522..67aa02ad90 100644 --- a/test/cri-containerd/helper_sandbox_test.go +++ b/test/cri-containerd/helper_sandbox_test.go @@ -10,6 +10,42 @@ import ( runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) +// isLCOWV2 reports whether the LCOWV2 feature flag is set on the current test +// invocation. Callers should prefer the higher-level helpers below +// (lcowRuntimeHandlerForTest, requireV1Only) so the V2 selection logic stays +// in one place. +func isLCOWV2() bool { + return flagFeatures.IsSet(featureLCOWV2) +} + +// lcowRuntimeHandlerForTest returns the LCOW runtime handler that the current +// test should target. When the LCOWV2 feature flag is set, it returns the V2 +// shim handler (containerd-shim-lcow-v2.exe via runtime_type +// io.containerd.lcow.v2). Otherwise it returns the V1 handler +// (containerd-shim-runhcs-v1.exe via runtime_type io.containerd.runhcs.v1). +// +// Tests that exercise generic LCOW lifecycle and work on both shims should use +// this helper instead of hard-coding lcowRuntimeHandler, so the same suite can +// be run twice in CI: once for V1 (default) and once with -feature LCOWV2 for V2. +// Mirrors the pattern in the azcri repo. +func lcowRuntimeHandlerForTest(tb testing.TB) string { + tb.Helper() + if isLCOWV2() { + return lcowV2RuntimeHandler + } + return lcowRuntimeHandler +} + +// requireV1Only skips the test when the LCOWV2 feature flag is set. +// Use this for tests that depend on V1-only features such as VPMEM, +// VHD/initrd boot modes, or other UVM knobs not exposed in the v2 builder. +func requireV1Only(tb testing.TB) { + tb.Helper() + if isLCOWV2() { + tb.Skip("test requires V1 shim features (VPMEM/VHD/initrd) not exposed in V2") + } +} + type SandboxConfigOpt func(*runtime.PodSandboxConfig) error func WithSandboxAnnotations(annotations map[string]string) SandboxConfigOpt { diff --git a/test/cri-containerd/lcow_v2_test.go b/test/cri-containerd/lcow_v2_test.go new file mode 100644 index 0000000000..0018a29820 --- /dev/null +++ b/test/cri-containerd/lcow_v2_test.go @@ -0,0 +1,104 @@ +//go:build windows && functional +// +build windows,functional + +// V2 LCOW-specific CRI tests. These mirror the Test_V2Sandbox_* pattern +// established in the azcri test suite: each test gates on featureLCOWV2 and +// targets lcowV2RuntimeHandler directly (no v1 fallback), because the +// scenarios under test exercise the V2 runtime path. +// +// To run these tests: +// 1. The CI containerd config must register `runhcs-lcow-v2` → +// `containerd-shim-lcow-v2.exe` (runtime_type io.containerd.lcow.v2). +// 2. The test binary must be invoked with -feature=LCOWV2. + +package cri_containerd + +import ( + "context" + "testing" + + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +// Test_V2_LCOW_PodLifecycle exercises the basic pod-sandbox lifecycle through +// the V2 shim: RunPodSandbox → StopPodSandbox → RemovePodSandbox. This is the +// minimum end-to-end smoke test that proves containerd → shim handshake works +// on the V2 path. +func Test_V2_LCOW_PodLifecycle(t *testing.T) { + requireFeatures(t, featureLCOWV2) + + pullRequiredLCOWImages(t, []string{imageLcowK8sPause}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + podReq := getRunPodSandboxRequest(t, lcowV2RuntimeHandler) + podID := runPodSandbox(t, client, ctx, podReq) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) +} + +// Test_V2_LCOW_ContainerLifecycle exercises a full container lifecycle inside +// a V2 sandbox: RunPodSandbox → CreateContainer → StartContainer → +// StopContainer → RemoveContainer → StopPodSandbox → RemovePodSandbox. +func Test_V2_LCOW_ContainerLifecycle(t *testing.T) { + requireFeatures(t, featureLCOWV2) + + pullRequiredLCOWImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + podReq := getRunPodSandboxRequest(t, lcowV2RuntimeHandler) + podID := runPodSandbox(t, client, ctx, podReq) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) + + cReq := getCreateContainerRequest(podID, "alpine", imageLcowAlpine, + []string{"echo", "hello"}, podReq.Config) + containerID := createContainer(t, client, ctx, cReq) + defer removeContainer(t, client, ctx, containerID) + + startContainer(t, client, ctx, containerID) + stopContainer(t, client, ctx, containerID) +} + +// Test_V2_LCOW_ContainerExec runs a workload container and verifies that +// ExecSync into it succeeds with the expected exit code. Validates the GCS +// exec path through the V2 controller. +func Test_V2_LCOW_ContainerExec(t *testing.T) { + requireFeatures(t, featureLCOWV2) + + pullRequiredLCOWImages(t, []string{imageLcowK8sPause, imageLcowAlpine}) + + client := newTestRuntimeClient(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + podReq := getRunPodSandboxRequest(t, lcowV2RuntimeHandler) + podID := runPodSandbox(t, client, ctx, podReq) + defer removePodSandbox(t, client, ctx, podID) + defer stopPodSandbox(t, client, ctx, podID) + + cReq := getCreateContainerRequest(podID, "alpine", imageLcowAlpine, + []string{"top"}, podReq.Config) + containerID := createContainer(t, client, ctx, cReq) + defer removeContainer(t, client, ctx, containerID) + + startContainer(t, client, ctx, containerID) + defer stopContainer(t, client, ctx, containerID) + + execResp, err := client.ExecSync(ctx, &runtime.ExecSyncRequest{ + ContainerId: containerID, + Cmd: []string{"echo", "hello"}, + Timeout: 20, + }) + if err != nil { + t.Fatalf("ExecSync failed: %v", err) + } + if execResp.ExitCode != 0 { + t.Fatalf("ExecSync returned exit code %d, stderr: %s", execResp.ExitCode, string(execResp.Stderr)) + } +} diff --git a/test/cri-containerd/main_test.go b/test/cri-containerd/main_test.go index fdbd79a8c6..13b379be1f 100644 --- a/test/cri-containerd/main_test.go +++ b/test/cri-containerd/main_test.go @@ -29,6 +29,7 @@ const ( // TODO: remove lcow when shim only tests are relocated lcowRuntimeHandler = "runhcs-lcow" + lcowV2RuntimeHandler = "runhcs-lcow-v2" wcowProcessRuntimeHandler = "runhcs-wcow-process" wcowHypervisorRuntimeHandler = "runhcs-wcow-hypervisor" wcowHypervisor17763RuntimeHandler = "runhcs-wcow-hypervisor-17763" @@ -98,6 +99,7 @@ var ( // Make sure you update allFeatures below with any new features you add. const ( featureLCOW = "LCOW" + featureLCOWV2 = "LCOWV2" featureWCOWProcess = "WCOWProcess" featureWCOWHypervisor = "WCOWHypervisor" featureHostProcess = "HostProcess" @@ -109,6 +111,7 @@ const ( var allFeatures = []string{ featureLCOW, + featureLCOWV2, featureWCOWProcess, featureWCOWHypervisor, featureHostProcess, @@ -120,6 +123,15 @@ var allFeatures = []string{ func TestMain(m *testing.M) { flag.Parse() + // LCOWV2 implies LCOW: the v2 shim IS an LCOW runtime, so a run gated + // only on `-feature LCOWV2` should still execute tests that gate on + // `featureLCOW`. Without this implication, the same test suite would + // need parallel `-feature LCOW` and `-feature LCOWV2` invocations OR + // every LCOW-gated test would have to be rewritten to accept either + // flag. Mirrors the pattern established in the azcri test suite. + if flagFeatures.IncludesExplicit() && flagFeatures.IsSet(featureLCOWV2) { + flagFeatures.Include(featureLCOW) + } os.Exit(m.Run()) } @@ -205,10 +217,23 @@ func pullRequiredLCOWImages(tb testing.TB, images []string, opts ...SandboxConfi opts = append(opts, WithSandboxLabels(map[string]string{ "sandbox-platform": "linux/amd64", })) - pullRequiredImagesWithOptions(tb, images, opts...) + // Set RuntimeHandler on ImageSpec so containerd CRI picks the LCOW + // runtime's configured snapshotter (windows-lcow) and platform + // (linux/amd64) rather than the default windows/amd64. The sandbox- + // platform label alone is not honored by modern containerd (≥2.0). + pullRequiredImagesWithRuntime(tb, images, lcowRuntimeHandlerForTest(tb), opts...) } func pullRequiredImagesWithOptions(tb testing.TB, images []string, opts ...SandboxConfigOpt) { + tb.Helper() + pullRequiredImagesWithRuntime(tb, images, "", opts...) +} + +// pullRequiredImagesWithRuntime pulls each image with the given runtime +// handler set on the CRI ImageSpec. Empty runtimeHandler means use the +// containerd default. Tests pulling LCOW images should pass the LCOW handler +// so containerd selects the windows-lcow snapshotter and linux/amd64 platform. +func pullRequiredImagesWithRuntime(tb testing.TB, images []string, runtimeHandler string, opts ...SandboxConfigOpt) { tb.Helper() if len(images) < 1 { return @@ -228,7 +253,8 @@ func pullRequiredImagesWithOptions(tb testing.TB, images []string, opts ...Sandb for _, image := range images { _, err := client.PullImage(ctx, &runtime.PullImageRequest{ Image: &runtime.ImageSpec{ - Image: image, + Image: image, + RuntimeHandler: runtimeHandler, }, SandboxConfig: sb, }) diff --git a/test/cri-containerd/unmap_vpmem_test.go b/test/cri-containerd/unmap_vpmem_test.go index af850325dd..15f96b3643 100644 --- a/test/cri-containerd/unmap_vpmem_test.go +++ b/test/cri-containerd/unmap_vpmem_test.go @@ -28,6 +28,7 @@ const ( // leave it intact and recycle, the bug no longer surfaces. func Test_Force_LayerUnmap_Not_In_Order(t *testing.T) { requireFeatures(t, featureLCOW) + requireV1Only(t) pullRequiredLCOWImages(t, []string{imageLcowK8sPause, ubuntu2004LargeLayers, ubuntu2204LargeLayers}) diff --git a/test/functional/helpers_v2_test.go b/test/functional/helpers_v2_test.go new file mode 100644 index 0000000000..8ba55ecafb --- /dev/null +++ b/test/functional/helpers_v2_test.go @@ -0,0 +1,25 @@ +//go:build windows && functional + +package functional + +import "testing" + +// isLCOWV2 reports whether the LCOWV2 feature flag is set on the current test +// invocation. Callers should prefer the higher-level helper requireV1Only so +// the V2 selection logic stays in one place. +func isLCOWV2() bool { + return flagFeatures.IsSet(featureLCOWV2) +} + +// requireV1Only skips the test when the LCOWV2 feature flag is set. Use this +// for tests that depend on V1-only features such as VPMEM, VHD/initrd boot +// modes, KernelDirect, or other UVM knobs not exposed in the v2 builder. +// +// Mirrors the pattern established in the azcri repo and in the CRI test suite +// in test/cri-containerd/. +func requireV1Only(tb testing.TB) { + tb.Helper() + if isLCOWV2() { + tb.Skip("test requires V1 shim features (VPMEM/VHD/initrd/KernelDirect) not exposed in V2") + } +} diff --git a/test/functional/lcow_v2_test.go b/test/functional/lcow_v2_test.go new file mode 100644 index 0000000000..8d97b43080 --- /dev/null +++ b/test/functional/lcow_v2_test.go @@ -0,0 +1,397 @@ +//go:build windows && functional + +// V2 LCOW utility VM functional tests. +// +// These tests exercise the v2 vm.Controller lifecycle end-to-end against a +// real Hyper-V guest, validating every method on the v2 Controller interface +// that does not require a container or device sub-controller. They mirror the +// scope of the v1 TestLCOW_UVM_* tests, retargeted to the v2 builder + v2 +// controller API: +// +// v1 | v2 equivalent here +// ---------------------------------+----------------------------------------- +// TestLCOW_UVM_Boot | TestLCOW_V2_UVM_BootIterations +// TestLCOW_UVM_KernelArgs | TestLCOW_V2_UVM_KernelArgs +// testuvm.CreateAndStartLCOW... | TestLCOW_V2_UVM_Lifecycle +// TestPropertiesGuestConnection... | TestLCOW_V2_UVM_Stats +// (diagnostic helper) | TestLCOW_V2_UVM_DumpStacks +// (no v1 equivalent) | TestLCOW_V2_UVM_ExecIntoHost +// (no v1 equivalent) | TestLCOW_V2_UVM_ConcurrentExec +// (no v1 equivalent) | TestLCOW_V2_UVM_StartIdempotent +// (no v1 equivalent) | TestLCOW_V2_UVM_TerminateIdempotent +// (no v1 equivalent) | TestLCOW_V2_UVM_TerminateFromCreated +// +// Container, device, and update tests are intentionally NOT covered here: +// vm.Controller exposes no container/device API and no Modify path; those +// live in cmd/containerd-shim-lcow-v2/service/ and are out of scope until +// internal/controller/container/ exists. Existing v1 LCOW container tests +// stay v1-only via the chokepoint in defaultLCOWOptions. +// +// VM lifecycle tests run serially — parallel Hyper-V VM creation strains +// nested-virt CI runners. + +package functional + +import ( + "bytes" + "context" + "fmt" + "strings" + "sync" + "testing" + "time" + + "github.com/Microsoft/go-winio" + + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + lcowbuilder "github.com/Microsoft/hcsshim/internal/builder/vm/lcow" + hcscmd "github.com/Microsoft/hcsshim/internal/cmd" + "github.com/Microsoft/hcsshim/internal/controller/vm" + "github.com/Microsoft/hcsshim/internal/gcs/prot" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/osversion" + vmspec "github.com/Microsoft/hcsshim/sandbox-spec/vm/v2" + + "github.com/Microsoft/hcsshim/test/internal/util" + "github.com/Microsoft/hcsshim/test/pkg/require" + testuvm "github.com/Microsoft/hcsshim/test/pkg/uvm" +) + +// buildLCOWV2Document resolves boot files (auto-detect first, with +// -linux-bootfiles flag override mirroring defaultLCOWOptions) and returns a +// minimal non-confidential v2 LCOW HCS ComputeSystem document plus the OCI +// bundle path used to build it. Tests skip if no boot files can be found. +func buildLCOWV2Document(t *testing.T) (*hcsschema.ComputeSystem, string) { + t.Helper() + + bootFiles, err := testuvm.LCOWBootFilesPath() + if err != nil { + t.Logf("LCOWBootFilesPath: %v", err) + } + if p := *flagLinuxBootFilesPath; p != "" { + bootFiles = p + } + if bootFiles == "" { + t.Skip("no LCOW boot files found; set -linux-bootfiles or install ContainerPlat") + } + + ctx := util.Context(context.Background(), t) + shimOpts := &runhcsopts.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: bootFiles, + } + bundle := t.TempDir() + + // Empty vmspec.Spec selects the non-confidential code path in BuildSandboxConfig. + doc, _, err := lcowbuilder.BuildSandboxConfig(ctx, hcsOwner, bundle, shimOpts, &vmspec.Spec{}) + if err != nil { + t.Fatalf("BuildSandboxConfig: %v", err) + } + return doc, bundle +} + +// cleanupV2Controller registers a deferred TerminateVM+Wait with independent +// 30-second budgets. The compute-system sweep in TestMain is the ultimate +// backstop if both time out. +func cleanupV2Controller(t *testing.T, ctrl vm.Controller) { + t.Helper() + t.Cleanup(func() { + termCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := ctrl.TerminateVM(termCtx); err != nil { + t.Logf("TerminateVM failed (sweeper will reap): %v", err) + } + waitCtx, cancel2 := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel2() + if err := ctrl.Wait(waitCtx); err != nil { + t.Logf("Wait after Terminate failed: %v", err) + } + }) +} + +// createAndStartV2 is the common Create + Start dance used by every v2 test +// here. Returns the controller after StartVM succeeds; caller is responsible +// for cleanup (use cleanupV2Controller or explicit TerminateVM). +func createAndStartV2(t *testing.T, ctx context.Context) vm.Controller { + t.Helper() + doc, _ := buildLCOWV2Document(t) + ctrl := vm.NewController() + if err := ctrl.CreateVM(ctx, &vm.CreateOptions{ID: testName(t) + "@vm", HCSDocument: doc}); err != nil { + t.Fatalf("CreateVM: %v", err) + } + if err := ctrl.StartVM(ctx, &vm.StartOptions{ + GCSServiceID: winio.VsockServiceID(prot.LinuxGcsVsockPort), + }); err != nil { + t.Fatalf("StartVM: %v", err) + } + return ctrl +} + +// requireLCOWV2Uvm gates v2 controller tests on both the LCOWV2 feature flag +// and the uVM feature flag (nested-virt is mandatory for v2 controller tests). +func requireLCOWV2Uvm(t *testing.T) { + t.Helper() + require.Build(t, osversion.RS5) + requireFeatures(t, featureLCOWV2, featureUVM) +} + +// execInUVM runs args in the guest UVM with named-pipe stdio and returns +// (exitCode, stdout, stderr). The cmd package wires CreateStd{Out,Err}Pipe +// only when stdout/stderr paths are non-empty (see internal/cmd/cmd.go); we +// always provide pipes so processes that write to stdio don't get EPIPE. +func execInUVM(t *testing.T, ctx context.Context, ctrl vm.Controller, args []string) (int, string, string) { + t.Helper() + var stdout, stderr bytes.Buffer + stdoutPath, err := hcscmd.CreatePipeAndListen(&stdout, false) + if err != nil { + t.Fatalf("CreatePipeAndListen(stdout): %v", err) + } + stderrPath, err := hcscmd.CreatePipeAndListen(&stderr, false) + if err != nil { + t.Fatalf("CreatePipeAndListen(stderr): %v", err) + } + exitCode, err := ctrl.ExecIntoHost(ctx, &shimdiag.ExecProcessRequest{ + Args: args, + Stdout: stdoutPath, + Stderr: stderrPath, + }) + if err != nil { + t.Fatalf("ExecIntoHost(%v): %v (stderr=%q)", args, err, stderr.String()) + } + return exitCode, stdout.String(), stderr.String() +} + +// TestLCOW_V2_UVM_Lifecycle validates the Create → Start state transitions of +// the v2 vm.Controller against a real Hyper-V guest. +func TestLCOW_V2_UVM_Lifecycle(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + if got := ctrl.State(); got != vm.StateRunning { + t.Fatalf("expected StateRunning, got %s", got) + } + if ctrl.StartTime().IsZero() { + t.Fatalf("expected non-zero StartTime") + } +} + +// TestLCOW_V2_UVM_BootIterations validates repeated boot+terminate cycles +// work — proves the v2 controller path can be re-used across multiple VM +// creations without leaking handles, mirroring the v1 TestLCOW_UVM_Boot +// iteration loop. +func TestLCOW_V2_UVM_BootIterations(t *testing.T) { + requireLCOWV2Uvm(t) + + const iterations = 3 + for i := 0; i < iterations; i++ { + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + if got := ctrl.State(); got != vm.StateRunning { + t.Fatalf("iter %d: expected StateRunning, got %s", i, got) + } + termCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + err := ctrl.TerminateVM(termCtx) + cancel() + if err != nil { + t.Fatalf("iter %d: TerminateVM: %v", i, err) + } + waitCtx, cancelW := context.WithTimeout(context.Background(), 30*time.Second) + if werr := ctrl.Wait(waitCtx); werr != nil { + t.Logf("iter %d: Wait after Terminate: %v", i, werr) + } + cancelW() + } +} + +// TestLCOW_V2_UVM_ExecIntoHost runs a command in the guest UVM via the GCS +// connection. Despite the method name, ExecIntoHost forwards to +// guest.ExecIntoUVM — execution happens inside the Linux guest rootfs, not on +// the Windows host. +func TestLCOW_V2_UVM_ExecIntoHost(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + const want = "hello" + exitCode, stdout, stderr := execInUVM(t, ctx, ctrl, []string{"echo", want}) + if exitCode != 0 { + t.Fatalf("expected exit code 0, got %d (stderr=%q)", exitCode, stderr) + } + if got := strings.TrimSpace(stdout); got != want { + t.Fatalf("expected stdout %q, got %q (stderr=%q)", want, got, stderr) + } +} + +// TestLCOW_V2_UVM_KernelArgs reads /proc/cmdline from the guest and asserts +// the kernel args produced by the v2 builder match expectations — parity with +// the v1 TestLCOW_UVM_KernelArgs validation, retargeted to the v2 path. +func TestLCOW_V2_UVM_KernelArgs(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + exitCode, stdout, stderr := execInUVM(t, ctx, ctrl, []string{"cat", "/proc/cmdline"}) + if exitCode != 0 { + t.Fatalf("cat /proc/cmdline exited %d (stderr=%q)", exitCode, stderr) + } + want := []string{ + "8250_core.nr_uarts=0", + "panic=-1", + "quiet", + "pci=off", + "init=/init", + } + for _, w := range want { + if !strings.Contains(stdout, w) { + t.Errorf("kernel cmdline missing %q (got: %s)", w, stdout) + } + } +} + +// TestLCOW_V2_UVM_Stats queries memory + CPU statistics from the running VM. +// Validates the host-side stats collection path in vm.Controller.Stats. +func TestLCOW_V2_UVM_Stats(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + s, err := ctrl.Stats(ctx) + if err != nil { + t.Fatalf("Stats: %v", err) + } + if s == nil { + t.Fatalf("Stats returned nil response") + } + if s.Memory == nil { + t.Fatalf("Stats.Memory is nil") + } +} + +// TestLCOW_V2_UVM_DumpStacks asks the guest to emit goroutine stacks via the +// GCS DumpStacks RPC. Validates the diagnostic path. +func TestLCOW_V2_UVM_DumpStacks(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + stacks, err := ctrl.DumpStacks(ctx) + if err != nil { + t.Fatalf("DumpStacks: %v", err) + } + // Empty stacks string is valid if the guest reports capability=false; + // the call itself succeeding is the assertion. Log size for diagnostics. + t.Logf("DumpStacks returned %d bytes", len(stacks)) +} + +// TestLCOW_V2_UVM_StartIdempotent validates that calling StartVM twice on the +// same controller is a no-op (matches the doc on Manager.StartVM). Containerd +// retries depend on this. +func TestLCOW_V2_UVM_StartIdempotent(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + if err := ctrl.StartVM(ctx, &vm.StartOptions{ + GCSServiceID: winio.VsockServiceID(prot.LinuxGcsVsockPort), + }); err != nil { + t.Fatalf("second StartVM (should be no-op): %v", err) + } + if got := ctrl.State(); got != vm.StateRunning { + t.Fatalf("expected StateRunning after second StartVM, got %s", got) + } +} + +// TestLCOW_V2_UVM_TerminateIdempotent verifies that calling TerminateVM twice +// is safe. containerd retries StopPodSandbox / Delete, so the second call must +// be a no-op rather than an error. +func TestLCOW_V2_UVM_TerminateIdempotent(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + // No cleanupV2Controller — this test owns termination explicitly. + + if err := ctrl.TerminateVM(ctx); err != nil { + t.Fatalf("first TerminateVM: %v", err) + } + if err := ctrl.TerminateVM(ctx); err != nil { + t.Fatalf("second TerminateVM (should be no-op): %v", err) + } + if got := ctrl.State(); got != vm.StateTerminated { + t.Fatalf("expected StateTerminated, got %s", got) + } + waitCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := ctrl.Wait(waitCtx); err != nil { + t.Logf("Wait after Terminate: %v", err) + } +} + +// TestLCOW_V2_UVM_TerminateFromCreated validates that TerminateVM works when +// called on a Created (not yet Started) controller — covers the failed-pod- +// create cleanup path where Start never ran. +func TestLCOW_V2_UVM_TerminateFromCreated(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + doc, _ := buildLCOWV2Document(t) + ctrl := vm.NewController() + + if err := ctrl.CreateVM(ctx, &vm.CreateOptions{ID: testName(t) + "@vm", HCSDocument: doc}); err != nil { + t.Fatalf("CreateVM: %v", err) + } + if got := ctrl.State(); got != vm.StateCreated { + t.Fatalf("expected StateCreated, got %s", got) + } + if err := ctrl.TerminateVM(ctx); err != nil { + t.Fatalf("TerminateVM from Created: %v", err) + } + if got := ctrl.State(); got != vm.StateTerminated { + t.Fatalf("expected StateTerminated, got %s", got) + } +} + +// TestLCOW_V2_UVM_ConcurrentExec runs multiple ExecIntoHost calls concurrently +// to exercise activeExecCount and the GCS bridge under parallel load. All +// invocations should succeed with exit code 0. +func TestLCOW_V2_UVM_ConcurrentExec(t *testing.T) { + requireLCOWV2Uvm(t) + + ctx := util.Context(context.Background(), t) + ctrl := createAndStartV2(t, ctx) + cleanupV2Controller(t, ctrl) + + const n = 5 + var wg sync.WaitGroup + errs := make([]error, n) + for i := 0; i < n; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + ec, _, stderr := execInUVM(t, ctx, ctrl, []string{"echo", "concurrent"}) + if ec != 0 { + errs[idx] = fmt.Errorf("concurrent exec idx=%d exit=%d stderr=%q", idx, ec, strings.TrimSpace(stderr)) + } + }(i) + } + wg.Wait() + for _, e := range errs { + if e != nil { + t.Errorf("%v", e) + } + } +} diff --git a/test/functional/main_test.go b/test/functional/main_test.go index 18bd6a72f4..78720b9d78 100644 --- a/test/functional/main_test.go +++ b/test/functional/main_test.go @@ -92,6 +92,7 @@ const ( // container and uVM types. featureLCOW = "LCOW" // Linux containers or uVM tests; requires [featureUVM] + featureLCOWV2 = "LCOWV2" // v2 LCOW controller tests; requires [featureUVM] featureLCOWIntegrity = "LCOWIntegrity" // Linux confidential/policy tests featureWCOW = "WCOW" // Windows containers or uVM tests featureUVM = "uVM" // tests that create a utility VM @@ -109,6 +110,7 @@ const ( var allFeatures = []string{ featureLCOW, + featureLCOWV2, featureLCOWIntegrity, featureWCOW, featureUVM, @@ -142,6 +144,14 @@ var ( func TestMain(m *testing.M) { flag.Parse() + // LCOWV2 implies LCOW: the v2 shim IS an LCOW runtime, so a run gated + // only on `-feature LCOWV2` should still execute tests that gate on + // `featureLCOW`. Mirrors the pattern in the CRI test suite and the + // azcri repo. + if flagFeatures.IncludesExplicit() && flagFeatures.IsSet(featureLCOWV2) { + flagFeatures.Include(featureLCOW) + } + if err := runTests(m); err != nil { fmt.Fprintln(os.Stderr, err) @@ -279,6 +289,14 @@ func requireAnyFeature(tb testing.TB, features ...string) { func defaultLCOWOptions(ctx context.Context, tb testing.TB) *uvm.OptionsLCOW { tb.Helper() + // Every caller of defaultLCOWOptions is on the v1 path: the returned + // *uvm.OptionsLCOW only feeds testuvm.CreateLCOW / CreateAndStartLCOWFromOpts + // which build a *uvm.UtilityVM (v1 controller). The v2 controller does not + // consume *uvm.OptionsLCOW. Skip here so the whole v1 LCOW test surface + // short-circuits cleanly under `-feature LCOWV2`. Individual tests do not + // need to repeat requireV1Only. + requireV1Only(tb) + opts := testuvm.DefaultLCOWOptions(ctx, tb, testName(tb), hcsOwner) if p := *flagLinuxBootFilesPath; p != "" { opts.UpdateBootFilesPath(ctx, p) diff --git a/test/pkg/flag/flag.go b/test/pkg/flag/flag.go index 19cc0a8e68..377b8e3ad5 100644 --- a/test/pkg/flag/flag.go +++ b/test/pkg/flag/flag.go @@ -92,6 +92,28 @@ func (es *IncludeExcludeStringSet) IsSet(s string) bool { return false } +// IncludesExplicit reports whether the include flag (e.g. -feature) was used +// explicitly on the command line. When false, [Strings] resolves from the +// defaults and IsSet is permissive for any default value not excluded. +// +// This is useful for callers that need to know whether to apply an implication +// rule (e.g. "feature A implies feature B") without breaking the default-when- +// unset semantics. +func (es *IncludeExcludeStringSet) IncludesExplicit() bool { + return es.inc != nil && es.inc.Len() > 0 +} + +// Include adds s to the include set, mirroring the effect of passing - s +// on the command line. Intended for tests that need one feature flag to imply +// another after [flag.Parse] has returned. Safe to call when the include flag +// is already explicitly set; ignored when the underlying include set is nil. +func (es *IncludeExcludeStringSet) Include(s string) { + if es.inc == nil { + return + } + _ = es.inc.Set(s) +} + // StringSet is a type to be used with the standard library's flag.Var // function as a custom flag value, similar to "github.com/urfave/cli".StringSet, // but it only tracks unique instances. diff --git a/test/pkg/uvm/lcow.go b/test/pkg/uvm/lcow.go index 4e17122349..779759487a 100644 --- a/test/pkg/uvm/lcow.go +++ b/test/pkg/uvm/lcow.go @@ -32,6 +32,17 @@ var lcowOSBootFilesOnce = sync.OnceValues(func() (string, error) { return "", nil }) +// LCOWBootFilesPath returns the absolute path to the LinuxBootFiles directory +// found alongside containerd.exe on the PATH or at C:\ContainerPlat. Returns +// an empty string (and no error) if no such directory exists. +// +// Exported for v2 controller tests that build HCS documents directly via +// internal/builder/vm/lcow.BuildSandboxConfig and cannot use +// [DefaultLCOWOptions] (which returns the v1 *uvm.OptionsLCOW type). +func LCOWBootFilesPath() (string, error) { + return lcowOSBootFilesOnce() +} + // DefaultLCOWOptions returns default options for a bootable LCOW uVM, but first checks // if `containerd.exe` is in the path, or C:\ContainerPlat\LinuxBootFiles exists, and // prefers those paths above the default boot path set by [uvm.NewDefaultOptionsLCOW] From 46c72064a30250b4ccad9875dbdd2d208262e8c3 Mon Sep 17 00:00:00 2001 From: Shreyansh Sancheti Date: Wed, 20 May 2026 21:47:17 +0530 Subject: [PATCH 2/2] ci: bump golangci-lint to v2.5 to fix cross-module typecheck v2.1.0 fails to typecheck cross-module imports from test/ that resolve back into the parent module via a replace directive, producing: could not import github.com/Microsoft/hcsshim/internal/... (-: build constraints exclude all Go files in ...) This only affects the (windows, test) lint matrix entry; the same imports typecheck cleanly under (windows, "") and locally with v2.11.x. v2.5+ resolves this. Signed-off-by: Shreyansh Sancheti --- .github/workflows/ci.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4796a52c79..7e54105ece 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,13 @@ jobs: - name: Run golangci-lint uses: golangci/golangci-lint-action@v9 with: - version: v2.1 + # v2.1.0 has a known typechecker bug that fails to resolve + # cross-module imports under `working-directory: test/` when those + # imports point back into the parent module via a `replace` + # directive. Symptom: `could not import ... (-: build constraints + # exclude all Go files in ...)` on `(windows, test)` despite the + # files matching the active build tags + GOOS. Fixed in v2.5+. + version: v2.5 args: >- --verbose --max-issues-per-linter=0