diff --git a/.gitattributes b/.gitattributes index a748d2ce..82891680 100644 --- a/.gitattributes +++ b/.gitattributes @@ -31,3 +31,9 @@ Dockerfile* text # .gitattributes export-ignore .gitignore export-ignore + +# napi-rs auto-generates this file from the kernel's `napi-binding/napi/` +# crate; regenerated by `npm run build:native`. Tell git/GitHub it's +# machine-generated so it collapses in diffs and is excluded from +# blame and language stats. +native/sea/index.d.ts linguist-generated=true diff --git a/.gitignore b/.gitignore index 99381ce5..a0b80632 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,10 @@ coverage_unit dist *.DS_Store lib/version.ts + +# SEA native binding — copied/generated from kernel workspace by `npm run build:native`. +# The committed contract is `native/sea/index.d.ts` (TypeScript declarations). +# Everything else under native/sea/ is a build artifact and must not be committed. +native/sea/index.js +native/sea/index.node +native/sea/index.*.node diff --git a/.npmignore b/.npmignore index 2bfe597c..f4b203e8 100644 --- a/.npmignore +++ b/.npmignore @@ -3,6 +3,13 @@ !dist/**/* !thrift/**/* +# SEA napi-rs router shim + TypeScript declarations. The router (index.js) +# selects the per-platform `.node` artifact from `@databricks/sea-native-*` +# optionalDependencies (populated when the kernel CI publishes them); +# the .d.ts is the consumer-facing type contract. +!native/sea/index.js +!native/sea/index.d.ts + !LICENSE !NOTICE !package.json diff --git a/.prettierignore b/.prettierignore index 9a9ec6bc..4a764095 100644 --- a/.prettierignore +++ b/.prettierignore @@ -11,3 +11,9 @@ coverage dist thrift package-lock.json + +# Generated by napi-rs from the kernel's `napi-binding/napi/` crate; +# regenerated by `npm run build:native`. Format follows napi-rs's +# defaults (no semicolons), not this repo's prettier config. 
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * Lazy loader for the SEA (Statement Execution API) native binding.
 *
 * Mirrors the load-failure-tolerant pattern of `lib/utils/lz4.ts`: the
 * `.node` artifact ships via per-platform optional dependencies
 * (`@databricks/sea-native-<triple>`), so its absence must not crash
 * a Thrift-only consumer of the driver. Callers that actually need
 * SEA invoke `getSeaNative()`, which throws a structured error if
 * the binding could not be loaded.
 */

import type {
  Connection as NativeConnection,
  Statement as NativeStatement,
  ConnectionOptions,
  ExecuteOptions,
  ArrowBatch,
  ArrowSchema,
} from '@sea-native';

export type { ConnectionOptions, ExecuteOptions, ArrowBatch, ArrowSchema };
export type Connection = NativeConnection;
export type Statement = NativeStatement;

/** Shape of the module object returned by `require('../../native/sea')`. */
export interface SeaNativeBinding {
  version(): string;
  openSession(options: ConnectionOptions): Promise<NativeConnection>;
  Connection: typeof NativeConnection;
  Statement: typeof NativeStatement;
}

/** Lowest Node.js major version on which a load of the binding is attempted. */
const MIN_NODE_MAJOR = 18;

/**
 * Major version of the running Node.js.
 *
 * @returns the integer parsed from the start of `process.version`, or
 *   `NaN` if the string does not start with `v<digits>`.
 */
function detectNodeMajor(): number {
  // `process.version` is `vX.Y.Z`; parseInt stops at the first non-digit.
  return parseInt(process.version.slice(1), 10);
}

/**
 * Builds the human-readable message for a failed `require()` of the binding.
 *
 * @param err the error thrown while loading; only `code` and `message` are read.
 * @returns a message naming the current platform and Node version, with a
 *   hint specific to `MODULE_NOT_FOUND` / `ERR_DLOPEN_FAILED` when applicable.
 */
function loadFailureHint(err: NodeJS.ErrnoException): string {
  const platform = `${process.platform}-${process.arch}`;
  // NOTE(review): the one published package visible in package.json is
  // `@databricks/sea-native-linux-x64-gnu`, i.e. it carries a libc suffix
  // that this example name does not — confirm the naming before relying
  // on the hint verbatim.
  const installHint = `Install the matching optional dependency (e.g. @databricks/sea-native-${platform}).`;
  if (err.code === 'MODULE_NOT_FOUND') {
    return `SEA native binding not installed for platform ${platform} on Node ${process.version}. ${installHint}`;
  }
  if (err.code === 'ERR_DLOPEN_FAILED') {
    return `SEA native binding present but failed to dlopen on platform ${platform} / Node ${process.version} — likely a libc or Node ABI mismatch. The binding requires Node >=${MIN_NODE_MAJOR}.`;
  }
  return `SEA native binding failed to load on platform ${platform} / Node ${process.version}: ${err.message}`;
}

/**
 * Wraps a load failure in a new `Error` that keeps the thrown value as `cause`.
 *
 * The property is defined manually (rather than via the `Error` options
 * argument) so this compiles regardless of the configured TS `lib`.
 */
function loadError(message: string, cause: unknown): Error {
  const error = new Error(message);
  Object.defineProperty(error, 'cause', {
    value: cause,
    writable: true,
    enumerable: false,
    configurable: true,
  });
  return error;
}

// `undefined` = load not attempted yet, `null` = attempted and failed.
let cached: SeaNativeBinding | null | undefined;
// Set by `tryLoad()` whenever it returns `undefined`.
let cachedError: Error | undefined;

/**
 * Attempts to load the binding once.
 *
 * @returns the binding, or `undefined` after recording the reason in
 *   `cachedError`. Never throws.
 */
function tryLoad(): SeaNativeBinding | undefined {
  const nodeMajor = detectNodeMajor();
  // An unparseable version (NaN) is not treated as "too old": the load is
  // still attempted and any failure is reported by the catch below.
  if (Number.isFinite(nodeMajor) && nodeMajor < MIN_NODE_MAJOR) {
    cachedError = new Error(
      `SEA native binding requires Node >=${MIN_NODE_MAJOR}; running Node ${process.version}. Continue using the Thrift backend on this runtime.`,
    );
    return undefined;
  }

  try {
    // The require path resolves to `native/sea/index.js` (the napi-rs
    // router). `.js` is omitted so eslint's `import/extensions` rule
    // accepts the call.
    // eslint-disable-next-line @typescript-eslint/no-var-requires, global-require
    return require('../../native/sea') as SeaNativeBinding;
  } catch (err) {
    // Any `Error` (with or without a `code`) gets the platform-aware hint;
    // only genuinely non-Error throwables fall back to stringification.
    const message =
      err instanceof Error
        ? loadFailureHint(err)
        : `SEA native binding failed to load with non-standard error: ${String(err)}`;
    cachedError = loadError(message, err);
    return undefined;
  }
}

/**
 * Runs `tryLoad()` on first use and caches the outcome (binding or `null`),
 * so the load is attempted at most once per process.
 */
function ensureLoaded(): SeaNativeBinding | null {
  if (cached === undefined) {
    cached = tryLoad() ?? null;
  }
  return cached;
}

/**
 * Returns the loaded native binding. Throws a structured error if
 * the binding is unavailable on this platform / Node version.
 *
 * @throws Error the cached load failure; the same instance on every call.
 */
export function getSeaNative(): SeaNativeBinding {
  const binding = ensureLoaded();
  if (binding === null) {
    throw cachedError ?? new Error('SEA native binding unavailable');
  }
  return binding;
}

/**
 * Returns the loaded binding or `undefined` if it could not be
 * loaded. Use this for capability-detection at startup; use
 * `getSeaNative()` at the point where SEA is actually required.
 */
export function tryGetSeaNative(): SeaNativeBinding | undefined {
  return ensureLoaded() ?? undefined;
}
Standalone-workspace is the fix. + +## What lives in this directory + +- `index.d.ts` — TypeScript declarations consumed by `lib/sea/`. + Generated by napi-rs from the Rust source; checked in as the + consumer-facing type contract. +- `index.js` — napi-rs's per-platform router shim. Gitignored; + populated by `npm run build:native` for local dev. In published + tarballs it ships alongside the `.d.ts` and `require()`s the + right `@databricks/sea-native-<triple>` optional dependency. +- `index.*.node` — the actual native binary, one per platform. + Gitignored. In production these live in the per-triple optional + dependencies (`@databricks/sea-native-linux-x64-gnu`, etc.); for + local dev `npm run build:native` copies one into this directory. + +## Build for local dev + +```bash +# From the nodejs repo root: +export DATABRICKS_SQL_KERNEL_REPO=/path/to/your/databricks-sql-kernel/napi-binding +npm run build:native # release build (default) +BUILD_PROFILE= npm run build:native # debug build (empty BUILD_PROFILE drops --release) +``` + +`DATABRICKS_SQL_KERNEL_REPO` is required when your kernel checkout +isn't at `../../databricks-sql-kernel/napi-binding` relative to the +nodejs repo. + +## Production load path + +At release time the kernel's CI publishes +`@databricks/sea-native-<triple>` npm packages — one per supported +platform — each containing a single `.node` binary. The nodejs +driver lists them as `optionalDependencies`; npm installs only the +one matching the consumer's `process.platform` / `process.arch`. +`native/sea/index.js` (the napi-rs router) then `require()`s the +installed package at load time. diff --git a/native/sea/index.d.ts b/native/sea/index.d.ts new file mode 100644 index 00000000..5fb5e902 --- /dev/null +++ b/native/sea/index.d.ts @@ -0,0 +1,144 @@ +/* tslint:disable */ +/* eslint-disable */ + +/* auto-generated by NAPI-RS */ + +/** + * JS-visible per-execute options. 
M0 only carries + * initialCatalog / initialSchema / sessionConfig — parameters and + * per-statement overrides land in M1. + */ +export interface ExecuteOptions { + /** Default catalog applied to this statement via session conf. */ + initialCatalog?: string + /** Default schema applied to this statement via session conf. */ + initialSchema?: string + /** + * Per-statement session conf overrides (forwarded to SEA + * `parameters` / Thrift `confOverlay`). + */ + sessionConfig?: Record<string, string> +} +/** + * JS-visible options for opening a Databricks SQL session over PAT. + * + * M0 supports PAT only — `token` is required. OAuth M2M / U2M variants + * land in M1 along with a discriminated-union shape on the JS side. + */ +export interface ConnectionOptions { + /** + * Workspace host, e.g. `adb-…azuredatabricks.net`. The kernel + * normalises this — bare hostnames get `https://` prepended. + */ + hostName: string + /** + * JDBC-style HTTP path, e.g. `/sql/1.0/warehouses/abc123`. The + * kernel parses out the warehouse id. + */ + httpPath: string + /** + * Personal access token. Must be non-empty (the kernel rejects + * empty PATs at session construction). + */ + token: string +} +/** + * Open a Databricks SQL session over PAT auth and return an opaque + * `Connection` wrapping the kernel `Session`. + * + * The JS-visible name is `openSession` (napi-rs converts snake_case + * to camelCase for free functions). + */ +export declare function openSession(options: ConnectionOptions): Promise<Connection> +/** + * A single Arrow IPC stream payload encoding one record batch (plus + * the schema header so the JS-side reader is stateless). + */ +export interface ArrowBatch { + ipcBytes: Buffer +} +/** + * An Arrow IPC stream payload encoding just the result schema (no + * record-batch messages). Returned by `Statement.schema()`. + */ +export interface ArrowSchema { + ipcBytes: Buffer +} +/** + * Returns the native binding's crate version (`CARGO_PKG_VERSION`). 
+ * + * Originally the round-1b smoke test; kept as a cheap "is the binding + * loaded?" probe for the JS-side loader's structured diagnostics. + */ +export declare function version(): string +/** + * Opaque connection handle wrapping a kernel `Session`. + * + * `inner` is `Arc<Mutex<Option<Session>>>` so: + * - the Drop impl can clone the `Arc` and `.take()` the session on a + * background tokio task without holding `&mut self` (which Drop is + * forbidden from doing across an `await`), + * - `executeStatement` can share immutable access to the session via + * the `Arc` clones the kernel makes internally + * (`Session::statement()` only needs `&self`). + */ +export declare class Connection { + /** + * Execute a SQL statement and return a Statement handle that + * streams batches via `fetchNextBatch()`. + */ + executeStatement(sql: string, options: ExecuteOptions): Promise<Statement> + /** + * Explicit close. Marks the connection wrapper as closed so + * subsequent calls on this `Connection` return `InvalidArg`, then + * schedules a fire-and-forget server-side close on the runtime. + * + * **Why fire-and-forget and not `Session::close().await`:** the + * kernel's `Session::close(self).await` body holds a + * `tracing::EnteredSpan` (a `!Send` type) across an `.await`, so + * the future is not `Send`. napi-rs's `execute_tokio_future` glue + * rejects non-`Send` futures, and `Handle::spawn` does too. The + * kernel's `SessionInner::Drop` already spawns the + * `delete_session` RPC on the same runtime handle the napi + * binding captured, so dropping the value is functionally + * equivalent — the difference is that JS callers can't observe a + * `delete_session` failure from `close()`. Tracked as a kernel- + * side follow-up (clone the span rather than entering it) in + * Round 3 findings. + */ + close(): Promise<void> +} +/** + * Opaque executed-statement handle. 
+ * + * `inner` is wrapped in `Arc<Mutex<Option<ExecutedStatement>>>` so: + * - `fetch_next_batch` can `await` `ResultStream::next_batch` which + * requires `&mut ExecutedStatement` (via `result_stream_mut`), + * - `cancel` / `close` (which take `&self` on the kernel side via the + * `ExecutedStatementHandle` trait) can run concurrently with each + * other from a JS perspective without panicking, + * - `Drop` can hand the inner handle off to a tokio task without + * touching `&mut self` across an `await`. + */ +export declare class Statement { + /** + * Pull the next batch of results. Returns `None` when the stream + * is exhausted. The returned `ArrowBatch.ipcBytes` is a complete + * Arrow IPC stream (schema header + 1 record-batch message) + * suitable for handing to `apache-arrow`'s `RecordBatchReader`. + */ + fetchNextBatch(): Promise<ArrowBatch | null> + /** + * Result schema as an Arrow IPC payload (schema header only, no + * record-batch message). Available before any batches have been + * fetched. + */ + schema(): Promise<ArrowSchema> + /** Server-side cancel. No-op if already finished. */ + cancel(): Promise<void> + /** + * Explicit close. Awaits the server-side close so the JS caller + * can observe failures. + */ + close(): Promise<void> +} diff --git a/package.json b/package.json index e430181f..612213f9 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "test": "nyc --report-dir=${NYC_REPORT_DIR:-coverage_unit} mocha --config tests/unit/.mocharc.js", "update-version": "node bin/update-version.js && prettier --write ./lib/version.ts", "build": "npm run update-version && tsc --project tsconfig.build.json", + "build:native": "bash -c 'cd ${DATABRICKS_SQL_KERNEL_REPO:-../../databricks-sql-kernel/napi-binding}/napi && npx --no-install @napi-rs/cli build --platform ${BUILD_PROFILE---release} && cp index.* $OLDPWD/native/sea/'", "watch": "tsc --project tsconfig.build.json --watch", "type-check": "tsc --noEmit", "prettier": "prettier . 
--check", @@ -47,6 +48,7 @@ ], "license": "Apache 2.0", "devDependencies": { + "@napi-rs/cli": "2.18.4", "@types/chai": "^4.3.14", "@types/http-proxy": "^1.17.14", "@types/lz4": "^0.6.4", @@ -89,6 +91,7 @@ "winston": "^3.8.2" }, "optionalDependencies": { - "lz4": "^0.6.5" + "lz4": "^0.6.5", + "@databricks/sea-native-linux-x64-gnu": "0.1.0" } } diff --git a/tests/e2e/sea/e2e-smoke.test.ts b/tests/e2e/sea/e2e-smoke.test.ts new file mode 100644 index 00000000..5b14ae59 --- /dev/null +++ b/tests/e2e/sea/e2e-smoke.test.ts @@ -0,0 +1,121 @@ +// Copyright (c) 2026 Databricks, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import { expect } from 'chai'; +import { tableFromIPC } from 'apache-arrow'; +import { tryGetSeaNative, Connection, Statement } from '../../../lib/sea/SeaNativeLoader'; + +// End-to-end smoke test against a live warehouse: +// 1. Open a kernel `Session` over PAT. +// 2. Execute `SELECT 1`, decode the IPC payload, assert the value is 1. +// 3. Exercise lifecycle negative paths (drain-past-null, double-close). +// 4. Close the statement, then the connection. +// +// Required env vars: +// - DATABRICKS_PECOTESTING_SERVER_HOSTNAME +// - DATABRICKS_PECOTESTING_HTTP_PATH +// - DATABRICKS_PECOTESTING_TOKEN_PERSONAL +// +// On dev machines without the secrets the suite is skipped. In CI +// (process.env.CI === 'true') missing secrets are fatal — a silent +// skip would let credential-rotation regressions reach prod. 
/** Environment variables that must be set for the live-warehouse tests to run. */
const REQUIRED_ENV = [
  'DATABRICKS_PECOTESTING_SERVER_HOSTNAME',
  'DATABRICKS_PECOTESTING_HTTP_PATH',
  'DATABRICKS_PECOTESTING_TOKEN_PERSONAL',
] as const;

/** Names from `REQUIRED_ENV` that are unset or empty in `process.env`. */
function missingEnvVars(): string[] {
  return REQUIRED_ENV.filter((name) => !process.env[name]);
}

describe('SEA native binding — end-to-end smoke', function smoke() {
  // Live-warehouse tests can take >2s through warm-up.
  this.timeout(60_000);

  const binding = tryGetSeaNative();
  if (binding === undefined) {
    // Optional dependency absent — never reach the live path.
    it.skip('SEA native binding not available on this platform');
    return;
  }

  const missing = missingEnvVars();
  if (missing.length > 0) {
    if (process.env.CI === 'true') {
      // Fail loudly so credential-rotation regressions surface in CI.
      it('fails when required env vars are missing in CI', () => {
        expect.fail(`Missing required env vars in CI: ${missing.join(', ')}. Set CI=false to skip locally.`);
      });
      return;
    }
    it.skip(`skipped — missing env vars: ${missing.join(', ')}`);
    return;
  }

  // The guard above guarantees all three are non-empty strings.
  const hostName = process.env.DATABRICKS_PECOTESTING_SERVER_HOSTNAME as string;
  const httpPath = process.env.DATABRICKS_PECOTESTING_HTTP_PATH as string;
  const token = process.env.DATABRICKS_PECOTESTING_TOKEN_PERSONAL as string;

  it('opens a session, runs SELECT 1, decodes the IPC payload to 1', async () => {
    const connection: Connection = await binding.openSession({ hostName, httpPath, token });
    // Nested try/finally: the connection is closed even if an assertion
    // fails or `statement.close()` itself rejects.
    try {
      expect(connection).to.be.an('object');

      const statement: Statement = await connection.executeStatement('SELECT 1', {});
      try {
        expect(statement).to.be.an('object');

        const batch = await statement.fetchNextBatch();
        if (batch === null || batch === undefined) {
          throw new Error('expected one record batch for SELECT 1, got none');
        }
        expect(batch.ipcBytes).to.be.instanceOf(Buffer);
        expect(batch.ipcBytes.length).to.be.greaterThan(0);

        // Decode the IPC payload and verify the value, not just the shape.
        const table = tableFromIPC(batch.ipcBytes);
        expect(table.numRows).to.equal(1);
        const firstColumn = table.getChildAt(0);
        if (firstColumn === null) {
          throw new Error('decoded table has no column 0');
        }
        expect(Number(firstColumn.get(0))).to.equal(1);

        // Drain-past-null: subsequent fetch returns null.
        const after = await statement.fetchNextBatch();
        expect(after).to.equal(null);

        // Drain-past-drained: another fetch still returns null (idempotent).
        const afterAgain = await statement.fetchNextBatch();
        expect(afterAgain).to.equal(null);
      } finally {
        await statement.close();
      }
    } finally {
      await connection.close();
    }
  });

  it('returns a schema IPC payload before any batch is fetched', async () => {
    const connection: Connection = await binding.openSession({ hostName, httpPath, token });
    try {
      const statement = await connection.executeStatement('SELECT 1', {});
      try {
        const schema = await statement.schema();
        expect(schema.ipcBytes).to.be.instanceOf(Buffer);
        expect(schema.ipcBytes.length).to.be.greaterThan(0);
      } finally {
        await statement.close();
      }
    } finally {
      await connection.close();
    }
  });
});
import { expect } from 'chai';
import { tryGetSeaNative } from '../../../lib/sea/SeaNativeLoader';

describe('SEA native binding — smoke test', function smoke() {
  const native = tryGetSeaNative();

  if (native !== undefined) {
    // Binding loaded: its version string must be three dot-separated integers.
    it('returns a semver version()', () => {
      const reported = native.version();
      expect(reported).to.match(/^\d+\.\d+\.\d+$/);
    });
    return;
  }

  // The binding is an optional dependency. Where the .node artifact is
  // not installed (CI matrix entries without a matching sea-native
  // package, dev machines that never ran `npm run build:native`, ...),
  // mark the suite pending instead of failing the build.
  // eslint-disable-next-line no-invalid-this
  this.pending = true;
  it.skip('SEA native binding not available on this platform');
});