From 8aa8cf3eb68d776eb8d15ee2cdebcd9b8c804a6f Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 12:33:39 +0200 Subject: [PATCH 1/7] fix(manifest/bazel): pin lockfile read-only for show_extension scans The mod show_extension argv builders for the Maven and pip extensions did not pass --lockfile_mode=off, so a read-only dependency scan could rewrite the user's MODULE.bazel.lock. Mirror the query/cquery builders and pin the lockfile read-only before user flags. --- src/commands/manifest/bazel/bazel-query-runner.mts | 8 ++++++++ .../manifest/bazel/bazel-query-runner.test.mts | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/commands/manifest/bazel/bazel-query-runner.mts b/src/commands/manifest/bazel/bazel-query-runner.mts index 75f266851..c9ef84f1a 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.mts @@ -85,6 +85,10 @@ function buildBazelModShowMavenExtensionArgv( 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + // A read-only scan must never rewrite the user's MODULE.bazel.lock; pin + // the lockfile read-only before user flags, mirroring the query/cquery + // argv builders. + '--lockfile_mode=off', // Belt-and-suspenders output reducer mirroring the PyPI path: bias the // report toward the root module's usages. The authoritative pruning is // the importers-filter applied to the parsed output, so this is not @@ -101,6 +105,10 @@ function buildBazelModShowPipExtensionArgv(opts: BazelQueryOptions): string[] { 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', + // A read-only scan must never rewrite the user's MODULE.bazel.lock; pin + // the lockfile read-only before user flags, mirroring the query/cquery + // argv builders. 
+ '--lockfile_mode=off', '--extension_usages=', ...userFlags, ] diff --git a/src/commands/manifest/bazel/bazel-query-runner.test.mts b/src/commands/manifest/bazel/bazel-query-runner.test.mts index aa69b969c..c4e526f40 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.test.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.test.mts @@ -256,10 +256,21 @@ describe('runBazelModShowMavenExtension', () => { 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + '--lockfile_mode=off', '--extension_usages=', ]) }) + it('pins the lockfile read-only so the scan never rewrites MODULE.bazel.lock', async () => { + await runBazelModShowMavenExtension({ + bin: 'bazel', + cwd: '/repo', + invocationFlags: [], + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--lockfile_mode=off') + }) + it('threads outputUserRoot ahead of the subcommand', async () => { await runBazelModShowMavenExtension({ bin: 'bazel', @@ -273,6 +284,7 @@ describe('runBazelModShowMavenExtension', () => { 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + '--lockfile_mode=off', '--extension_usages=', ]) }) @@ -320,6 +332,7 @@ describe('runBazelModShowPipExtension', () => { 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', + '--lockfile_mode=off', '--extension_usages=', ]) }) From 849d22f7c5d04a08f877b11d4cddd98537a2ee65 Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 12:33:47 +0200 Subject: [PATCH 2/7] fix(manifest/bazel): detect cquery timeouts by the kill shape spawn emits The cquery timeout detection tested for a `timedOut` flag the registry spawn never sets; on a timeout the child is killed and Node reports `killed: true` with `signal: SIGTERM` (or SIGKILL). Drop the phantom `timedOut` branch and update the test to assert the real kill shape. 
--- src/commands/manifest/bazel/bazel-cquery.mts | 5 +++-- src/commands/manifest/bazel/bazel-cquery.test.mts | 8 ++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/commands/manifest/bazel/bazel-cquery.mts b/src/commands/manifest/bazel/bazel-cquery.mts index ee3445fca..f86c6ad49 100644 --- a/src/commands/manifest/bazel/bazel-cquery.mts +++ b/src/commands/manifest/bazel/bazel-cquery.mts @@ -551,12 +551,13 @@ export async function runMetadataCqueryForRepo( signal?: unknown stderr?: unknown stdout?: unknown - timedOut?: unknown } const stdout = typeof err.stdout === 'string' ? err.stdout : '' const stderr = typeof err.stderr === 'string' ? err.stderr : '' + // On a `timeout`, the registry spawn kills the child, so Node sets + // `killed: true` and `signal: 'SIGTERM'` (or `SIGKILL`). There is no + // `timedOut` flag on the real rejection, so do not test for one. const timedOut = - err.timedOut === true || err.killed === true || err.signal === 'SIGTERM' || err.signal === 'SIGKILL' diff --git a/src/commands/manifest/bazel/bazel-cquery.test.mts b/src/commands/manifest/bazel/bazel-cquery.test.mts index 9dd3ae269..f26d1e7d4 100644 --- a/src/commands/manifest/bazel/bazel-cquery.test.mts +++ b/src/commands/manifest/bazel/bazel-cquery.test.mts @@ -642,11 +642,15 @@ describe('runMetadataCqueryForRepo', () => { expect(r.artifacts).toEqual([]) }) - it('returns status:timeout when spawn rejects with timedOut=true', async () => { + it('returns status:timeout when spawn is killed on timeout (killed=true + SIGTERM)', async () => { + // The real registry spawn does not set `timedOut`; on a `timeout` it kills + // the child, so Node populates `killed: true` and `signal: 'SIGTERM'`. + // Mock that shape so the test pins the behaviour real spawn produces. 
mocked.mockRejectedValueOnce( Object.assign(new Error('command timed out'), { code: null, - timedOut: true, + killed: true, + signal: 'SIGTERM', stderr: '', stdout: '', }), From 57b71066c28c462ce150a5ed5b12142050156d6d Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 12:34:13 +0200 Subject: [PATCH 3/7] fix(manifest/bazel): surface incomplete Maven extraction honestly A Bazel Maven extraction that under-reports dependencies must never be presented downstream as a complete SBOM. This reworks the shared status model so the CLI is honest about partial and unanalyzable runs: - Add an `indeterminate` probe state. An unrecognized non-zero probe exit or a thrown probe is no longer swallowed as `not-defined` ("no Maven here"); it is propagated so the run is never reported complete. - A workspace that fails to load is recorded as a load failure rather than silently skipped: a hard failure when nothing else was analyzable, a partial otherwise. - Enrich the extraction result with a machine-readable completeness signal (a `complete` flag plus per-workspace / per-hub outcomes) and write a completeness summary alongside the manifests for downstream consumers. The partial SBOM is still emitted and the warning stays loud. - Make both the explicit command and auto-manifest honest: exit non-zero only on hard failure, exit 0 on partial with a prominent warning and the completeness signal echoed. - Gate synthetic hub manifests on committed lockfiles: when a committed maven_install.json / <hub>_maven_install.json already covers a hub, skip re-emitting it since the server already ingests committed lockfiles. - Wire a --per-repo-timeout flag (and socket.json setting) with a 120s default for the explicit command, longer than the 60s auto-manifest default; drop the misleading comment claiming the default already existed. Per-state diagnostics are logged under --verbose.
--- .../manifest/bazel/bazel-repo-discovery.mts | 31 +- .../bazel/bazel-repo-discovery.test.mts | 19 +- .../manifest/bazel/cmd-manifest-bazel.mts | 56 ++- .../bazel/cmd-manifest-bazel.test.mts | 97 +++- .../manifest/bazel/extract_bazel_to_maven.mts | 452 ++++++++++++++++-- .../bazel/extract_bazel_to_maven.test.mts | 165 +++++++ .../manifest/generate_auto_manifest.mts | 18 +- .../manifest/generate_auto_manifest.test.mts | 53 +- src/utils/socket-json.mts | 1 + 9 files changed, 832 insertions(+), 60 deletions(-) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts index 1a82c6317..8da537598 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -36,7 +36,17 @@ export type ProbeResult = { export type RepoProbe = (repoName: string) => Promise<ProbeResult> -export type ProbeStatus = 'populated' | 'empty' | 'not-defined' +// `indeterminate` means the probe could not be classified: an unrecognized +// non-zero exit, or the probe threw outright (the Bazel invocation itself +// failed). It is NOT evidence that the repo is undefined — treating it as +// `not-defined` would silently under-report a hub that may well hold Maven +// deps. The orchestrator must propagate it so the run is never reported +// `complete` when a probe was indeterminate. +export type ProbeStatus = + | 'populated' + | 'empty' + | 'not-defined' + | 'indeterminate' // Conventional Maven hub names rules_jvm_external sets up under // WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility @@ -154,13 +164,16 @@ export function classifyProbeResult(result: ProbeResult): ProbeStatus { return 'empty' } // Code 0 with empty stdout: WORKSPACE-mode probes do this when the repo - // name isn't declared (Exp 5c). Treat as not-defined. + // name isn't declared. Treat as not-defined.
if (result.code === 0) { return 'not-defined' } - // Code 1 with no recognizable message: be conservative and call it - // not-defined so the orchestrator skips it without erroring the workspace. - return 'not-defined' + // Non-zero exit with no recognizable message: the probe failed for a reason + // we can't classify (Bazel infra error, analysis crash, unexpected stderr). + // This is NOT proof the repo is undefined, so do NOT downgrade it to + // not-defined — surface it as indeterminate so the orchestrator can flag + // the workspace as not fully analyzable rather than silently skipping it. + return 'indeterminate' } // Convenience: probe a single candidate and return its classified status, @@ -176,14 +189,18 @@ export async function probeCandidate( try { result = await probe(repoName) } catch (e) { + // A thrown probe means the Bazel invocation itself failed; we have no + // evidence about whether the repo exists. Surface it as indeterminate so + // the run is not reported complete, rather than swallowing it as a + // not-defined skip. if (verbose) { logger.log( - `[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${ + `[VERBOSE] discovery: probe @${repoName}: indeterminate (probe threw: ${ e instanceof Error ? 
e.message : String(e) })`, ) } - return 'not-defined' + return 'indeterminate' } const status = classifyProbeResult(result) if (verbose) { diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts index 20628d65d..248cb53b8 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -177,12 +177,15 @@ describe('bazel-repo-discovery', () => { ).toBe('not-defined') }) - it('classifies code=1 + unrecognized stderr conservatively as not-defined', () => { + it('classifies code=1 + unrecognized stderr as indeterminate (not a silent skip)', () => { + // An unrecognized non-zero exit is NOT proof the repo is absent; it must + // surface as indeterminate so the orchestrator never reports complete on + // a workspace it could not actually analyze. expect( classifyProbeResult( probeResult({ code: 1, stderr: 'some other failure\n' }), ), - ).toBe('not-defined') + ).toBe('indeterminate') }) it('classifies code=1 + "no such package" stderr as not-defined', () => { @@ -195,6 +198,12 @@ describe('bazel-repo-discovery', () => { ), ).toBe('not-defined') }) + + it('classifies a non-zero exit with no recognizable message as indeterminate', () => { + expect( + classifyProbeResult(probeResult({ code: 37, stderr: '' })), + ).toBe('indeterminate') + }) }) describe('probeCandidate', () => { @@ -204,9 +213,9 @@ describe('bazel-repo-discovery', () => { ).toBe('populated') }) - it('returns not-defined when the probe throws', async () => { + it('returns indeterminate when the probe throws (infra failure, not absence)', async () => { expect(await probeCandidate('crash', probeThrows)).toBe( - 'not-defined', + 'indeterminate', ) }) }) @@ -253,7 +262,7 @@ describe('bazel-repo-discovery', () => { it('probeCandidate logs the throw reason under verbose', async () => { await probeCandidate('crash', probeThrows, true) expect(loggedLines()).toMatch( - 
/probe @crash:\s*not-defined \(probe threw: bazel exploded\)/, + /probe @crash:\s*indeterminate \(probe threw: bazel exploded\)/, ) }) }) diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.mts index efb3f0e30..0ab10e861 100644 --- a/src/commands/manifest/bazel/cmd-manifest-bazel.mts +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.mts @@ -56,6 +56,11 @@ const config: CliCommandConfig = { description: 'Output directory for generated manifests; default: ./.socket/bazel-manifests/', }, + perRepoTimeout: { + type: 'number', + description: + 'Per-hub bazel cquery timeout in milliseconds; default: 120000', + }, verbose: { type: 'boolean', description: @@ -100,6 +105,12 @@ const config: CliCommandConfig = { `, } +// The explicit `socket manifest bazel` command gives each hub more time than +// the auto-manifest path: a user running it directly is waiting on this one +// extraction, whereas auto-manifest must not stall the wider scan. Auto's +// shorter default lives in extract_bazel_to_maven.mts. +const EXPLICIT_PER_REPO_TIMEOUT_MS = 120_000 + export const cmdManifestBazel = { description: config.description, hidden: config.hidden, @@ -110,6 +121,11 @@ export type EcosystemOutcome = { ecosystem: 'maven' | 'pypi' status: ExtractBazelStatus manifestPaths: string[] + // Machine-readable completeness signal. True only when the ecosystem's + // extraction was complete; a `partial` upload sets this false so the CLI + // surfaces it honestly (exit 0 + prominent warning) rather than as plain + // success. + complete: boolean } // Pure outcome-matrix evaluator. 
Exported so dispatcher behavior can be @@ -138,10 +154,20 @@ export function evaluateEcosystemOutcomes( const hardFailures = outcomes.filter(o => o.status === 'hardFailure') const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem') + // Surface a machine-readable completeness signal for every produced + // ecosystem so a partial upload is never presented as a complete one. The + // per-workspace / per-hub detail is written to the manifest dir's + // completeness summary by the extractor; this is the human-facing echo. + for (const outcome of produced) { + logger.info( + `Bazel ${outcome.ecosystem} extraction status: ${outcome.status} (complete=${outcome.complete}).`, + ) + } + for (const partial of outcomes) { if (partial.status === 'partial') { logger.warn( - `Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`, + `WARNING: Bazel ${partial.ecosystem} manifest generation was PARTIAL. The uploaded SBOM is known-incomplete and may under-report dependencies; review the completeness summary before relying on the results.`, ) } } @@ -188,14 +214,18 @@ function pypiOutcome(result: { manifestPath?: string | undefined noEcosystemFound?: boolean | undefined ok: boolean -}): { manifestPaths: string[]; status: ExtractBazelStatus } { +}): { complete: boolean; manifestPaths: string[]; status: ExtractBazelStatus } { if (result.noEcosystemFound) { - return { manifestPaths: [], status: 'noEcosystem' } + return { complete: false, manifestPaths: [], status: 'noEcosystem' } } if (result.ok && result.manifestPath) { - return { manifestPaths: [result.manifestPath], status: 'complete' } + return { + complete: true, + manifestPaths: [result.manifestPath], + status: 'complete', + } } - return { manifestPaths: [], status: 'hardFailure' } + return { complete: false, manifestPaths: [], status: 'hardFailure' } } async function run( @@ -232,6 +262,7 @@ async function run( const { ecosystem } = cli.flags let { bazel, bazelFlags, 
bazelOutputBase, bazelRc, out, verbose } = cli.flags + let perRepoTimeout = cli.flags['perRepoTimeout'] as number | undefined // Set defaults for any flag/arg that is not given. Check socket.json first. if (!bazel) { @@ -287,6 +318,19 @@ async function run( verbose = false } } + if (perRepoTimeout === undefined) { + if (sockJson.defaults?.manifest?.bazel?.perRepoTimeout !== undefined) { + perRepoTimeout = sockJson.defaults?.manifest?.bazel?.perRepoTimeout + logger.info( + `Using default --per-repo-timeout from ${SOCKET_JSON}:`, + perRepoTimeout, + ) + } else { + // Explicit invocation default; longer than the auto-manifest default + // because the user is waiting on this single extraction. + perRepoTimeout = EXPLICIT_PER_REPO_TIMEOUT_MS + } + } if (verbose) { logger.group('- ', parentName, config.commandName, ':') @@ -347,9 +391,11 @@ async function run( bin: bazel as string | undefined, cwd, out: out as string, + perRepoTimeoutMs: perRepoTimeout, verbose: Boolean(verbose), }) outcomes.push({ + complete: mavenResult.complete, ecosystem: 'maven', manifestPaths: mavenResult.manifestPaths, status: mavenResult.status, diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts index b0a07833f..8cc8ea254 100644 --- a/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts @@ -1,6 +1,31 @@ -import { describe, expect, it } from 'vitest' +import { beforeEach, describe, expect, it, vi } from 'vitest' -import { evaluateEcosystemOutcomes } from './cmd-manifest-bazel.mts' +import { logger } from '@socketsecurity/registry/lib/logger' + +// Mock the extractor so the `run` wiring test can assert which timeout reaches +// it without a real Bazel toolchain. The `cmdit`/spawnSocketCli tests below +// run in a child process and are unaffected by these in-process mocks. 
+vi.mock('./extract_bazel_to_maven.mts', () => ({ + extractBazelToMaven: vi.fn(async () => ({ + artifactCount: 1, + complete: true, + manifestPaths: ['/tmp/maven_install.json'], + status: 'complete', + workspaceOutcomes: [], + })), +})) +vi.mock('./extract_bazel_to_pypi.mts', () => ({ + extractBazelToPypi: vi.fn(async () => ({ + noEcosystemFound: true, + ok: false, + })), +})) + +import { + cmdManifestBazel, + evaluateEcosystemOutcomes, +} from './cmd-manifest-bazel.mts' +import { extractBazelToMaven } from './extract_bazel_to_maven.mts' import constants, { FLAG_CONFIG, FLAG_DRY_RUN, @@ -8,6 +33,7 @@ import constants, { import { cmdit, spawnSocketCli } from '../../../../test/utils.mts' import type { EcosystemOutcome } from './cmd-manifest-bazel.mts' +import type { CliCommandContext } from '../../../utils/meow-with-subcommands.mts' describe('socket manifest bazel', async () => { const { binCliPath } = constants @@ -68,36 +94,43 @@ const auto = (outcomes: EcosystemOutcome[]) => evaluateEcosystemOutcomes(outcomes, false) const COMPLETE_MAVEN: EcosystemOutcome = { + complete: true, ecosystem: 'maven', manifestPaths: ['/tmp/maven_install.json'], status: 'complete', } const COMPLETE_PYPI: EcosystemOutcome = { + complete: true, ecosystem: 'pypi', manifestPaths: ['/tmp/requirements.txt'], status: 'complete', } const NO_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: [], status: 'noEcosystem', } const NO_PYPI: EcosystemOutcome = { + complete: false, ecosystem: 'pypi', manifestPaths: [], status: 'noEcosystem', } const HARDFAIL_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: [], status: 'hardFailure', } const HARDFAIL_PYPI: EcosystemOutcome = { + complete: false, ecosystem: 'pypi', manifestPaths: [], status: 'hardFailure', } const PARTIAL_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: ['/tmp/maven_install.json'], status: 'partial', @@ -178,4 +211,64 @@ 
describe('evaluateEcosystemOutcomes (explicit mode)', () => { /Bazel manifest generation failed for explicitly requested ecosystem\(s\): maven/, ) }) + + it('exits 0 on partial but emits a prominent warning and a completeness signal', () => { + const warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => logger) + const infoSpy = vi.spyOn(logger, 'info').mockImplementation(() => logger) + try { + expect(() => explicit([PARTIAL_MAVEN])).not.toThrow() + const warned = warnSpy.mock.calls.map(c => String(c[0])).join('\n') + const informed = infoSpy.mock.calls.map(c => String(c[0])).join('\n') + // Prominent partial warning naming the known-incomplete SBOM. + expect(warned).toMatch(/PARTIAL/) + expect(warned).toMatch(/known-incomplete/) + // Machine-readable completeness signal echoed for the produced ecosystem. + expect(informed).toMatch(/extraction status: partial \(complete=false\)/) + } finally { + warnSpy.mockRestore() + infoSpy.mockRestore() + } + }) + + it('does not flag a complete run as incomplete', () => { + const infoSpy = vi.spyOn(logger, 'info').mockImplementation(() => logger) + try { + expect(() => explicit([COMPLETE_MAVEN])).not.toThrow() + const informed = infoSpy.mock.calls.map(c => String(c[0])).join('\n') + expect(informed).toMatch(/extraction status: complete \(complete=true\)/) + } finally { + infoSpy.mockRestore() + } + }) +}) + +describe('perRepoTimeout flag wiring', () => { + const importMeta = { url: 'file:///cmd-manifest-bazel.test.mts' } as ImportMeta + + beforeEach(() => { + vi.mocked(extractBazelToMaven).mockClear() + }) + + it('defaults the explicit command to a 120s per-repo timeout', async () => { + await cmdManifestBazel.run( + [FLAG_CONFIG, '{}', '.'], + importMeta, + { parentName: 'manifest' } as CliCommandContext, + ) + expect(extractBazelToMaven).toHaveBeenCalledTimes(1) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ perRepoTimeoutMs: 120_000 }), + ) + }) + + it('flows a --per-repo-timeout 
override through to the extractor', async () => { + await cmdManifestBazel.run( + ['--per-repo-timeout', '45000', FLAG_CONFIG, '{}', '.'], + importMeta, + { parentName: 'manifest' } as CliCommandContext, + ) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ perRepoTimeoutMs: 45_000 }), + ) + }) }) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index aaa7f2cec..a7ef9d3a6 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -1,4 +1,10 @@ -import { existsSync, promises as fs, mkdirSync, mkdtempSync } from 'node:fs' +import { + existsSync, + promises as fs, + mkdirSync, + mkdtempSync, + readdirSync, +} from 'node:fs' import os from 'node:os' import path from 'node:path' @@ -31,6 +37,7 @@ import { IGNORED_DIRS } from '../../../utils/glob.mts' import type { CqueryRepoResult, ExtractedArtifact } from './bazel-cquery.mts' import type { BazelQueryOptions } from './bazel-query-runner.mts' import type { WorkspaceMode } from './bazel-workspace-detect.mts' +import type { Dirent } from 'node:fs' export type ExtractBazelOptions = { bazelFlags: string | undefined @@ -52,9 +59,11 @@ export type ExtractBazelOptions = { out: string // Use the auto-manifest sibling directory instead of writing directly to `out`. outLayout?: 'flat' - // Per-repo cquery timeout in milliseconds. Auto-manifest default is 60s - // (the orchestrator's job is to not stall the wider scan); explicit - // invocations may bump it. + // Per-repo cquery timeout in milliseconds. When the caller leaves this + // unset the orchestrator falls back to DEFAULT_PER_REPO_TIMEOUT_MS (the + // auto-manifest default, kept short so the wider scan is not stalled). The + // explicit `socket manifest bazel` command wires this to a CLI flag with a + // longer default. 
perRepoTimeoutMs?: number | undefined verbose: boolean } @@ -78,15 +87,69 @@ export type ExtractBazelStatus = | 'noEcosystem' | 'partial' +// Per-hub extraction state inside one workspace. Recorded so the CLI can emit +// a machine-readable completeness signal instead of presenting a partial +// extraction as complete. +// - `populated` — the hub yielded >=1 artifact and a manifest was written. +// - `empty` — the hub is defined but has no Maven targets. +// - `not-defined` — the probed conventional name does not exist here. +// - `skipped-lockfile` — a committed maven_install.json already covers this +// hub, so the CLI deliberately did not re-emit it. +// - `failed` — the hub's cquery errored, timed out, or its graph was +// known-incomplete (dropped/pruned edges, --keep_going). +// - `indeterminate` — discovery could not classify the hub (probe threw or +// returned an unrecognized error); NOT evidence of absence. +export type HubState = + | 'populated' + | 'empty' + | 'not-defined' + | 'skipped-lockfile' + | 'failed' + | 'indeterminate' + +export type HubOutcome = { + hub: string + state: HubState + // Short, machine-stable reason when the hub is `failed`/`indeterminate`. + reason?: string | undefined +} + +// Per-workspace outcome. `load` distinguishes a workspace we could not even +// read (`failed` — e.g. an unbound-var MODULE.bazel fragment) from one we +// analyzed (`loaded`). A workspace that failed to load contributes to a +// hard failure when nothing else was analyzable, and to a partial otherwise. +export type WorkspaceOutcome = { + relPath: string + load: 'loaded' | 'failed' + hubs: HubOutcome[] + // Set when the workspace itself could not be analyzed. + reason?: string | undefined +} + export type ExtractBazelResult = { artifactCount: number manifestPaths: string[] status: ExtractBazelStatus + // True only when `status === 'complete'`. 
Surfaced so downstream consumers + // (and the CLI's emitted summary) get a single machine-readable + // completeness flag without re-deriving it from `status`. + complete: boolean + // Per-workspace / per-hub analyzability breakdown backing the completeness + // signal. Empty for `noEcosystem` and early `hardFailure` (toolchain setup + // failed before any workspace was inspected). + workspaceOutcomes: WorkspaceOutcome[] } const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000 const REAP_TIMEOUT_MS = 10_000 +// Machine-readable completeness signal emitted alongside the synthetic +// manifests. A `complete: false` summary tells a downstream consumer (e.g. +// depscan) that the uploaded SBOM is known-incomplete so it must not be +// treated as an authoritative full closure. Enforcement of this signal is a +// separate downstream follow-up; the CLI only emits it. +const COMPLETENESS_SUMMARY_FILE_NAME = 'socket-bazel-manifest-summary.json' + // Default directory-prune policy for the Bazel workspace walk. The // orchestrator applies this unconditionally so neither caller (the explicit // `socket manifest bazel` command nor `--auto-manifest`) can omit it and let @@ -261,6 +324,112 @@ export function dedupArtifactsByCoord( return [...byCoord.values()] } +// The committed lockfile name the server-side walker already ingests for a +// hub: `maven_install.json` for a hub literally named `maven`, else +// `<repoName>_maven_install.json`. Centralised so the gate and the synthetic +// writer agree on the name. +function hubManifestFileName(repoName: string): string { + return repoName === 'maven' + ? 'maven_install.json' + : `${repoName}_maven_install.json` +} + +// Does a committed lockfile already cover this workspace/hub? The server-side +// walker globs `**/*maven_install.json` at any depth, so a real committed +// lockfile anywhere under the workspace root is already ingested and the CLI +// must not re-emit a synthetic complement (double-emit).
We check the +// workspace root itself and any committed lockfile beneath it, while skipping +// the directory we are about to write synthetic manifests into so we never +// mistake our own prior output for a committed file. +function committedLockfileCovers(args: { + fileName: string + manifestDir: string + workspaceRoot: string +}): string | undefined { + const { fileName, manifestDir, workspaceRoot } = args + // Resolve once so the manifest-dir skip comparison is path-normalized. + const manifestDirResolved = path.resolve(manifestDir) + const stack: string[] = [workspaceRoot] + while (stack.length) { + const dir = stack.pop()! + // Never treat our own synthetic output directory as a committed lockfile. + if (path.resolve(dir) === manifestDirResolved) { + continue + } + let entries: Dirent[] + try { + entries = readdirSync(dir, { withFileTypes: true }) + } catch { + continue + } + for (const entry of entries) { + if (entry.isDirectory()) { + const name = entry.name + // Don't descend the noise dirs the walker also prunes; this keeps the + // gate cheap and avoids walking node_modules/VCS trees. + if ( + DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES.has(name) || + DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES.some(p => name.startsWith(p)) + ) { + continue + } + stack.push(path.join(dir, entry.name)) + } else if (entry.isFile() && entry.name === fileName) { + return path.join(dir, entry.name) + } + } + } + return undefined +} + +// Emit the machine-readable completeness summary next to the manifests. This +// is the CLI's honest "is this SBOM complete?" signal in the emitted output; +// it carries the run status plus the per-workspace / per-hub breakdown so a +// downstream consumer can detect a known-incomplete upload. Best-effort: a +// failure to write the summary must never sink an otherwise-usable run, so it +// is logged (under verbose) and swallowed. 
+async function writeCompletenessSummary(args: { + artifactCount: number + complete: boolean + manifestDir: string + manifestPaths: string[] + status: ExtractBazelStatus + verbose: boolean + workspaceOutcomes: WorkspaceOutcome[] +}): Promise<void> { + const { + artifactCount, + complete, + manifestDir, + manifestPaths, + status, + verbose, + workspaceOutcomes, + } = args + const summary = { + artifactCount, + complete, + ecosystem: 'maven', + manifestCount: manifestPaths.length, + status, + workspaces: workspaceOutcomes, + } + try { + mkdirSync(manifestDir, { recursive: true }) + await fs.writeFile( + path.join(manifestDir, COMPLETENESS_SUMMARY_FILE_NAME), + JSON.stringify(summary, null, 2), + 'utf8', + ) + } catch (e) { + if (verbose) { + logger.log( + `[VERBOSE] completeness summary not written (${getErrorCause(e)}); the run result still carries the signal`, + ) + } + } +} + type WriteHubManifestResult = { artifactCount: number droppedArtifacts: string[] @@ -297,10 +466,7 @@ async function writeHubManifest(args: { prunedEdges, } } - const fileName = - repoName === 'maven' - ? 'maven_install.json' - : `${repoName}_maven_install.json` + const fileName = hubManifestFileName(repoName) const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir mkdirSync(hubDir, { recursive: true }) const manifestPath = path.join(hubDir, fileName) @@ -319,13 +485,22 @@ async function writeHubManifest(args: { // On `show_extension` failure (or a parse that yields zero root hubs) under // Bzlmod, fall through to the conventional-name probe so partial discovery // is still possible. +type DiscoverResult = { + candidates: string[] + // Conventional names whose probe could not be classified (threw or returned + // an unrecognized error). A non-empty list means discovery may have missed + // a hub, so the run can never be reported complete.
+ indeterminateProbes: string[] +} + async function discoverCandidatesForWorkspace( workspaceRoot: string, mode: WorkspaceMode, queryOpts: BazelQueryOptions, verbose: boolean, -): Promise<string[]> { +): Promise<DiscoverResult> { const candidates: string[] = [] + const indeterminateProbes: string[] = [] let showExtensionSucceeded = false if (mode.bzlmod) { const extResult = await runBazelModShowMavenExtension(queryOpts) @@ -372,7 +547,7 @@ async function discoverCandidatesForWorkspace( showExtensionSucceeded ? [] : [...CONVENTIONAL_MAVEN_REPO_NAMES] ).filter(name => !seen.has(name)) if (!toProbe.length) { - return candidates + return { candidates, indeterminateProbes } } const probe = buildMavenProbeFor(queryOpts) for (const name of toProbe) { @@ -381,9 +556,14 @@ async function discoverCandidatesForWorkspace( if (status === 'populated') { candidates.push(name) seen.add(name) + } else if (status === 'indeterminate') { + // The probe failed for a reason we can't classify; we have no proof the + // hub is absent. Record it so the run is flagged not-complete rather + // than silently treating the hub as "no Maven here". + indeterminateProbes.push(name) } } - return candidates + return { candidates, indeterminateProbes } } // Best-effort reap of a Bazel server. Spawned with a short timeout so @@ -493,7 +673,13 @@ export async function extractBazelToMaven( logger.log(e) logger.groupEnd() } - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes: [], + } } logger.info(`Using bazel: ${bin}`) @@ -517,6 +703,16 @@ export async function extractBazelToMaven( let anyRepos = false let hubsSucceeded = 0 let hubsFailed = 0 + // Per-workspace / per-hub analyzability breakdown backing the completeness + // signal the CLI emits. A run is only `complete` when no workspace failed to + // load, no probe was indeterminate, and every queried hub succeeded cleanly.
+ const workspaceOutcomes: WorkspaceOutcome[] = [] + let anyIndeterminate = false + let anyWorkspaceLoadFailed = false + // A hub we deliberately skipped because a committed lockfile already covers + // it. This is a SUCCESSFUL no-op (the server already ingests that lockfile), + // so it must not be conflated with "discovered a hub we failed to extract". + let anyHubCoveredByLockfile = false try { // Always apply the default prune policy so no caller can forget it; @@ -539,7 +735,13 @@ export async function extractBazelToMaven( logger.warn( `No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`, ) - return { artifactCount: 0, manifestPaths: [], status: 'noEcosystem' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'noEcosystem', + workspaceOutcomes: [], + } } if (verbose) { logger.log( @@ -550,15 +752,31 @@ export async function extractBazelToMaven( for (const workspaceRoot of workspaceRoots) { const relPath = path.relative(cwd, workspaceRoot) + const hubOutcomes: HubOutcome[] = [] let mode: WorkspaceMode try { mode = detectWorkspaceMode(workspaceRoot) } catch (e) { + // A workspace we cannot even read is a load failure, NOT "no Maven + // here": record it so the run is flagged not-complete (a hard failure + // when nothing else was analyzable, partial otherwise) rather than + // silently skipped. 
+ const reason = getErrorCause(e) if (verbose) { logger.log( - `[VERBOSE] workspace ${workspaceRoot}: detect failed (${getErrorCause(e)}); skipping`, + `[VERBOSE] workspace ${workspaceRoot}: load failed (${reason})`, ) } + logger.warn( + `Workspace ${relPath || '.'}: failed to load (${reason}); it could not be analyzed.`, + ) + anyWorkspaceLoadFailed = true + workspaceOutcomes.push({ + hubs: [], + load: 'failed', + reason, + relPath, + }) continue } logger.info( @@ -576,19 +794,58 @@ export async function extractBazelToMaven( verbose, }) - // eslint-disable-next-line no-await-in-loop - const candidates = await discoverCandidatesForWorkspace( - workspaceRoot, - mode, - queryOptsFor(outputUserRoot), - verbose, - ) + const { candidates, indeterminateProbes } = + // eslint-disable-next-line no-await-in-loop + await discoverCandidatesForWorkspace( + workspaceRoot, + mode, + queryOptsFor(outputUserRoot), + verbose, + ) + for (const indeterminate of indeterminateProbes) { + anyIndeterminate = true + hubOutcomes.push({ + hub: indeterminate, + reason: 'probe-indeterminate', + state: 'indeterminate', + }) + } logger.info( `Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${ candidates.join(', ') || '(none)' }`, ) for (const repoName of candidates) { + // Committed-lockfile gate: the server-side walker already ingests any + // committed maven_install.json / <hub>_maven_install.json under the + // workspace; the CLI's synthetic manifest is the COMPLEMENT, not a + // duplicate. Skip emitting when a committed lockfile already covers + // this hub. A skip is a successful no-op, so it runs BEFORE + // `anyRepos` is flipped (which marks "a hub we needed to extract").
+ const committed = committedLockfileCovers({ + fileName: hubManifestFileName(repoName), + manifestDir, + workspaceRoot, + }) + if (committed) { + anyHubCoveredByLockfile = true + logger.info( + `@${repoName}: committed lockfile already covers this hub (${path.relative(cwd, committed) || committed}); skipping synthetic manifest.`, + ) + hubOutcomes.push({ + hub: repoName, + reason: 'committed-lockfile', + state: 'skipped-lockfile', + }) + if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: skipped (committed lockfile at ${committed})`, + ) + } + continue + } + // We are about to extract this hub: it is a real candidate we must + // analyze, so mark the ecosystem present. anyRepos = true if (verbose) { logger.log( @@ -608,6 +865,11 @@ export async function extractBazelToMaven( `@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`, ) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'cquery-timeout', + state: 'failed', + }) // eslint-disable-next-line no-await-in-loop await reapBazelServer(bin, outputUserRoot, verbose) // eslint-disable-next-line no-await-in-loop @@ -624,6 +886,11 @@ export async function extractBazelToMaven( if (result.status === 'error') { logger.warn(`@${repoName}: cquery failed; skipping this hub`) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'cquery-error', + state: 'failed', + }) continue } // A scan must never silently upload a graph missing edges it knows @@ -661,6 +928,11 @@ export async function extractBazelToMaven( `@${repoName}: failed to write manifest (${getErrorCause(e)}); skipping this hub`, ) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'manifest-write-failed', + state: 'failed', + }) continue } if (written.droppedArtifacts.length) { @@ -680,8 +952,14 @@ export async function extractBazelToMaven( totalArtifacts += written.artifactCount if (hubPartial) { hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'incomplete-graph', + state: 'failed', + 
}) } else { hubsSucceeded += 1 + hubOutcomes.push({ hub: repoName, state: 'populated' }) } if (verbose) { logger.log( @@ -693,6 +971,13 @@ export async function extractBazelToMaven( // edges were dropped the partial signal still applies. if (hubPartial) { hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'incomplete-graph', + state: 'failed', + }) + } else { + hubOutcomes.push({ hub: repoName, state: 'empty' }) } if (verbose) { logger.log( @@ -701,35 +986,123 @@ export async function extractBazelToMaven( } } } + workspaceOutcomes.push({ + hubs: hubOutcomes, + load: 'loaded', + relPath, + }) + if (verbose) { + for (const outcome of hubOutcomes) { + logger.log( + `[VERBOSE] workspace ${relPath || '.'} hub @${outcome.hub}: ${outcome.state}${ + outcome.reason ? ` (${outcome.reason})` : '' + }`, + ) + } + } } if (!manifestPaths.length) { - if (!anyRepos) { + // Every discovered hub was already covered by a committed lockfile and + // nothing else needed extraction: writing zero synthetic manifests is + // the CORRECT complement, not a failure. The run is complete only when + // no workspace failed to load and no probe was indeterminate. + if ( + anyHubCoveredByLockfile && + !anyRepos && + !anyWorkspaceLoadFailed && + !anyIndeterminate + ) { + logger.success( + 'All discovered Maven hub(s) are already covered by committed lockfiles; nothing to generate.', + ) + await writeCompletenessSummary({ + artifactCount: 0, + complete: true, + manifestDir, + manifestPaths: [], + status: 'complete', + verbose, + workspaceOutcomes, + }) + return { + artifactCount: 0, + complete: true, + manifestPaths: [], + status: 'complete', + workspaceOutcomes, + } + } + // Nothing was emitted. If nothing was analyzable at all (no repos to + // extract, no committed-lockfile coverage, no workspace load failure, no + // indeterminate probe) this is a genuine absence; otherwise it's a hard + // failure — something was present but we could not extract it. 
+ if ( + !anyRepos && + !anyWorkspaceLoadFailed && + !anyIndeterminate && + !anyHubCoveredByLockfile + ) { if (verbose) { logger.info( 'No Maven artifacts extracted. failureCategory=no-supported-ecosystem', ) } - return { artifactCount: 0, manifestPaths: [], status: 'noEcosystem' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'noEcosystem', + workspaceOutcomes, + } } logger.fail( - 'Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty', + 'Discovered or partially analyzed Maven workspace(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty', ) - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + await writeCompletenessSummary({ + artifactCount: 0, + complete: false, + manifestDir, + manifestPaths: [], + status: 'hardFailure', + verbose, + workspaceOutcomes, + }) + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes, + } } - const status: ExtractBazelStatus = hubsFailed ? 'partial' : 'complete' + // Manifests were written, so the run is not a hard failure. It is only + // `complete` when every queried hub succeeded cleanly AND no workspace + // failed to load AND no probe was indeterminate; any of those means the + // emitted SBOM is known-incomplete (partial under the hybrid rule). + const knownIncomplete = + hubsFailed > 0 || anyWorkspaceLoadFailed || anyIndeterminate + const status: ExtractBazelStatus = knownIncomplete ? 'partial' : 'complete' if (status === 'complete') { logger.success( `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`, ) } else { + const loadNote = anyWorkspaceLoadFailed + ? ', at least one workspace failed to load' + : '' + const indetNote = anyIndeterminate + ? 
', at least one hub could not be classified' + : '' logger.warn( - `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`, + `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete${loadNote}${indetNote}. The uploaded SBOM is known-incomplete.`, ) } if (verbose) { logger.log('[VERBOSE] outputs:', { + anyIndeterminate, + anyWorkspaceLoadFailed, artifactCount: totalArtifacts, hubsFailed, hubsSucceeded, @@ -738,7 +1111,22 @@ export async function extractBazelToMaven( status, }) } - return { artifactCount: totalArtifacts, manifestPaths, status } + await writeCompletenessSummary({ + artifactCount: totalArtifacts, + complete: status === 'complete', + manifestDir, + manifestPaths, + status, + verbose, + workspaceOutcomes, + }) + return { + artifactCount: totalArtifacts, + complete: status === 'complete', + manifestPaths, + status, + workspaceOutcomes, + } } catch (e) { logger.fail(`Unexpected error in bazel2maven: ${getErrorCause(e)}`) if (verbose) { @@ -748,7 +1136,13 @@ export async function extractBazelToMaven( } else { logger.info('Re-run with --verbose for the full stack.') } - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes, + } } finally { for (const dir of mintedRoots) { // eslint-disable-next-line no-await-in-loop diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index 9b4a0fbcf..b2a07d92d 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -697,6 +697,171 @@ Fetched repositories: } }) + it('flags partial (never complete) when a probe is 
indeterminate but another hub succeeds', async () => { + // WORKSPACE mode so the conventional-name probe runs. `maven` succeeds and + // extracts; `maven_install` probe returns an unrecognized non-zero exit + // (indeterminate). The run must be partial, never complete, and carry the + // completeness signal. + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: false, + workspace: true, + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + if (name === 'maven_install') { + // Unrecognized non-zero exit -> indeterminate. + return { code: 1, stdout: '', stderr: 'bazel internal error\n' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + // The indeterminate hub is recorded in the completeness signal. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') + }) + + it('hard-fails (never complete) when the only probe is indeterminate and nothing extracts', async () => { + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: false, + workspace: true, + }) + // Every conventional name probe returns an unrecognized non-zero exit. 
+ vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: 'bazel internal error\n', + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // Nothing analyzable was produced, but a probe was indeterminate, so this + // is a hard failure, NOT noEcosystem (which would imply "no Maven here"). + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + }) + + it('skips emitting a hub manifest when a committed lockfile already covers it', async () => { + // A committed maven_install.json under the workspace means the server-side + // walker already ingests it; the CLI must NOT re-emit a synthetic copy. + writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The hub was skipped, so no synthetic manifest and the cquery never runs. 
+ expect(result.manifestPaths).toHaveLength(0) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven') + }) + + it('still emits a synthetic manifest when no committed lockfile covers the hub', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.manifestPaths).toHaveLength(1) + expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + }) + + it('writes a completeness summary carrying the machine-readable signal', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + status: 'partial', + unresolvedLabels: ['@maven//:missing'], + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + const summary = JSON.parse( + readFileSync( + path.join( + tmp, + '.socket-auto-manifest', + 'socket-bazel-manifest-summary.json', + ), + 'utf8', + ), + ) as { complete: boolean; status: string; workspaces: unknown[] } + expect(summary.complete).toBe(false) + expect(summary.status).toBe('partial') + expect(Array.isArray(summary.workspaces)).toBe(true) + }) + it('writes maven_install.json into .socket-auto-manifest in flat layout', async () => { vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( mkResult({ diff --git a/src/commands/manifest/generate_auto_manifest.mts 
b/src/commands/manifest/generate_auto_manifest.mts index 663891793..0794be55b 100644 --- a/src/commands/manifest/generate_auto_manifest.mts +++ b/src/commands/manifest/generate_auto_manifest.mts @@ -154,8 +154,24 @@ export async function generateAutoManifest({ if (mavenResult.status === 'complete' || mavenResult.status === 'partial') { generatedFiles.push(...mavenResult.manifestPaths) if (mavenResult.status === 'partial') { + // Hybrid handling: still upload the partial SBOM, but be loud AND + // leave a machine-readable trail. The extractor writes a completeness + // summary (complete=false + per-hub/workspace breakdown) into the + // manifest dir; that summary is the structured signal a downstream + // consumer reads to know this upload is known-incomplete. + const incomplete = mavenResult.workspaceOutcomes + .flatMap(w => + w.load === 'failed' + ? [`${w.relPath || '.'} (workspace load failed)`] + : w.hubs + .filter( + h => h.state === 'failed' || h.state === 'indeterminate', + ) + .map(h => `${w.relPath || '.'}@${h.hub} (${h.state})`), + ) + .join(', ') logger.warn( - `Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`, + `WARNING: Bazel Maven manifest generation was PARTIAL (${mavenResult.manifestPaths.length} manifest(s) written); the uploaded SBOM is known-incomplete and may under-report dependencies. Incomplete: ${incomplete || 'see completeness summary'}. 
Uploading what was generated.`, ) } } else { diff --git a/src/commands/manifest/generate_auto_manifest.test.mts b/src/commands/manifest/generate_auto_manifest.test.mts index f8ecf97af..fe2e59235 100644 --- a/src/commands/manifest/generate_auto_manifest.test.mts +++ b/src/commands/manifest/generate_auto_manifest.test.mts @@ -4,8 +4,10 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' vi.mock('./bazel/extract_bazel_to_maven.mts', () => ({ extractBazelToMaven: vi.fn(async () => ({ artifactCount: 1, + complete: true, manifestPaths: ['/tmp/repo/.socket-auto-manifest/maven_install.json'], status: 'complete', + workspaceOutcomes: [], })), })) vi.mock('./convert_gradle_to_maven.mts', () => ({ @@ -27,6 +29,8 @@ vi.mock('../../utils/socket-json.mts', () => ({ readOrDefaultSocketJson: vi.fn(() => ({})), })) +import { logger } from '@socketsecurity/registry/lib/logger' + import { extractBazelToMaven } from './bazel/extract_bazel_to_maven.mts' import { convertGradleToFacts } from './convert-gradle-to-facts.mts' import { convertGradleToMaven } from './convert_gradle_to_maven.mts' @@ -52,8 +56,10 @@ describe('generateAutoManifest — bazel branch', () => { vi.mocked(readOrDefaultSocketJson).mockReturnValue({} as SocketJson) vi.mocked(extractBazelToMaven).mockResolvedValue({ artifactCount: 1, + complete: true, manifestPaths: ['/tmp/repo/.socket-auto-manifest/maven_install.json'], status: 'complete', + workspaceOutcomes: [], }) }) @@ -151,8 +157,10 @@ describe('generateAutoManifest — bazel branch', () => { it('does not run PyPI by default when Maven has no discovery', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'noEcosystem', + workspaceOutcomes: [], }) const result = await generateAutoManifest({ cwd: '/tmp/repo', @@ -167,8 +175,10 @@ describe('generateAutoManifest — bazel branch', () => { it('throws when Maven hard-fails', async () => { 
vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'hardFailure', + workspaceOutcomes: [], }) await expect( generateAutoManifest({ @@ -185,8 +195,10 @@ describe('generateAutoManifest — bazel branch', () => { it('does NOT throw when Maven has no discovery', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'noEcosystem', + workspaceOutcomes: [], }) const result = await generateAutoManifest({ cwd: '/tmp/repo', @@ -198,25 +210,44 @@ describe('generateAutoManifest — bazel branch', () => { expect(result.generatedFiles).toEqual([]) }) - it('pushes manifests and warns on a partial Maven run', async () => { + it('pushes the partial manifests and warns loudly with the incompleteness detail', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 2, + complete: false, manifestPaths: [ '/tmp/repo/.socket-auto-manifest/maven_install.json', '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', ], status: 'partial', + workspaceOutcomes: [ + { + hubs: [{ hub: 'maven', reason: 'cquery-timeout', state: 'failed' }], + load: 'loaded', + relPath: 'sub', + }, + ], }) - const result = await generateAutoManifest({ - cwd: '/tmp/repo', - detected: { ...baseDetected, bazel: true, count: 1 }, - outputKind: 'text', - verbose: false, - }) - expect(result.generatedFiles).toEqual([ - '/tmp/repo/.socket-auto-manifest/maven_install.json', - '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', - ]) + const warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => logger) + try { + const result = await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + // Hybrid: the partial SBOM is still uploaded. 
+ expect(result.generatedFiles).toEqual([ + '/tmp/repo/.socket-auto-manifest/maven_install.json', + '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', + ]) + const warned = warnSpy.mock.calls.map(c => String(c[0])).join('\n') + expect(warned).toMatch(/PARTIAL/) + expect(warned).toMatch(/known-incomplete/) + // The structured outcome detail surfaces the failing hub. + expect(warned).toMatch(/sub@maven \(failed\)/) + } finally { + warnSpy.mockRestore() + } }) it('runs BOTH bazel and gradle branches when both are detected', async () => { diff --git a/src/utils/socket-json.mts b/src/utils/socket-json.mts index 5bbbb21d4..e8c7ad257 100644 --- a/src/utils/socket-json.mts +++ b/src/utils/socket-json.mts @@ -47,6 +47,7 @@ export interface SocketJson { bin?: string | undefined disabled?: boolean | undefined out?: string | undefined + perRepoTimeout?: number | undefined verbose?: boolean | undefined } conda?: { From 1a31cde4419c1f99c9bfc31b92c618369d6e0c66 Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 12:36:33 +0200 Subject: [PATCH 4/7] test(manifest/bazel): index access instead of Array.at for mock call lookup Array.prototype.at requires the es2022 lib; the stricter typechecker flagged it on the mock.calls tuple type. Use length-1 index access, which is lib-agnostic. 
--- src/commands/manifest/bazel/extract_bazel_to_maven.test.mts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index b2a07d92d..96d859d29 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -509,7 +509,8 @@ Fetched repositories: outLayout: 'flat', verbose: false, }) - const call = vi.mocked(findWorkspaceRoots).mock.calls.at(-1)![0] + const calls = vi.mocked(findWorkspaceRoots).mock.calls + const call = calls[calls.length - 1]![0] const names = [...(call.ignoreDirNames ?? [])] expect(names).toContain('node_modules') expect(names).toContain('.git') @@ -536,7 +537,8 @@ Fetched repositories: outLayout: 'flat', verbose: false, }) - const call = vi.mocked(findWorkspaceRoots).mock.calls.at(-1)![0] + const calls = vi.mocked(findWorkspaceRoots).mock.calls + const call = calls[calls.length - 1]![0] const names = [...(call.ignoreDirNames ?? [])] expect(names).toEqual( expect.arrayContaining(['node_modules', 'custom_dir']), From 24ea6cd31db56841a19d9c18e8897549957b730a Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 13:24:52 +0200 Subject: [PATCH 5/7] fix(manifest/bazel): scope committed-lockfile gate to its own workspace root The committed-lockfile gate walked the whole tree under a workspace root and matched a maven_install.json at any depth. A nested workspace or test fixture lockfile then wrongly marked the root @maven hub as already covered, so its synthetic manifest was skipped and its distinct coordinates were never emitted while the run could still report complete. Scope the gate to depth-0: a committed lockfile only covers the workspace it lives directly in, since each workspace is analyzed independently and the server walker ingests every committed lockfile against its own workspace. 
Also exclude the CLI's own synthetic output directories (.socket-auto-manifest, bazel-manifests) by name so a stale prior-run manifest can never be misread as a committed lockfile. Mark Maven hub enumeration indeterminate when `bazel mod show_extension` fails to execute (non-zero exit), distinct from a clean run that finds no maven extension. A failed enumeration may have missed custom-named hubs, so the run is reported known-incomplete (partial when other hubs succeeded, hard failure when nothing is analyzable) instead of silently falling back to conventional-name probes and reporting complete. --- .../manifest/bazel/extract_bazel_to_maven.mts | 127 ++++--- .../bazel/extract_bazel_to_maven.test.mts | 358 ++++++++++++++++++ 2 files changed, 442 insertions(+), 43 deletions(-) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index a7ef9d3a6..6758c8159 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -334,49 +334,57 @@ function hubManifestFileName(repoName: string): string { : `${repoName}_maven_install.json` } -// Does a committed lockfile already cover this workspace/hub? The server-side -// walker globs `**/*maven_install.json` at any depth, so a real committed -// lockfile anywhere under the workspace root is already ingested and the CLI -// must not re-emit a synthetic complement (double-emit). We check the -// workspace root itself and any committed lockfile beneath it, while skipping -// the directory we are about to write synthetic manifests into so we never -// mistake our own prior output for a committed file. +// Directory basenames the CLI itself writes synthetic manifests into. A file +// living inside one of these is our own output, NOT a committed lockfile, no +// matter which run wrote it: the auto-manifest sibling dir (flat layout) and +// the explicit-command default output dir. 
The gate must never read a file in +// one of these as evidence of committed coverage, or a stale prior-run +// synthetic file would let a later run wrongly skip a hub. +const CLI_SYNTHETIC_OUTPUT_DIR_NAMES: ReadonlySet<string> = new Set([ + '.socket-auto-manifest', + 'bazel-manifests', +]) + +// Does a committed lockfile already cover THIS hub at THIS hub's own workspace +// root? Each workspace is processed independently by the caller, and a +// committed lockfile covers the workspace it lives IN — a nested workspace's +// `maven_install.json` covers that nested hub, not this one. The server-side +// walker ingests every committed `**/*maven_install.json`, but each one only +// covers its own workspace. So the gate checks DEPTH-0 only: a lockfile named +// for this hub sitting directly in `workspaceRoot`. A recursive descent would +// let an unrelated nested/fixture lockfile mask an uncovered root hub — +// silently dropping its distinct coordinates. +// +// The CLI's own synthetic output is never a committed lockfile: we skip the +// current run's `manifestDir` and any known synthetic output dir basename so a +// stale prior-run file can't be misread as committed. function committedLockfileCovers(args: { fileName: string manifestDir: string workspaceRoot: string }): string | undefined { const { fileName, manifestDir, workspaceRoot } = args - // Resolve once so the manifest-dir skip comparison is path-normalized. + // The current run's synthetic output dir, resolved for an exact compare. const manifestDirResolved = path.resolve(manifestDir) - const stack: string[] = [workspaceRoot] - while (stack.length) { - const dir = stack.pop()! - // Never treat our own synthetic output directory as a committed lockfile.
- if (path.resolve(dir) === manifestDirResolved) { - continue - } - let entries: Dirent[] - try { - entries = readdirSync(dir, { withFileTypes: true }) - } catch { - continue - } - for (const entry of entries) { - if (entry.isDirectory()) { - const name = entry.name - // Don't descend the noise dirs the walker also prunes; this keeps the - // gate cheap and avoids walking node_modules/VCS trees. - if ( - DEFAULT_BAZEL_WALKER_IGNORE_DIR_NAMES.has(name) || - DEFAULT_BAZEL_WALKER_IGNORE_DIR_PREFIXES.some(p => name.startsWith(p)) - ) { - continue - } - stack.push(path.join(dir, entry.name)) - } else if (entry.isFile() && entry.name === fileName) { - return path.join(dir, entry.name) - } + const workspaceRootResolved = path.resolve(workspaceRoot) + // The committed lockfile, if any, lives directly in the hub's own workspace + // root — not in a nested workspace and not in the CLI's output dir. + if ( + workspaceRootResolved === manifestDirResolved || + CLI_SYNTHETIC_OUTPUT_DIR_NAMES.has(path.basename(workspaceRootResolved)) + ) { + // The workspace root IS an output location; nothing here is committed. + return undefined + } + let entries: Dirent[] + try { + entries = readdirSync(workspaceRootResolved, { withFileTypes: true }) + } catch { + return undefined + } + for (const entry of entries) { + if (entry.isFile() && entry.name === fileName) { + return path.join(workspaceRootResolved, entry.name) } } return undefined @@ -491,6 +499,13 @@ type DiscoverResult = { // an unrecognized error). A non-empty list means discovery may have missed // a hub, so the run can never be reported complete. indeterminateProbes: string[] + // True when authoritative hub enumeration could not be performed: under + // Bzlmod, `bazel mod show_extension` FAILED TO EXECUTE (non-zero exit). 
That + // is NOT the legitimate "ran fine, no maven extension defined" case (which + // is a clean code-0 with zero kept hubs) — it means we may have missed + // custom-named hubs the conventional-name probe cannot find, so the run can + // never be reported complete. + discoveryIndeterminate: boolean } async function discoverCandidatesForWorkspace( @@ -502,6 +517,7 @@ async function discoverCandidatesForWorkspace( const candidates: string[] = [] const indeterminateProbes: string[] = [] let showExtensionSucceeded = false + let discoveryIndeterminate = false if (mode.bzlmod) { const extResult = await runBazelModShowMavenExtension(queryOpts) if (extResult.code === 0) { @@ -532,10 +548,19 @@ async function discoverCandidatesForWorkspace( } } } - } else if (verbose) { - logger.log( - `[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`, - ) + } else { + // show_extension FAILED TO EXECUTE (non-zero exit). This is distinct + // from a clean run that found no maven extension (code 0, zero kept + // hubs): a failed enumeration means custom-named hubs may exist that the + // conventional-name probe below cannot find. Mark discovery + // indeterminate so the run is never reported complete, while still + // falling through to the conventional probe for best-effort coverage. + discoveryIndeterminate = true + if (verbose) { + logger.log( + `[VERBOSE] workspace ${workspaceRoot}: show_extension failed to execute (code=${extResult.code}); hub enumeration is indeterminate — falling back to conventional probe`, + ) + } } } // Probe candidates the show_extension path could not authoritatively @@ -547,7 +572,7 @@ async function discoverCandidatesForWorkspace( showExtensionSucceeded ? 
[] : [...CONVENTIONAL_MAVEN_REPO_NAMES] ).filter(name => !seen.has(name)) if (!toProbe.length) { - return { candidates, indeterminateProbes } + return { candidates, discoveryIndeterminate, indeterminateProbes } } const probe = buildMavenProbeFor(queryOpts) for (const name of toProbe) { @@ -563,7 +588,7 @@ async function discoverCandidatesForWorkspace( indeterminateProbes.push(name) } } - return { candidates, indeterminateProbes } + return { candidates, discoveryIndeterminate, indeterminateProbes } } // Best-effort reap of a Bazel server. Spawned with a short timeout so @@ -794,7 +819,7 @@ export async function extractBazelToMaven( verbose, }) - const { candidates, indeterminateProbes } = + const { candidates, discoveryIndeterminate, indeterminateProbes } = // eslint-disable-next-line no-await-in-loop await discoverCandidatesForWorkspace( workspaceRoot, @@ -802,6 +827,22 @@ export async function extractBazelToMaven( queryOptsFor(outputUserRoot), verbose, ) + // Authoritative hub enumeration failed to execute (e.g. `bazel mod + // show_extension` errored under Bzlmod): custom-named hubs may have been + // missed, so the run can never be complete. Record it as an + // indeterminate hub outcome under a synthetic name so the completeness + // signal carries the gap. + if (discoveryIndeterminate) { + anyIndeterminate = true + hubOutcomes.push({ + hub: '(enumeration)', + reason: 'show-extension-failed', + state: 'indeterminate', + }) + logger.warn( + `Workspace ${relPath || '.'}: Maven hub enumeration failed; custom-named hubs may be missing. 
The run is reported known-incomplete.`, + ) + } for (const indeterminate of indeterminateProbes) { anyIndeterminate = true hubOutcomes.push({ diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index 96d859d29..08a29c123 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -808,6 +808,364 @@ Fetched repositories: expect(skipped).toContain('maven') }) + it('extracts the root hub even when a nested dir holds a maven_install.json (no any-depth match)', async () => { + // The root @maven is UNCOVERED: there is no maven_install.json directly in + // the workspace root. A nested fixture/example holds its own + // maven_install.json, which covers ITS workspace, not the root hub. An + // any-depth gate would wrongly judge the root hub covered, skip its + // synthetic emit, and silently drop its distinct coordinates. The gate is + // depth-0, so the root hub must still be extracted. + const nested = path.join(tmp, 'examples', 'nested') + mkdirSync(nested, { recursive: true }) + writeFileSync( + path.join(nested, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:rootonly:1.0', 'rootonly')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The root hub was NOT skipped: cquery ran and the synthetic manifest + // carrying the root's distinct coordinate was emitted. 
+ expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + expect(result.manifestPaths).toHaveLength(1) + const manifest = readManifest(tmp) as { + artifacts: Record + } + expect(Object.keys(manifest.artifacts)).toEqual(['com.example:rootonly']) + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toEqual([]) + }) + + it('reports complete:true with zero synthetic manifests when every hub is covered by a committed root-level lockfile', async () => { + // A committed maven_install.json sits directly in the workspace root, so + // the only discovered hub is covered. The CLI writes zero synthetic + // manifests and the run must headline complete:true. + writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('complete') + expect(result.complete).toBe(true) + expect(result.manifestPaths).toHaveLength(0) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + // The emitted completeness summary also headlines complete:true. + const summary = JSON.parse( + readFileSync( + path.join( + tmp, + '.socket-auto-manifest', + 'socket-bazel-manifest-summary.json', + ), + 'utf8', + ), + ) as { complete: boolean; status: string } + expect(summary.complete).toBe(true) + expect(summary.status).toBe('complete') + }) + + it('does not treat a prior-run synthetic manifest in the output dir as a committed lockfile', async () => { + // A previous run left a synthetic maven_install.json inside the output dir + // (.socket-auto-manifest). A later run must NOT read it as a committed + // lockfile and skip the hub; it must re-extract. 
+ const outputDir = path.join(tmp, '.socket-auto-manifest') + mkdirSync(outputDir, { recursive: true }) + writeFileSync( + path.join(outputDir, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The stale synthetic file did not gate the hub: cquery ran and a manifest + // was emitted. + expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + expect(result.manifestPaths).toHaveLength(1) + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toEqual([]) + }) + + it('maps a hub named maven to maven_install.json for the committed-lockfile gate', async () => { + // The default `maven` hub is covered by a committed `maven_install.json`. + writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven') + }) + + it('maps a non-default hub to _maven_install.json for the committed-lockfile gate', async () => { + // A non-default hub `maven_dev` is covered only by a committed file named + // `maven_dev_maven_install.json`. 
A bare `maven_install.json` must NOT + // cover it, and the prefixed file must. + vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: `## @@rules_jvm_external+//:extensions.bzl%maven: + +Fetched repositories: + - maven_dev (imported by ) +`, + stderr: '', + }) + writeFileSync( + path.join(tmp, 'maven_dev_maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven_dev') + }) + + it('flags partial (never complete) when show_extension fails to execute but a probed hub extracts', async () => { + // show_extension errored (non-zero exit): authoritative hub enumeration is + // indeterminate, so custom-named hubs may have been missed. The + // conventional probe still finds @maven and extracts it, but the run must + // be partial — never silently complete. 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 37, + stdout: '', + stderr: 'ERROR: bazel mod show_extension failed to run\n', + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + // The failed enumeration is recorded as an indeterminate hub outcome. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') + }) + + it('stays complete when show_extension runs cleanly and finds no maven extension (legitimate not-defined)', async () => { + // show_extension ran fine (code 0) but the report has no maven section, so + // the parse yields zero hubs. This is the legitimate "no maven extension + // defined" case — NOT an execution failure. The conventional probe then + // finds @maven and extracts it; the run is complete (no indeterminate + // enumeration outcome). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: 'No extensions defined.\n', + stderr: '', + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('complete') + expect(result.complete).toBe(true) + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).not.toContain('indeterminate') + }) + + it('hard-fails (never complete) when show_extension fails to execute and nothing extracts', async () => { + // Enumeration failed and no conventional hub probes populated: nothing + // analyzable was produced, but the indeterminate enumeration means this is + // NOT a clean "no Maven here" — it must be a hard failure, never complete. 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 37, + stdout: '', + stderr: 'ERROR: bazel mod show_extension failed to run\n', + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + }) + + it('never reports complete when one workspace fails to load while another extracts', async () => { + // Two workspaces: the first loads and extracts cleanly; the second throws + // on load (e.g. an unreadable MODULE.bazel). A load failure is NOT "no + // Maven here" — the run must be partial (a manifest was written), never + // complete. + const nested = path.join(tmp, 'broken') + mkdirSync(nested, { recursive: true }) + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp, nested]) + vi.mocked(detectWorkspaceMode).mockImplementation((root: string) => { + if (root === nested) { + throw new Error('unbound variable in MODULE.bazel') + } + return { bzlmod: true, workspace: false } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + const loadFailed = result.workspaceOutcomes.filter( + w => w.load === 'failed', + ) + expect(loadFailed).toHaveLength(1) + }) + + it('hard-fails (never complete) when the only workspace fails to 
load', async () => { + // A single workspace that cannot be read produces zero manifests. This is + // a load failure, not noEcosystem — it must be a hard failure, never + // complete and never silently "no Maven here". + vi.mocked(detectWorkspaceMode).mockImplementation(() => { + throw new Error('unbound variable in MODULE.bazel') + }) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toEqual([]) + }) + it('still emits a synthetic manifest when no committed lockfile covers the hub', async () => { vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( mkResult({ From f95952d9f380cd51f8f3268cee1626fdf3e84d3f Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 13:44:50 +0200 Subject: [PATCH 6/7] fix(manifest/bazel): classify show_extension non-zero exits instead of blanket-flagging bazel mod show_extension for the maven extension exits non-zero on every bzlmod repo that doesn't depend on rules_jvm_external, because its argument resolution throws before any Starlark runs. Treating any non-zero exit as an indeterminate enumeration wrongly flipped those legitimate no-Maven repos to a hard failure, aborting the user's whole auto-manifest scan. 
Classify the result by stderr shape via classifyShowExtensionResult: - extension/module not in the dependency graph (arg-resolution error) is a legitimate not-defined, contributing no Maven (noEcosystem) - a genuine MODULE.bazel evaluation/load failure, or a missing binary (normalized code -1), stays indeterminate so the run is never complete - an unrecognized non-zero exit biases to not-defined; only positive eval-failure stderr escalates to indeterminate Replace the old hard-fail test (whose mock was a legitimate no-Maven repo) with a noEcosystem assertion, and add coverage for the not-in-graph and genuine-eval-failure paths. --- .../manifest/bazel/bazel-repo-discovery.mts | 100 ++++++++++++++++++ .../bazel/bazel-repo-discovery.test.mts | 95 +++++++++++++++++ .../manifest/bazel/extract_bazel_to_maven.mts | 62 +++++++---- .../bazel/extract_bazel_to_maven.test.mts | 76 +++++++++++-- 4 files changed, 300 insertions(+), 33 deletions(-) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts index 8da537598..94a745e72 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -86,6 +86,106 @@ const SHOW_EXT_SECTION_HEADER_RE = const FETCHED_HUB_BULLET_RE = /^ {2}- (?\S+) \(imported by (?[^)]+)\)\s*$/ +// `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` +// exits non-zero in two very different situations, and conflating them is +// dangerous for a security tool: +// +// (a) `@rules_jvm_external` simply isn't in the root module's resolved +// dependency graph. This is the COMMON case for any bzlmod repo that +// doesn't use rules_jvm_external (no Maven at all). Bazel's ModCommand +// resolves the extension argument up front via +// `ExtensionArg.resolveToExtensionId`, which throws +// `InvalidArgumentException` and exits non-zero before evaluating any +// Starlark. 
This is NOT a failure to analyze; it is a positive, +// authoritative "there is no maven extension here". It must map to +// `not-defined` so the workspace cleanly contributes no Maven. +// +// (b) The module graph genuinely fails to evaluate: a Starlark eval error, +// an unbound name (e.g. a MODULE.bazel referencing `PYTHON_VERSION` / +// `pip` before definition), a syntax error, or the bazel binary itself +// being missing/spawn-failed (normalized to code -1). Here we have NO +// evidence about whether a maven extension exists, so it must map to +// `indeterminate` and the run can never be reported complete. +// +// We classify by stderr shape. The exact wording differs across Bazel +// versions; the regex families below are intentionally broad and SHOULD be +// confirmed against live `bazel mod show_extension` output. + +// Family (a): the extension / module is not resolvable in the dependency +// graph — an argument-resolution error, not an evaluation failure. These all +// mean "rules_jvm_external (and thus the maven extension) is not present", +// i.e. legitimately not-defined. +const SHOW_EXT_NOT_IN_GRAPH_STDERR_RE = + /(?:in extension argument|extension argument)?.*(?:not (?:found|resolvable|defined)|no such (?:module|repo(?:sitory)?)|cannot be resolved|is not (?:a )?(?:visible |known )?(?:module|repo(?:sitory)?|extension)|not in the (?:dependency )?graph|unknown (?:module|extension)|does not (?:exist|use the extension))/i +// Bazel's canonical phrasing when the named module backing the extension +// (here `rules_jvm_external`) isn't a dependency of the root module. +const SHOW_EXT_MODULE_NOT_DEP_STDERR_RE = + /(?:rules_jvm_external|module ['"`]?[A-Za-z0-9._+~-]+['"`]?).*(?:is not (?:a )?(?:direct )?dep(?:endenc(?:y|ies))?|not (?:a )?dependency)/i + +// Family (b): a genuine evaluation / load failure of the module graph. These +// mean we could not determine whether a maven extension exists, so the result +// is indeterminate, never a clean not-defined. 
+const SHOW_EXT_EVAL_FAILURE_STDERR_RE = + /(?:error (?:evaluating|loading|computing)|failed to (?:evaluate|load)|evaluation (?:of|failed)|cannot load|syntax error|name ['"`]?[A-Za-z0-9_]+['"`]? is not defined|variable ['"`]?[A-Za-z0-9_]+['"`]? (?:is|was) (?:referenced|not)|unbound|invalid MODULE\.bazel|MODULE\.bazel.*(?:error|failed)|Traceback|Error in)/i + +// Outcome of running `bazel mod show_extension` for the maven extension, +// distinct from the per-repo `ProbeStatus`: +// `not-defined` — authoritative: no maven extension in this workspace +// (clean run with zero kept hubs, OR rules_jvm_external is +// not in the dependency graph). +// `indeterminate` — enumeration could not be performed (eval/load failure, +// binary missing); the run must not be reported complete. +// `defined` — the report parsed and yielded one or more root hubs; +// the caller uses the parsed hub list directly. +export type ShowExtensionStatus = 'defined' | 'indeterminate' | 'not-defined' + +// Classify a `bazel mod show_extension` result. `keptRootHubCount` is the +// number of root-imported hubs the caller parsed from a code-0 run (see +// `parseShowExtensionOutput` + the `<root>` importer filter); it disambiguates +// the code-0 cases without re-parsing here. +// +// IMPORTANT (security correctness): a non-zero exit is the DEFAULT outcome for +// every bzlmod repo that does not use rules_jvm_external, so we must NOT treat +// non-zero as indeterminate by default. We only escalate to `indeterminate` +// when stderr looks like a real evaluation/load failure; an argument/resolution +// error about the missing extension is the legitimate no-Maven case. +export function classifyShowExtensionResult( + result: ProbeResult, + keptRootHubCount: number, +): ShowExtensionStatus { + if (result.code === 0) { + // Clean run. Either it enumerated root hubs (`defined`) or it ran fine and + // found no maven extension for the root (`not-defined`). + return keptRootHubCount > 0 ? 
'defined' : 'not-defined' + } + // A spawn failure / missing binary is normalized to code -1 upstream; there + // is no usable stderr classification and we definitely could not enumerate. + if (result.code === -1) { + return 'indeterminate' + } + const { stderr } = result + // A genuine module-graph evaluation/load failure wins: we cannot conclude + // anything about maven presence, so surface it as indeterminate. + if (SHOW_EXT_EVAL_FAILURE_STDERR_RE.test(stderr)) { + return 'indeterminate' + } + // The maven extension / rules_jvm_external is simply not in the dependency + // graph: an argument-resolution error. This is the common no-Maven bzlmod + // repo and is authoritatively not-defined. + if ( + SHOW_EXT_NOT_IN_GRAPH_STDERR_RE.test(stderr) || + SHOW_EXT_MODULE_NOT_DEP_STDERR_RE.test(stderr) + ) { + return 'not-defined' + } + // Truly unrecognized non-zero exit. Bias toward not-defined: the dominant + // real-world non-zero case is "extension not in the graph", and a missing + // bullet here would otherwise abort the user's entire scan. We only reach + // `indeterminate` above when stderr positively looks like an eval/load + // failure, which is the case the flag exists for. + return 'not-defined' +} + // Pure parser for `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` // stdout. Returns the hub repos listed under `Fetched repositories:` — i.e. 
// items annotated with `(imported by ...)` — each carrying the set of modules diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts index 248cb53b8..d29b375ec 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -5,6 +5,7 @@ import { logger } from '@socketsecurity/registry/lib/logger' import { CONVENTIONAL_MAVEN_REPO_NAMES, classifyProbeResult, + classifyShowExtensionResult, parseShowExtensionOutput, probeCandidate, } from './bazel-repo-discovery.mts' @@ -13,6 +14,7 @@ import type { ProbeResult, ProbeStatus, RepoProbe, + ShowExtensionStatus, } from './bazel-repo-discovery.mts' // Truncated text-format report Bazel 8.4.2 emits on tink-java for @@ -206,6 +208,99 @@ describe('bazel-repo-discovery', () => { }) }) + describe('classifyShowExtensionResult', () => { + // NOTE: the exact bazel stderr wording for these error families should be + // confirmed against a live `bazel mod show_extension` run; the sandbox + // blocks bazel here, so the strings below are representative shapes. + it('classifies code=0 with parsed root hubs as defined', () => { + expect( + classifyShowExtensionResult(probeResult({ code: 0 }), 2), + ).toBe('defined') + }) + + it('classifies a clean code=0 run with zero kept hubs as not-defined', () => { + // Ran fine, no maven extension for the root: legitimate absence. + expect( + classifyShowExtensionResult( + probeResult({ code: 0, stdout: 'No extensions defined.\n' }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies "module is not a dependency of the root module" (rules_jvm_external not in dep graph) as not-defined', () => { + // The COMMON no-Maven bzlmod repo: ModCommand resolves the extension + // argument up front and throws InvalidArgumentException before any + // Starlark runs. Non-zero exit, but authoritatively "no maven here". 
+ expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + "ERROR: In extension argument '@rules_jvm_external//:extensions.bzl%maven': module 'rules_jvm_external' is not a dependency of the root module\n", + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies a generic "extension not found / not resolvable" arg error as not-defined', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + 'ERROR: extension argument: no such module @rules_jvm_external\n', + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies a genuine MODULE.bazel evaluation failure (unbound name) as indeterminate', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'PYTHON_VERSION' is not defined\n", + }), + 0, + ), + ).toBe('indeterminate') + }) + + it('classifies a Starlark syntax error in the module graph as indeterminate', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: 'ERROR: /work/MODULE.bazel:3:1: syntax error near pip\n', + }), + 0, + ), + ).toBe('indeterminate') + }) + + it('classifies a spawn failure / missing binary (normalized code -1) as indeterminate', () => { + expect( + classifyShowExtensionResult(probeResult({ code: -1 }), 0), + ).toBe('indeterminate') + }) + + it('biases a truly unrecognized non-zero exit toward not-defined (extension-not-in-graph dominates; never abort the scan)', () => { + // We only escalate to indeterminate when stderr positively looks like an + // eval/load failure. An unrecognized arg-style error must not flip a + // no-Maven repo into a hard failure that aborts the whole scan. 
+ expect( + classifyShowExtensionResult( + probeResult({ code: 7, stderr: 'ERROR: something unexpected\n' }), + 0, + ), + ).toBe('not-defined') + }) + }) + describe('probeCandidate', () => { it('returns the classified status from a probe', async () => { expect( diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index 6758c8159..6248d1d21 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -23,6 +23,7 @@ import { import { CONVENTIONAL_MAVEN_REPO_NAMES, ROOT_MODULE_IMPORTER, + classifyShowExtensionResult, parseShowExtensionOutput, probeCandidate, } from './bazel-repo-discovery.mts' @@ -500,11 +501,15 @@ type DiscoverResult = { // a hub, so the run can never be reported complete. indeterminateProbes: string[] // True when authoritative hub enumeration could not be performed: under - // Bzlmod, `bazel mod show_extension` FAILED TO EXECUTE (non-zero exit). That - // is NOT the legitimate "ran fine, no maven extension defined" case (which - // is a clean code-0 with zero kept hubs) — it means we may have missed - // custom-named hubs the conventional-name probe cannot find, so the run can - // never be reported complete. + // Bzlmod, `bazel mod show_extension` failed in a way that signals the module + // graph itself could not be evaluated (Starlark eval error, unbound name, + // syntax error, or the binary being missing). That is distinct from BOTH a + // clean code-0 run with zero kept hubs AND a non-zero exit that merely means + // rules_jvm_external isn't in the dependency graph — those are legitimate + // "no maven extension here" outcomes (the common no-Maven bzlmod repo) and + // must NOT flip the run to indeterminate. Only a genuine evaluation failure + // means we may have missed custom-named hubs, so the run can never be + // reported complete. See `classifyShowExtensionResult`. 
discoveryIndeterminate: boolean } @@ -520,15 +525,20 @@ async function discoverCandidatesForWorkspace( let discoveryIndeterminate = false if (mode.bzlmod) { const extResult = await runBazelModShowMavenExtension(queryOpts) - if (extResult.code === 0) { - // The maven extension generates a hub for EVERY module that uses it — - // the root's own `maven.install` hub(s) plus the rulesets' internal - // hubs (rules_jvm_external_deps, stardoc_maven, …). Keep only hubs - // imported by <root>; the rest are build-tooling, not the user's SBOM. - const entries = parseShowExtensionOutput(extResult.stdout) - const kept = entries.filter(e => - e.importers.includes(ROOT_MODULE_IMPORTER), - ) + // The maven extension generates a hub for EVERY module that uses it — the + // root's own `maven.install` hub(s) plus the rulesets' internal hubs + // (rules_jvm_external_deps, stardoc_maven, …). Keep only hubs imported by + // <root>; the rest are build-tooling, not the user's SBOM. On a non-zero + // exit the output is empty, so `kept` is naturally empty too. + const entries = parseShowExtensionOutput(extResult.stdout) + const kept = entries.filter(e => e.importers.includes(ROOT_MODULE_IMPORTER)) + // Classify the run rather than treating ANY non-zero exit as a failure: + // `bazel mod show_extension` exits non-zero on every bzlmod repo that + // doesn't depend on rules_jvm_external (its argument resolution throws + // before any Starlark runs), so a blanket non-zero=indeterminate would + // wrongly flag the common no-Maven repo and abort the user's whole scan. + const showExtStatus = classifyShowExtensionResult(extResult, kept.length) + if (showExtStatus === 'defined') { candidates.push(...kept.map(e => e.name)) // Gate the probe fallback on the KEPT count, not the raw parse: a // report listing only transitive ruleset hubs (all filtered out) must @@ -548,19 +558,27 @@ async function discoverCandidatesForWorkspace( } } } - } else { - // show_extension FAILED TO EXECUTE (non-zero exit). 
This is distinct - // from a clean run that found no maven extension (code 0, zero kept - // hubs): a failed enumeration means custom-named hubs may exist that the - // conventional-name probe below cannot find. Mark discovery - // indeterminate so the run is never reported complete, while still - // falling through to the conventional probe for best-effort coverage. + } else if (showExtStatus === 'indeterminate') { + // The module graph itself could not be evaluated (Starlark eval error, + // unbound name, syntax error, or a missing binary normalized to code + // -1). We have NO evidence about whether custom-named maven hubs exist, + // so mark discovery indeterminate — the run can never be reported + // complete — while still falling through to the conventional probe for + // best-effort coverage. discoveryIndeterminate = true if (verbose) { logger.log( - `[VERBOSE] workspace ${workspaceRoot}: show_extension failed to execute (code=${extResult.code}); hub enumeration is indeterminate — falling back to conventional probe`, + `[VERBOSE] workspace ${workspaceRoot}: show_extension failed to evaluate the module graph (code=${extResult.code}); hub enumeration is indeterminate — falling back to conventional probe`, ) } + } else if (verbose) { + // `not-defined`: either a clean run with no root maven extension, or a + // non-zero exit that merely means rules_jvm_external isn't in the + // dependency graph. Both are authoritative "no maven here"; we still + // probe conventional names for a hybrid WORKSPACE-maven repo. 
+ logger.log( + `[VERBOSE] workspace ${workspaceRoot}: show_extension reports no root maven extension (code=${extResult.code}); treating as not-defined — probing conventional hub names`, + ) } } // Probe candidates the show_extension path could not authoritatively diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index 08a29c123..6b50ac7df 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -986,15 +986,19 @@ Fetched repositories: expect(skipped).toContain('maven_dev') }) - it('flags partial (never complete) when show_extension fails to execute but a probed hub extracts', async () => { - // show_extension errored (non-zero exit): authoritative hub enumeration is - // indeterminate, so custom-named hubs may have been missed. The + it('flags partial (never complete) when show_extension fails to evaluate the module graph but a probed hub extracts', async () => { + // show_extension hit a genuine module-graph EVALUATION failure (not merely + // "rules_jvm_external isn't a dependency"): authoritative hub enumeration + // is indeterminate, so custom-named hubs may have been missed. The // conventional probe still finds @maven and extracts it, but the run must // be partial — never silently complete. + // NOTE: exact bazel stderr wording for an eval failure should be confirmed + // against a live bazel run (sandbox blocks bazel here). 
vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ - code: 37, + code: 1, stdout: '', - stderr: 'ERROR: bazel mod show_extension failed to run\n', + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'PYTHON_VERSION' is not defined\n", }) vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { if (name === 'maven') { @@ -1077,14 +1081,60 @@ Fetched repositories: expect(hubStates).not.toContain('indeterminate') }) - it('hard-fails (never complete) when show_extension fails to execute and nothing extracts', async () => { - // Enumeration failed and no conventional hub probes populated: nothing - // analyzable was produced, but the indeterminate enumeration means this is - // NOT a clean "no Maven here" — it must be a hard failure, never complete. + it('reports noEcosystem (never hard-fails) when show_extension exits non-zero on a no-Maven bzlmod repo and nothing extracts', async () => { + // `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` + // exits non-zero on EVERY bzlmod repo that doesn't depend on + // rules_jvm_external — its argument resolution throws before any Starlark + // runs. This generic non-zero exit (no eval-failure signature) is the + // common no-Maven case, NOT a failed enumeration. With no probed hub + // populating, the run is a clean noEcosystem — it must NOT hard-fail, which + // would abort the user's entire `scan create --auto-manifest`. + // NOTE: exact bazel stderr wording should be confirmed against a live bazel + // run (sandbox blocks bazel here). 
vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ - code: 37, + code: 1, stdout: '', - stderr: 'ERROR: bazel mod show_extension failed to run\n', + stderr: + "ERROR: In extension argument '@rules_jvm_external//:extensions.bzl%maven': module 'rules_jvm_external' is not a dependency of the root module\n", + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('noEcosystem') + expect(result.complete).toBe(false) + // No hub was flagged indeterminate: the non-zero exit was correctly read as + // "no maven extension here", not a failed enumeration. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).not.toContain('indeterminate') + }) + + it('hard-fails (never complete) when show_extension fails to evaluate the module graph and nothing extracts', async () => { + // A genuine module-graph evaluation failure (Starlark eval error / unbound + // name) leaves hub enumeration indeterminate. With no probed hub + // populating, nothing analyzable was produced — and because enumeration was + // indeterminate this is NOT a clean "no Maven here", so it must be a hard + // failure, never complete and never silently noEcosystem. + // NOTE: exact bazel stderr wording should be confirmed against a live bazel + // run (sandbox blocks bazel here). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 1, + stdout: '', + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'pip' is not defined\n", }) vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ code: 1, @@ -1103,6 +1153,10 @@ Fetched repositories: }) expect(result.status).toBe('hardFailure') expect(result.complete).toBe(false) + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') }) it('never reports complete when one workspace fails to load while another extracts', async () => { From 632d5e9a141961fae7f1db358cb08d5ecd4d78ca Mon Sep 17 00:00:00 2001 From: Simon Jensen Date: Fri, 12 Jun 2026 13:49:17 +0200 Subject: [PATCH 7/7] fix(manifest/bazel): match real show_extension not-in-graph wording The not-in-graph classifier relied on the conservative default for Bazel's actual phrasing ('No module with the apparent repo name X exists in the dependency graph'). Add that verified wording explicitly and pin both the real not-in-graph (exit 2 -> not-defined) and real unbound-name eval failure (exit 2 -> indeterminate, eval-failure checked first) with tests. --- .../manifest/bazel/bazel-repo-discovery.mts | 7 ++-- .../bazel/bazel-repo-discovery.test.mts | 35 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts index 94a745e72..931c79b25 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -114,9 +114,12 @@ const FETCHED_HUB_BULLET_RE = // Family (a): the extension / module is not resolvable in the dependency // graph — an argument-resolution error, not an evaluation failure. These all // mean "rules_jvm_external (and thus the maven extension) is not present", -// i.e. legitimately not-defined. +// i.e. legitimately not-defined. 
The `no module ... exists in the dependency +// graph` branch is Bazel's verified real wording (`bazel mod show_extension` +// against a bzlmod repo without rules_jvm_external: "No module with the +// apparent repo name @rules_jvm_external exists in the dependency graph"). const SHOW_EXT_NOT_IN_GRAPH_STDERR_RE = - /(?:in extension argument|extension argument)?.*(?:not (?:found|resolvable|defined)|no such (?:module|repo(?:sitory)?)|cannot be resolved|is not (?:a )?(?:visible |known )?(?:module|repo(?:sitory)?|extension)|not in the (?:dependency )?graph|unknown (?:module|extension)|does not (?:exist|use the extension))/i + /(?:in extension argument|extension argument)?.*(?:not (?:found|resolvable|defined)|no such (?:module|repo(?:sitory)?)|cannot be resolved|is not (?:a )?(?:visible |known )?(?:module|repo(?:sitory)?|extension)|not in the (?:dependency )?graph|no module[^\n]*exists in the (?:dependency )?graph|unknown (?:module|extension)|does not (?:exist|use the extension))/i // Bazel's canonical phrasing when the named module backing the extension // (here `rules_jvm_external`) isn't a dependency of the root module. const SHOW_EXT_MODULE_NOT_DEP_STDERR_RE = diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts index d29b375ec..0be94f9c2 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -244,6 +244,41 @@ describe('bazel-repo-discovery', () => { ).toBe('not-defined') }) + it('classifies the real Bazel "no module ... exists in the dependency graph" arg error (exit 2) as not-defined', () => { + // Verbatim stderr from `bazel mod show_extension` on a bzlmod repo + // without rules_jvm_external (verified on real Bazel against angular and + // buildbuddy: exit code 2). This is the dominant no-Maven case and must + // never be escalated to indeterminate / hardFailure. 
+ expect( + classifyShowExtensionResult( + probeResult({ + code: 2, + stderr: + 'ERROR: In extension argument @rules_jvm_external//:extensions.bzl%maven: No module with the apparent repo name @rules_jvm_external exists in the dependency graph. Type \'bazel help mod\' for syntax and help.\n', + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies the real Bazel unbound-name MODULE.bazel failure (exit 2) as indeterminate', () => { + // Verbatim stderr from `bazel mod show_extension --enable_bzlmod` on the + // envoy mobile/ fragment (verified on real Bazel: exit 2). A genuine + // eval failure: we cannot conclude maven is absent, so it is + // indeterminate even though the unbound-name text also trips the + // not-in-graph "not defined" branch (eval-failure is checked first). + expect( + classifyShowExtensionResult( + probeResult({ + code: 2, + stderr: + "ERROR: /work/mobile/MODULE.bazel:26:1: name 'pip' is not defined (did you mean 'zip'?)\nERROR: syntax error in MODULE.bazel file for .\n", + }), + 0, + ), + ).toBe('indeterminate') + }) + it('classifies a generic "extension not found / not resolvable" arg error as not-defined', () => { expect( classifyShowExtensionResult(