Skip to content
5 changes: 3 additions & 2 deletions src/commands/manifest/bazel/bazel-cquery.mts
Original file line number Diff line number Diff line change
Expand Up @@ -551,12 +551,13 @@ export async function runMetadataCqueryForRepo(
signal?: unknown
stderr?: unknown
stdout?: unknown
timedOut?: unknown
}
const stdout = typeof err.stdout === 'string' ? err.stdout : ''
const stderr = typeof err.stderr === 'string' ? err.stderr : ''
// On a `timeout`, the registry spawn kills the child, so Node sets
// `killed: true` and `signal: 'SIGTERM'` (or `SIGKILL`). There is no
// `timedOut` flag on the real rejection, so do not test for one.
const timedOut =
err.timedOut === true ||
err.killed === true ||
err.signal === 'SIGTERM' ||
err.signal === 'SIGKILL'
Expand Down
8 changes: 6 additions & 2 deletions src/commands/manifest/bazel/bazel-cquery.test.mts
Original file line number Diff line number Diff line change
Expand Up @@ -642,11 +642,15 @@ describe('runMetadataCqueryForRepo', () => {
expect(r.artifacts).toEqual([])
})

it('returns status:timeout when spawn rejects with timedOut=true', async () => {
it('returns status:timeout when spawn is killed on timeout (killed=true + SIGTERM)', async () => {
// The real registry spawn does not set `timedOut`; on a `timeout` it kills
// the child, so Node populates `killed: true` and `signal: 'SIGTERM'`.
// Mock that shape so the test pins the behaviour real spawn produces.
mocked.mockRejectedValueOnce(
Object.assign(new Error('command timed out'), {
code: null,
timedOut: true,
killed: true,
signal: 'SIGTERM',
stderr: '',
stdout: '',
}),
Expand Down
8 changes: 8 additions & 0 deletions src/commands/manifest/bazel/bazel-query-runner.mts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ function buildBazelModShowMavenExtensionArgv(
'mod',
'show_extension',
'@rules_jvm_external//:extensions.bzl%maven',
// A read-only scan must never rewrite the user's MODULE.bazel.lock; pin
// the lockfile read-only before user flags, mirroring the query/cquery
// argv builders.
'--lockfile_mode=off',
Comment thread
simonhj marked this conversation as resolved.
// Belt-and-suspenders output reducer mirroring the PyPI path: bias the
// report toward the root module's usages. The authoritative pruning is
// the importers-filter applied to the parsed output, so this is not
Expand All @@ -101,6 +105,10 @@ function buildBazelModShowPipExtensionArgv(opts: BazelQueryOptions): string[] {
'mod',
'show_extension',
'@rules_python//python/extensions:pip.bzl%pip',
// A read-only scan must never rewrite the user's MODULE.bazel.lock; pin
// the lockfile read-only before user flags, mirroring the query/cquery
// argv builders.
'--lockfile_mode=off',
'--extension_usages=<root>',
...userFlags,
]
Expand Down
13 changes: 13 additions & 0 deletions src/commands/manifest/bazel/bazel-query-runner.test.mts
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,21 @@ describe('runBazelModShowMavenExtension', () => {
'mod',
'show_extension',
'@rules_jvm_external//:extensions.bzl%maven',
'--lockfile_mode=off',
'--extension_usages=<root>',
])
})

it('pins the lockfile read-only so the scan never rewrites MODULE.bazel.lock', async () => {
await runBazelModShowMavenExtension({
bin: 'bazel',
cwd: '/repo',
invocationFlags: [],
})
const argv = mocked.mock.calls[0]![1] as string[]
expect(argv).toContain('--lockfile_mode=off')
})

it('threads outputUserRoot ahead of the subcommand', async () => {
await runBazelModShowMavenExtension({
bin: 'bazel',
Expand All @@ -273,6 +284,7 @@ describe('runBazelModShowMavenExtension', () => {
'mod',
'show_extension',
'@rules_jvm_external//:extensions.bzl%maven',
'--lockfile_mode=off',
'--extension_usages=<root>',
])
})
Expand Down Expand Up @@ -320,6 +332,7 @@ describe('runBazelModShowPipExtension', () => {
'mod',
'show_extension',
'@rules_python//python/extensions:pip.bzl%pip',
'--lockfile_mode=off',
'--extension_usages=<root>',
])
})
Expand Down
134 changes: 127 additions & 7 deletions src/commands/manifest/bazel/bazel-repo-discovery.mts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ export type ProbeResult = {

export type RepoProbe = (repoName: string) => Promise<ProbeResult>

export type ProbeStatus = 'populated' | 'empty' | 'not-defined'
// `indeterminate` means the probe could not be classified: an unrecognized
// non-zero exit, or the probe threw outright (the Bazel invocation itself
// failed). It is NOT evidence that the repo is undefined — treating it as
// `not-defined` would silently under-report a hub that may well hold Maven
// deps. The orchestrator must propagate it so the run is never reported
// `complete` when a probe was indeterminate.
export type ProbeStatus =
| 'populated'
| 'empty'
| 'not-defined'
| 'indeterminate'

// Conventional Maven hub names rules_jvm_external sets up under
// WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility
Expand Down Expand Up @@ -76,6 +86,109 @@ const SHOW_EXT_SECTION_HEADER_RE =
const FETCHED_HUB_BULLET_RE =
/^ {2}- (?<name>\S+) \(imported by (?<importers>[^)]+)\)\s*$/

// `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
// exits non-zero in two very different situations, and conflating them is
// dangerous for a security tool:
//
// (a) `@rules_jvm_external` simply isn't in the root module's resolved
// dependency graph. This is the COMMON case for any bzlmod repo that
// doesn't use rules_jvm_external (no Maven at all). Bazel's ModCommand
// resolves the extension argument up front via
// `ExtensionArg.resolveToExtensionId`, which throws
// `InvalidArgumentException` and exits non-zero before evaluating any
// Starlark. This is NOT a failure to analyze; it is a positive,
// authoritative "there is no maven extension here". It must map to
// `not-defined` so the workspace cleanly contributes no Maven.
//
// (b) The module graph genuinely fails to evaluate: a Starlark eval error,
// an unbound name (e.g. a MODULE.bazel referencing `PYTHON_VERSION` /
// `pip` before definition), a syntax error, or the bazel binary itself
// being missing/spawn-failed (normalized to code -1). Here we have NO
// evidence about whether a maven extension exists, so it must map to
// `indeterminate` and the run can never be reported complete.
//
// We classify by stderr shape. The exact wording differs across Bazel
// versions; the regex families below are intentionally broad and SHOULD be
// confirmed against live `bazel mod show_extension` output.

// Family (a): the extension / module is not resolvable in the dependency
// graph — an argument-resolution error, not an evaluation failure. These all
// mean "rules_jvm_external (and thus the maven extension) is not present",
// i.e. legitimately not-defined. The `no module ... exists in the dependency
// graph` branch is Bazel's verified real wording (`bazel mod show_extension`
// against a bzlmod repo without rules_jvm_external: "No module with the
// apparent repo name @rules_jvm_external exists in the dependency graph").
const SHOW_EXT_NOT_IN_GRAPH_STDERR_RE =
/(?:in extension argument|extension argument)?.*(?:not (?:found|resolvable|defined)|no such (?:module|repo(?:sitory)?)|cannot be resolved|is not (?:a )?(?:visible |known )?(?:module|repo(?:sitory)?|extension)|not in the (?:dependency )?graph|no module[^\n]*exists in the (?:dependency )?graph|unknown (?:module|extension)|does not (?:exist|use the extension))/i
// Bazel's canonical phrasing when the named module backing the extension
// (here `rules_jvm_external`) isn't a dependency of the root module.
const SHOW_EXT_MODULE_NOT_DEP_STDERR_RE =
/(?:rules_jvm_external|module ['"`]?[A-Za-z0-9._+~-]+['"`]?).*(?:is not (?:a )?(?:direct )?dep(?:endenc(?:y|ies))?|not (?:a )?dependency)/i

// Family (b): a genuine evaluation / load failure of the module graph. These
// mean we could not determine whether a maven extension exists, so the result
// is indeterminate, never a clean not-defined.
const SHOW_EXT_EVAL_FAILURE_STDERR_RE =
/(?:error (?:evaluating|loading|computing)|failed to (?:evaluate|load)|evaluation (?:of|failed)|cannot load|syntax error|name ['"`]?[A-Za-z0-9_]+['"`]? is not defined|variable ['"`]?[A-Za-z0-9_]+['"`]? (?:is|was) (?:referenced|not)|unbound|invalid MODULE\.bazel|MODULE\.bazel.*(?:error|failed)|Traceback|Error in)/i

// Outcome of running `bazel mod show_extension` for the maven extension,
// distinct from the per-repo `ProbeStatus`:
// `not-defined` — authoritative: no maven extension in this workspace
// (clean run with zero kept hubs, OR rules_jvm_external is
// not in the dependency graph).
// `indeterminate` — enumeration could not be performed (eval/load failure,
// binary missing); the run must not be reported complete.
// `defined` — the report parsed and yielded one or more root hubs;
// the caller uses the parsed hub list directly.
export type ShowExtensionStatus = 'defined' | 'indeterminate' | 'not-defined'

// Classify a `bazel mod show_extension` result. `keptRootHubCount` is the
// number of root-imported hubs the caller parsed from a code-0 run (see
// `parseShowExtensionOutput` + the `<root>` importer filter); it disambiguates
// the code-0 cases without re-parsing here.
//
// IMPORTANT (security correctness): a non-zero exit is the DEFAULT outcome for
// every bzlmod repo that does not use rules_jvm_external, so we must NOT treat
// non-zero as indeterminate by default. We only escalate to `indeterminate`
// when stderr looks like a real evaluation/load failure; an argument/resolution
// error about the missing extension is the legitimate no-Maven case.
export function classifyShowExtensionResult(
result: ProbeResult,
keptRootHubCount: number,
): ShowExtensionStatus {
if (result.code === 0) {
// Clean run. Either it enumerated root hubs (`defined`) or it ran fine and
// found no maven extension for the root (`not-defined`).
return keptRootHubCount > 0 ? 'defined' : 'not-defined'
}
// A spawn failure / missing binary is normalized to code -1 upstream; there
// is no usable stderr classification and we definitely could not enumerate.
if (result.code === -1) {
return 'indeterminate'
}
const { stderr } = result
// A genuine module-graph evaluation/load failure wins: we cannot conclude
// anything about maven presence, so surface it as indeterminate.
if (SHOW_EXT_EVAL_FAILURE_STDERR_RE.test(stderr)) {
return 'indeterminate'
}
// The maven extension / rules_jvm_external is simply not in the dependency
// graph: an argument-resolution error. This is the common no-Maven bzlmod
// repo and is authoritatively not-defined.
if (
SHOW_EXT_NOT_IN_GRAPH_STDERR_RE.test(stderr) ||
SHOW_EXT_MODULE_NOT_DEP_STDERR_RE.test(stderr)
) {
return 'not-defined'
}
// Truly unrecognized non-zero exit. Bias toward not-defined: the dominant
// real-world non-zero case is "extension not in the graph", and a missing
// bullet here would otherwise abort the user's entire scan. We only reach
// `indeterminate` above when stderr positively looks like an eval/load
// failure, which is the case the flag exists for.
return 'not-defined'
}

// Pure parser for `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven`
// stdout. Returns the hub repos listed under `Fetched repositories:` — i.e.
// items annotated with `(imported by ...)` — each carrying the set of modules
Expand Down Expand Up @@ -154,13 +267,16 @@ export function classifyProbeResult(result: ProbeResult): ProbeStatus {
return 'empty'
}
// Code 0 with empty stdout: WORKSPACE-mode probes do this when the repo
// name isn't declared (Exp 5c). Treat as not-defined.
// name isn't declared. Treat as not-defined.
if (result.code === 0) {
return 'not-defined'
}
// Code 1 with no recognizable message: be conservative and call it
// not-defined so the orchestrator skips it without erroring the workspace.
return 'not-defined'
// Non-zero exit with no recognizable message: the probe failed for a reason
// we can't classify (Bazel infra error, analysis crash, unexpected stderr).
// This is NOT proof the repo is undefined, so do NOT downgrade it to
// not-defined — surface it as indeterminate so the orchestrator can flag
// the workspace as not fully analyzable rather than silently skipping it.
return 'indeterminate'
}

// Convenience: probe a single candidate and return its classified status,
Expand All @@ -176,14 +292,18 @@ export async function probeCandidate(
try {
result = await probe(repoName)
} catch (e) {
// A thrown probe means the Bazel invocation itself failed; we have no
// evidence about whether the repo exists. Surface it as indeterminate so
// the run is not reported complete, rather than swallowing it as a
// not-defined skip.
if (verbose) {
logger.log(
`[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${
`[VERBOSE] discovery: probe @${repoName}: indeterminate (probe threw: ${
e instanceof Error ? e.message : String(e)
})`,
)
}
return 'not-defined'
return 'indeterminate'
}
const status = classifyProbeResult(result)
if (verbose) {
Expand Down
Loading
Loading