Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e-mcp-approval.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import type * as acp from "@agentclientprotocol/sdk";
import path from "node:path";
import {afterEach, beforeEach, expect, it} from "vitest";
import {ApprovalOptionId} from "../../../ApprovalOptionId";
import {
createAuthenticatedFixture,
createPermissionResponse,
describeE2E,
expectEndTurn,
type PermissionResponder,
type SpawnedAgentFixture,
} from "./acp-e2e-test-utils";

const MCP_SERVER_NAME = "integration-mcp";
const MCP_ECHO_MESSAGE = "mcp approval e2e";

/**
 * Builds the stdio MCP server definition registered with each test session.
 *
 * The server entry point is the `mcp-hello-world` stdio build resolved under the
 * current working directory's `node_modules`, launched with the Node binary that
 * is running the tests and with no extra environment variables.
 *
 * @returns The MCP server descriptor named {@link MCP_SERVER_NAME}.
 */
function createMcpServer(): acp.McpServerStdio {
    const stdioEntryPoint = path.join(process.cwd(), "node_modules/mcp-hello-world/build/stdio.js");
    const server: acp.McpServerStdio = {
        name: MCP_SERVER_NAME,
        command: process.execPath,
        args: [stdioEntryPoint],
        env: [],
    };
    return server;
}

/**
 * Tells whether a permission request is an MCP tool approval.
 *
 * A request qualifies when its tool call kind is `"execute"` and its `_meta`
 * carries `is_mcp_tool_approval` set to exactly `true`.
 *
 * @param request - The permission request received from the agent.
 * @returns `true` only when both conditions hold.
 */
function isMcpPermissionRequest(request: acp.RequestPermissionRequest): boolean {
    if (request.toolCall.kind !== "execute") {
        return false;
    }
    const approvalFlag = request._meta?.["is_mcp_tool_approval"];
    return approvalFlag === true;
}

/**
 * Creates a permission responder that answers MCP tool approvals with the given option.
 *
 * Requests that are not MCP tool approvals are answered by passing `null` to
 * `createPermissionResponse` instead of an option id.
 *
 * @param optionId - The option to select for MCP tool approval requests.
 * @returns A responder suitable for `fixture.setPermissionResponder`.
 */
function createMcpPermissionResponder(optionId: ApprovalOptionId): PermissionResponder {
    return (request) => {
        const selectedOptionId = isMcpPermissionRequest(request) ? optionId : null;
        return createPermissionResponse(selectedOptionId);
    };
}

/**
 * End-to-end suite for MCP tool approvals: every test gets a fresh authenticated
 * agent with a session that has the test MCP server attached.
 */
describeE2E("E2E MCP approval tests", () => {
    let fixture: SpawnedAgentFixture;
    let sessionId: string;

    beforeEach(async () => {
        fixture = await createAuthenticatedFixture();
        const session = await fixture.createSession([createMcpServer()]);
        sessionId = session.sessionId;
    });

    afterEach(async () => {
        await fixture.dispose();
    });

    /** Asserts that exactly one "execute" permission request was recorded and that it is an MCP approval. */
    function expectMcpToolPermissionRequest(): void {
        const executeRequests = fixture.readPermissionRequests(sessionId, "execute");
        expect(executeRequests.length).toBe(1);
        const [onlyRequest] = executeRequests;
        expect(isMcpPermissionRequest(onlyRequest!)).toBe(true);
    }

    it("executes an approved MCP tool call", async () => {
        const approveOnce = createMcpPermissionResponder(ApprovalOptionId.AllowOnce);
        fixture.setPermissionResponder(approveOnce);

        const promptText = `Use the ${MCP_SERVER_NAME} MCP echo tool with message "${MCP_ECHO_MESSAGE}". Reply with exactly the tool result and no extra text.`;
        const expectedEcho = `You said: ${MCP_ECHO_MESSAGE}`;
        await fixture.expectPromptText(
            sessionId,
            promptText,
            (text) => expect(text).toContain(expectedEcho),
        );
        expectMcpToolPermissionRequest();
    });

    it("ends turn when MCP tool call is rejected", async () => {
        const rejectOnce = createMcpPermissionResponder(ApprovalOptionId.RejectOnce);
        fixture.setPermissionResponder(rejectOnce);

        const text = `Use the ${MCP_SERVER_NAME} MCP echo tool with message "${MCP_ECHO_MESSAGE}". Stop if the tool call is rejected.`;
        const promptResponse = await fixture.connection.prompt({
            sessionId,
            prompt: [{type: "text", text}],
        });
        expectEndTurn(promptResponse);
        expectMcpToolPermissionRequest();
    });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import {afterEach, expect, it} from "vitest";
import {AgentMode} from "../../../AgentMode";
import {
createAuthenticatedFixture,
describeE2E,
OTHER_TEST_MODEL_ID,
type SpawnedAgentFixture,
} from "./acp-e2e-test-utils";

/**
 * End-to-end suite checking that a session created by one ACP agent process can
 * be loaded by the restarted process, keeping both the selected model and the
 * earlier conversation.
 */
describeE2E("E2E session persistence tests", () => {
    let beforeRestartFixture: SpawnedAgentFixture | null = null;
    let afterRestartFixture: SpawnedAgentFixture | null = null;

    afterEach(async () => {
        // Detach the references up front so a failing dispose cannot leave stale
        // fixtures behind for the next test's cleanup.
        const restarted = afterRestartFixture;
        const initial = beforeRestartFixture;
        afterRestartFixture = null;
        beforeRestartFixture = null;

        try {
            await restarted?.dispose();
        } finally {
            // Always release the original fixture, even when disposing the restarted one throws.
            await initial?.dispose();
        }
    });

    it("persists a session across ACP process restart", async () => {
        beforeRestartFixture = await createAuthenticatedFixture();
        const sessionId = (await beforeRestartFixture.createSession()).sessionId;

        // Switch away from the default model so the restart check can tell a
        // restored selection apart from a fresh default.
        await beforeRestartFixture.connection.unstable_setSessionModel({sessionId, modelId: OTHER_TEST_MODEL_ID.toString()});
        const memorizedToken = "token-for-tests-123";
        await beforeRestartFixture.expectPromptText(
            sessionId,
            `Remember this token - "${memorizedToken}". Reply with exactly before-restart-ok and nothing else.`,
            (text) => expect(text.toLowerCase()).toContain("before-restart-ok"),
        );

        afterRestartFixture = await beforeRestartFixture.restart();

        const loadSessionResponse = await afterRestartFixture.connection.loadSession({
            sessionId,
            cwd: afterRestartFixture.workspaceDir,
            mcpServers: [],
        });
        expect(loadSessionResponse.models?.currentModelId).toBe(OTHER_TEST_MODEL_ID.toString());

        await afterRestartFixture.expectPromptText(
            sessionId,
            "What token did I ask you to remember earlier? Reply with just the token and nothing else.",
            // Lower-case both sides so the comparison stays case-insensitive if the token literal changes.
            (text) => expect(text.toLowerCase()).toContain(memorizedToken.toLowerCase()),
        );
    });
});
47 changes: 46 additions & 1 deletion src/__tests__/CodexACPAgent/e2e/acp-e2e-shell-approval.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import fs from "node:fs";
import path from "node:path";
import {afterEach, beforeEach, expect, it} from "vitest";
import {afterEach, beforeEach, expect, it, vi} from "vitest";
import {ApprovalOptionId} from "../../../ApprovalOptionId";
import {
createAuthenticatedFixture,
createPermissionResponse,
createPermissionResponder,
createReadOnlyFixture,
Expand Down Expand Up @@ -69,3 +70,47 @@ describeE2E("E2E shell approval tests", () => {
expect(fixture.readPermissionRequests(sessionId, "execute").length).toBe(2);
});
});

describeE2E("E2E shell cancellation tests", () => {
// Fixture owned by the currently running test; stays null until the test body spawns an agent.
let fixture: SpawnedAgentFixture | null = null;

// Dispose whatever fixture the test created (if any) and clear the reference before the next test.
afterEach(async () => {
await fixture?.dispose();
fixture = null;
});

/**
 * Reports whether a process with the given PID currently exists.
 *
 * Probes the process with signal `0`, which checks for existence without
 * delivering a signal.
 *
 * @param pid - Operating-system process id to probe.
 * @returns `true` when the probe succeeds or fails with `EPERM` (the process
 * exists but may not be signalled by this user); `false` for any other failure.
 */
function isProcessRunning(pid: number): boolean {
    try {
        process.kill(pid, 0);
        return true;
    } catch (error) {
        // EPERM means the target exists but is not ours to signal; treating it as
        // "not running" would report a live process as gone.
        return (error as {code?: unknown} | null)?.code === "EPERM";
    }
}

// Starts a long-running shell command through the agent, cancels the prompt while
// the command is alive, and checks that both the turn and the process stop.
it("cancels a running shell command", async () => {
    const agent = await createAuthenticatedFixture();
    fixture = agent;
    const {sessionId} = await agent.createSession();

    // The shell writes its own PID to a file, then `exec`s into sleep so that PID is the sleeping process.
    const pidFilePath = path.join(agent.workspaceDir, "cancel-command.pid");
    const command = `/bin/sh -c 'echo $$ > "${pidFilePath}"; exec sleep 100'`;

    // Deliberately not awaited yet: the prompt only settles once it is cancelled.
    const promptResponse = agent.connection.prompt({
        sessionId,
        prompt: [{type: "text", text: `Use your shell tool to run exactly \`${command}\`.`}],
    });

    const readRecordedPid = (): number => {
        if (!fs.existsSync(pidFilePath)) {
            return Number.NaN;
        }
        return Number.parseInt(fs.readFileSync(pidFilePath, "utf8").trim(), 10);
    };

    // Poll until the command has started and recorded a usable PID.
    const pid = await vi.waitFor(() => {
        const recordedPid = readRecordedPid();
        expect(recordedPid).toBeGreaterThan(0);
        return recordedPid;
    }, {timeout: 10_000});
    expect(isProcessRunning(pid)).toBe(true);

    await agent.connection.cancel({sessionId});

    const {stopReason} = await promptResponse;
    expect(stopReason).toBe("cancelled");
    await vi.waitFor(() => {
        expect(isProcessRunning(pid)).toBe(false);
    }, {timeout: 5_000});
});
});
32 changes: 16 additions & 16 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e-test-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ export {
type PermissionResponder,
} from "./permission-responders";
export {
DEFAULT_TEST_MODEL_ID,
type SpawnedAgentFixture,
type TestSkill,
OTHER_TEST_MODEL_ID,
} from "./spawned-agent-fixture";

/** `true` only when the `RUN_E2E_TESTS` environment variable is exactly the string `"true"`. */
export const RUN_E2E_TESTS = process.env["RUN_E2E_TESTS"] === "true";
Expand Down Expand Up @@ -101,24 +103,22 @@ async function createSpawnedFixture(
authenticate: Authenticator,
extraEnv?: NodeJS.ProcessEnv,
): Promise<SpawnedAgentFixture> {
const fixture = createSpawnedAgentFixture(extraEnv);
const connection = fixture.connection;

const initializeResponse = await connection.initialize({
protocolVersion: acp.PROTOCOL_VERSION,
clientCapabilities: buildClientCapabilities(),
clientInfo: {
name: "vitest",
version: "1.0.0",
},
});
return await createSpawnedAgentFixture(async (connection) => {
const initializeResponse = await connection.initialize({
protocolVersion: acp.PROTOCOL_VERSION,
clientCapabilities: buildClientCapabilities(),
clientInfo: {
name: "vitest",
version: "1.0.0",
},
});

if (initializeResponse.protocolVersion !== acp.PROTOCOL_VERSION) {
throw new Error(`Unexpected protocol version: ${initializeResponse.protocolVersion}`);
}
if (initializeResponse.protocolVersion !== acp.PROTOCOL_VERSION) {
throw new Error(`Unexpected protocol version: ${initializeResponse.protocolVersion}`);
}

await authenticate(connection, initializeResponse.authMethods ?? []);
return fixture;
await authenticate(connection, initializeResponse.authMethods ?? []);
}, extraEnv);
}

export function requireLiveApiKey(): string {
Expand Down
15 changes: 5 additions & 10 deletions src/__tests__/CodexACPAgent/e2e/acp-e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ import {AgentMode} from "../../../AgentMode";
import {
createAuthenticatedFixture,
createGatewayFixture,
DEFAULT_TEST_MODEL_ID,
describeE2E,
OTHER_TEST_MODEL_ID,
requireLiveApiKey,
type SpawnedAgentFixture,
} from "./acp-e2e-test-utils";
Expand Down Expand Up @@ -44,20 +46,13 @@ describeE2E("E2E tests", () => {
throw new Error("Agent did not return initial model state.");
}
expect(models.availableModels.length).toBeGreaterThan(0);

const selectedModelId =
models.availableModels.find((model) => model.modelId !== models.currentModelId)?.modelId
?? models.currentModelId
?? models.availableModels[0]?.modelId;
if (!selectedModelId) {
throw new Error("No available models returned by ACP server.");
}
expect(models.currentModelId).toBe(DEFAULT_TEST_MODEL_ID.toString());

await fixture.connection.unstable_setSessionModel({
sessionId: session.sessionId,
modelId: selectedModelId,
modelId: OTHER_TEST_MODEL_ID.toString(),
});
await fixture.expectStatus(session.sessionId, {Model: selectedModelId});
await fixture.expectStatus(session.sessionId, {Model: OTHER_TEST_MODEL_ID});
});

it("changes session mode via setSessionMode and reflects it in /status", async () => {
Expand Down
Loading
Loading