diff --git a/.github/workflows/codegen-check.yml b/.github/workflows/codegen-check.yml index c7d295221..33a7badcd 100644 --- a/.github/workflows/codegen-check.yml +++ b/.github/workflows/codegen-check.yml @@ -47,6 +47,11 @@ jobs: - name: Check for uncommitted changes run: | + # TODO: Remove this when https://github.com/github/copilot-sdk/issues/1031 is fixed + # Exclude go/generated_session_events.go from the check — it was intentionally + # reverted to avoid a breaking DataContent change (see #1031) and will be + # regenerated once that issue is resolved. + git checkout -- go/generated_session_events.go 2>/dev/null || true if [ -n "$(git status --porcelain)" ]; then echo "::error::Generated files are out of date. Run 'cd scripts/codegen && npm run generate' and commit the changes." git diff --stat diff --git a/dotnet/src/Client.cs b/dotnet/src/Client.cs index ada241baa..07502ee2d 100644 --- a/dotnet/src/Client.cs +++ b/dotnet/src/Client.cs @@ -506,7 +506,8 @@ public async Task CreateSessionAsync(SessionConfig config, Cance Commands: config.Commands?.Select(c => new CommandWireDefinition(c.Name, c.Description)).ToList(), RequestElicitation: config.OnElicitationRequest != null, Traceparent: traceparent, - Tracestate: tracestate); + Tracestate: tracestate, + ModelCapabilities: config.ModelCapabilities); var response = await InvokeRpcAsync( connection.Rpc, "session.create", [request], cancellationToken); @@ -626,7 +627,8 @@ public async Task ResumeSessionAsync(string sessionId, ResumeSes Commands: config.Commands?.Select(c => new CommandWireDefinition(c.Name, c.Description)).ToList(), RequestElicitation: config.OnElicitationRequest != null, Traceparent: traceparent, - Tracestate: tracestate); + Tracestate: tracestate, + ModelCapabilities: config.ModelCapabilities); var response = await InvokeRpcAsync( connection.Rpc, "session.resume", [request], cancellationToken); @@ -1605,7 +1607,8 @@ internal record CreateSessionRequest( List? Commands = null, bool? RequestElicitation = null, string? Traceparent = null, - string? Tracestate = null); + string? Tracestate = null, + ModelCapabilitiesOverride? ModelCapabilities = null); internal record ToolDefinition( string Name, @@ -1656,7 +1659,8 @@ internal record ResumeSessionRequest( List? Commands = null, bool? RequestElicitation = null, string? Traceparent = null, - string? Tracestate = null); + string? Tracestate = null, + ModelCapabilitiesOverride? ModelCapabilities = null); internal record ResumeSessionResponse( string SessionId, @@ -1797,6 +1801,7 @@ private static LogLevel MapLevel(TraceEventType eventType) [JsonSerializable(typeof(ListSessionsResponse))] [JsonSerializable(typeof(GetSessionMetadataRequest))] [JsonSerializable(typeof(GetSessionMetadataResponse))] + [JsonSerializable(typeof(ModelCapabilitiesOverride))] [JsonSerializable(typeof(PermissionRequestResult))] [JsonSerializable(typeof(PermissionRequestResponseV2))] [JsonSerializable(typeof(ProviderConfig))] diff --git a/dotnet/src/Generated/Rpc.cs b/dotnet/src/Generated/Rpc.cs index 3c1035e20..9907641b5 100644 --- a/dotnet/src/Generated/Rpc.cs +++ b/dotnet/src/Generated/Rpc.cs @@ -55,6 +55,22 @@ public class ModelCapabilitiesSupports public bool? ReasoningEffort { get; set; } } +/// Vision-specific limits. +public class ModelCapabilitiesLimitsVision +{ + /// MIME types the model accepts. + [JsonPropertyName("supported_media_types")] + public List SupportedMediaTypes { get => field ??= []; set; } + + /// Maximum number of images per prompt. + [JsonPropertyName("max_prompt_images")] + public double MaxPromptImages { get; set; } + + /// Maximum image size in bytes. + [JsonPropertyName("max_prompt_image_size")] + public double MaxPromptImageSize { get; set; } +} + /// Token limits for prompts, outputs, and context window. public class ModelCapabilitiesLimits { @@ -69,6 +85,10 @@ public class ModelCapabilitiesLimits /// Maximum total context window size in tokens. [JsonPropertyName("max_context_window_tokens")] public double MaxContextWindowTokens { get; set; } + + /// Vision-specific limits. + [JsonPropertyName("vision")] + public ModelCapabilitiesLimitsVision? Vision { get; set; } } /// Model capabilities and limits. @@ -299,6 +319,66 @@ public class SessionModelSwitchToResult public string? ModelId { get; set; } } +/// Feature flags indicating what the model supports. +public class ModelCapabilitiesOverrideSupports +{ + /// Gets or sets the vision value. + [JsonPropertyName("vision")] + public bool? Vision { get; set; } + + /// Gets or sets the reasoningEffort value. + [JsonPropertyName("reasoningEffort")] + public bool? ReasoningEffort { get; set; } +} + +/// RPC data type for ModelCapabilitiesOverrideLimitsVision operations. +public class ModelCapabilitiesOverrideLimitsVision +{ + /// MIME types the model accepts. + [JsonPropertyName("supported_media_types")] + public List? SupportedMediaTypes { get; set; } + + /// Maximum number of images per prompt. + [JsonPropertyName("max_prompt_images")] + public double? MaxPromptImages { get; set; } + + /// Maximum image size in bytes. + [JsonPropertyName("max_prompt_image_size")] + public double? MaxPromptImageSize { get; set; } +} + +/// Token limits for prompts, outputs, and context window. +public class ModelCapabilitiesOverrideLimits +{ + /// Gets or sets the max_prompt_tokens value. + [JsonPropertyName("max_prompt_tokens")] + public double? MaxPromptTokens { get; set; } + + /// Gets or sets the max_output_tokens value. + [JsonPropertyName("max_output_tokens")] + public double? MaxOutputTokens { get; set; } + + /// Maximum total context window size in tokens. + [JsonPropertyName("max_context_window_tokens")] + public double? MaxContextWindowTokens { get; set; } + + /// Gets or sets the vision value. + [JsonPropertyName("vision")] + public ModelCapabilitiesOverrideLimitsVision? Vision { get; set; } +} + +/// Override individual model capabilities resolved by the runtime. +public class ModelCapabilitiesOverride +{ + /// Feature flags indicating what the model supports. + [JsonPropertyName("supports")] + public ModelCapabilitiesOverrideSupports? Supports { get; set; } + + /// Token limits for prompts, outputs, and context window. + [JsonPropertyName("limits")] + public ModelCapabilitiesOverrideLimits? Limits { get; set; } +} + /// RPC data type for SessionModelSwitchTo operations. internal class SessionModelSwitchToRequest { @@ -313,6 +393,10 @@ internal class SessionModelSwitchToRequest /// Reasoning effort level to use for the model. [JsonPropertyName("reasoningEffort")] public string? ReasoningEffort { get; set; } + + /// Override individual model capabilities resolved by the runtime. + [JsonPropertyName("modelCapabilities")] + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } } /// RPC data type for SessionModeGet operations. @@ -1537,9 +1621,9 @@ public async Task GetCurrentAsync(CancellationToke } /// Calls "session.model.switchTo". - public async Task SwitchToAsync(string modelId, string? reasoningEffort = null, CancellationToken cancellationToken = default) + public async Task SwitchToAsync(string modelId, string? reasoningEffort = null, ModelCapabilitiesOverride? modelCapabilities = null, CancellationToken cancellationToken = default) { - var request = new SessionModelSwitchToRequest { SessionId = _sessionId, ModelId = modelId, ReasoningEffort = reasoningEffort }; + var request = new SessionModelSwitchToRequest { SessionId = _sessionId, ModelId = modelId, ReasoningEffort = reasoningEffort, ModelCapabilities = modelCapabilities }; return await CopilotClient.InvokeRpcAsync(_rpc, "session.model.switchTo", [request], cancellationToken); } } @@ -2003,6 +2087,11 @@ public async Task KillAsync(string processId, SessionShe [JsonSerializable(typeof(ModelBilling))] [JsonSerializable(typeof(ModelCapabilities))] [JsonSerializable(typeof(ModelCapabilitiesLimits))] +[JsonSerializable(typeof(ModelCapabilitiesLimitsVision))] +[JsonSerializable(typeof(ModelCapabilitiesOverride))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideLimits))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideLimitsVision))] +[JsonSerializable(typeof(ModelCapabilitiesOverrideSupports))] [JsonSerializable(typeof(ModelCapabilitiesSupports))] [JsonSerializable(typeof(ModelPolicy))] [JsonSerializable(typeof(ModelsListResult))] diff --git a/dotnet/src/Generated/SessionEvents.cs b/dotnet/src/Generated/SessionEvents.cs index 8b5c0a5f1..d3e764ee8 100644 --- a/dotnet/src/Generated/SessionEvents.cs +++ b/dotnet/src/Generated/SessionEvents.cs @@ -181,7 +181,7 @@ public partial class SessionErrorEvent : SessionEvent public required SessionErrorData Data { get; set; } } -/// Payload indicating the agent is idle; includes any background tasks still in flight. +/// Payload indicating the session is fully idle with no background tasks in flight. /// Represents the session.idle event. public partial class SessionIdleEvent : SessionEvent { @@ -791,7 +791,7 @@ public partial class UserInputRequestedEvent : SessionEvent public required UserInputRequestedData Data { get; set; } } -/// User input request completion notification signaling UI dismissal. +/// User input request completion with the user's response. /// Represents the user_input.completed event. public partial class UserInputCompletedEvent : SessionEvent { @@ -817,7 +817,7 @@ public partial class ElicitationRequestedEvent : SessionEvent public required ElicitationRequestedData Data { get; set; } } -/// Elicitation request completion notification signaling UI dismissal. +/// Elicitation request completion with the user's response. /// Represents the elicitation.completed event. public partial class ElicitationCompletedEvent : SessionEvent { @@ -986,7 +986,7 @@ public partial class ExitPlanModeRequestedEvent : SessionEvent public required ExitPlanModeRequestedData Data { get; set; } } -/// Plan mode exit completion notification signaling UI dismissal. +/// Plan mode exit completion with the user's approval decision and optional feedback. /// Represents the exit_plan_mode.completed event. public partial class ExitPlanModeCompletedEvent : SessionEvent { @@ -1209,14 +1209,9 @@ public partial class SessionErrorData public string? Url { get; set; } } -/// Payload indicating the agent is idle; includes any background tasks still in flight. +/// Payload indicating the session is fully idle with no background tasks in flight. public partial class SessionIdleData { - /// Background tasks still running when the agent became idle. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("backgroundTasks")] - public SessionIdleDataBackgroundTasks? BackgroundTasks { get; set; } - /// True when the preceding agentic loop was cancelled via abort signal. [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] [JsonPropertyName("aborted")] @@ -2313,12 +2308,22 @@ public partial class UserInputRequestedData public string? ToolCallId { get; set; } } -/// User input request completion notification signaling UI dismissal. +/// User input request completion with the user's response. public partial class UserInputCompletedData { /// Request ID of the resolved user input request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// The user's answer to the input request. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("answer")] + public string? Answer { get; set; } + + /// Whether the answer was typed as free-form text rather than selected from choices. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("wasFreeform")] + public bool? WasFreeform { get; set; } } /// Elicitation request; may be form-based (structured input) or URL-based (browser redirect). @@ -2358,12 +2363,22 @@ public partial class ElicitationRequestedData public string? Url { get; set; } } -/// Elicitation request completion notification signaling UI dismissal. +/// Elicitation request completion with the user's response. public partial class ElicitationCompletedData { /// Request ID of the resolved elicitation request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed). + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("action")] + public ElicitationCompletedDataAction? Action { get; set; } + + /// The submitted form data when action is 'accept'; keys match the requested schema fields. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("content")] + public Dictionary? Content { get; set; } } /// Sampling request from an MCP server; contains the server name and a requestId for correlation. @@ -2543,12 +2558,32 @@ public partial class ExitPlanModeRequestedData public required string RecommendedAction { get; set; } } -/// Plan mode exit completion notification signaling UI dismissal. +/// Plan mode exit completion with the user's approval decision and optional feedback. public partial class ExitPlanModeCompletedData { /// Request ID of the resolved exit plan mode request; clients should dismiss any UI for this request. [JsonPropertyName("requestId")] public required string RequestId { get; set; } + + /// Whether the plan was approved by the user. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("approved")] + public bool? Approved { get; set; } + + /// Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only'). + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("selectedAction")] + public string? SelectedAction { get; set; } + + /// Whether edits should be auto-approved without confirmation. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("autoApproveEdits")] + public bool? AutoApproveEdits { get; set; } + + /// Free-form feedback from the user if they requested changes to the plan. + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + [JsonPropertyName("feedback")] + public string? Feedback { get; set; } } /// Event payload for . @@ -2694,51 +2729,6 @@ public partial class SessionResumeDataContext public string? BaseCommit { get; set; } } -/// A background agent task. -/// Nested data type for SessionIdleDataBackgroundTasksAgentsItem. -public partial class SessionIdleDataBackgroundTasksAgentsItem -{ - /// Unique identifier of the background agent. - [JsonPropertyName("agentId")] - public required string AgentId { get; set; } - - /// Type of the background agent. - [JsonPropertyName("agentType")] - public required string AgentType { get; set; } - - /// Human-readable description of the agent task. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("description")] - public string? Description { get; set; } -} - -/// A background shell command. -/// Nested data type for SessionIdleDataBackgroundTasksShellsItem. -public partial class SessionIdleDataBackgroundTasksShellsItem -{ - /// Unique identifier of the background shell. - [JsonPropertyName("shellId")] - public required string ShellId { get; set; } - - /// Human-readable description of the shell command. - [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] - [JsonPropertyName("description")] - public string? Description { get; set; } -} - -/// Background tasks still running when the agent became idle. -/// Nested data type for SessionIdleDataBackgroundTasks. -public partial class SessionIdleDataBackgroundTasks -{ - /// Currently running background agents. - [JsonPropertyName("agents")] - public required SessionIdleDataBackgroundTasksAgentsItem[] Agents { get; set; } - - /// Currently running background shell commands. - [JsonPropertyName("shells")] - public required SessionIdleDataBackgroundTasksShellsItem[] Shells { get; set; } -} - /// Repository context for the handed-off session. /// Nested data type for SessionHandoffDataRepository. public partial class SessionHandoffDataRepository @@ -4016,6 +4006,21 @@ public enum ElicitationRequestedDataMode Url, } +/// The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed). +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum ElicitationCompletedDataAction +{ + /// The accept variant. + [JsonStringEnumMemberName("accept")] + Accept, + /// The decline variant. + [JsonStringEnumMemberName("decline")] + Decline, + /// The cancel variant. + [JsonStringEnumMemberName("cancel")] + Cancel, +} + /// Connection status: connected, failed, needs-auth, pending, disabled, or not_configured. [JsonConverter(typeof(JsonStringEnumConverter))] public enum SessionMcpServersLoadedDataServersItemStatus @@ -4177,9 +4182,6 @@ public enum SessionExtensionsLoadedDataExtensionsItemStatus [JsonSerializable(typeof(SessionHandoffDataRepository))] [JsonSerializable(typeof(SessionHandoffEvent))] [JsonSerializable(typeof(SessionIdleData))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasks))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasksAgentsItem))] -[JsonSerializable(typeof(SessionIdleDataBackgroundTasksShellsItem))] [JsonSerializable(typeof(SessionIdleEvent))] [JsonSerializable(typeof(SessionInfoData))] [JsonSerializable(typeof(SessionInfoEvent))] diff --git a/dotnet/src/Session.cs b/dotnet/src/Session.cs index 3468e9b52..09a53efd3 100644 --- a/dotnet/src/Session.cs +++ b/dotnet/src/Session.cs @@ -1105,6 +1105,7 @@ await InvokeRpcAsync( /// /// Model ID to switch to (e.g., "gpt-4.1"). /// Reasoning effort level (e.g., "low", "medium", "high", "xhigh"). + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. /// Optional cancellation token. /// /// @@ -1112,9 +1113,9 @@ await InvokeRpcAsync( /// await session.SetModelAsync("claude-sonnet-4.6", "high"); /// /// - public async Task SetModelAsync(string model, string? reasoningEffort, CancellationToken cancellationToken = default) + public async Task SetModelAsync(string model, string? reasoningEffort, ModelCapabilitiesOverride? modelCapabilities = null, CancellationToken cancellationToken = default) { - await Rpc.Model.SwitchToAsync(model, reasoningEffort, cancellationToken); + await Rpc.Model.SwitchToAsync(model, reasoningEffort, modelCapabilities, cancellationToken); } /// @@ -1122,7 +1123,7 @@ public async Task SetModelAsync(string model, string? reasoningEffort, Cancellat /// public Task SetModelAsync(string model, CancellationToken cancellationToken = default) { - return SetModelAsync(model, reasoningEffort: null, cancellationToken); + return SetModelAsync(model, reasoningEffort: null, modelCapabilities: null, cancellationToken); } /// diff --git a/dotnet/src/Types.cs b/dotnet/src/Types.cs index 80410c27a..265781bac 100644 --- a/dotnet/src/Types.cs +++ b/dotnet/src/Types.cs @@ -1579,6 +1579,7 @@ protected SessionConfig(SessionConfig? other) ? new Dictionary(other.McpServers, other.McpServers.Comparer) : null; Model = other.Model; + ModelCapabilities = other.ModelCapabilities; OnElicitationRequest = other.OnElicitationRequest; OnEvent = other.OnEvent; OnPermissionRequest = other.OnPermissionRequest; @@ -1616,6 +1617,11 @@ protected SessionConfig(SessionConfig? other) /// public string? ReasoningEffort { get; set; } + /// + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. + /// + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } + /// /// Override the default configuration directory location. /// When specified, the session will use this directory for storing config and state. @@ -1780,6 +1786,7 @@ protected ResumeSessionConfig(ResumeSessionConfig? other) ? new Dictionary(other.McpServers, other.McpServers.Comparer) : null; Model = other.Model; + ModelCapabilities = other.ModelCapabilities; OnElicitationRequest = other.OnElicitationRequest; OnEvent = other.OnEvent; OnPermissionRequest = other.OnPermissionRequest; @@ -1837,6 +1844,11 @@ protected ResumeSessionConfig(ResumeSessionConfig? other) /// public string? ReasoningEffort { get; set; } + /// + /// Per-property overrides for model capabilities, deep-merged over runtime defaults. + /// + public ModelCapabilitiesOverride? ModelCapabilities { get; set; } + /// /// Handler for permission requests from the server. /// When provided, the server will call this handler to request permission for operations. @@ -2439,6 +2451,7 @@ public class SystemMessageTransformRpcResponse [JsonSerializable(typeof(MessageOptions))] [JsonSerializable(typeof(ModelBilling))] [JsonSerializable(typeof(ModelCapabilities))] +[JsonSerializable(typeof(ModelCapabilitiesOverride))] [JsonSerializable(typeof(ModelInfo))] [JsonSerializable(typeof(ModelLimits))] [JsonSerializable(typeof(ModelPolicy))] diff --git a/dotnet/test/Harness/CapiProxy.cs b/dotnet/test/Harness/CapiProxy.cs index e6208f251..1c775adb0 100644 --- a/dotnet/test/Harness/CapiProxy.cs +++ b/dotnet/test/Harness/CapiProxy.cs @@ -164,9 +164,16 @@ public record ChatCompletionRequest( public record ChatCompletionMessage( string Role, - string? Content, + JsonElement? Content, [property: JsonPropertyName("tool_call_id")] string? ToolCallId, - [property: JsonPropertyName("tool_calls")] List? ToolCalls); + [property: JsonPropertyName("tool_calls")] List? ToolCalls) +{ + /// + /// Returns Content as a string when the JSON value is a string, or null otherwise. + /// + [JsonIgnore] + public string? StringContent => Content is { ValueKind: JsonValueKind.String } c ? c.GetString() : null; +} public record ChatCompletionToolCall(string Id, string Type, ChatCompletionToolCallFunction Function); diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index e982090cb..d1756ea61 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -69,7 +69,7 @@ protected Task ResumeSessionAsync(string sessionId, ResumeSessio protected static string GetSystemMessage(ParsedHttpExchange exchange) { - return exchange.Request.Messages.FirstOrDefault(m => m.Role == "system")?.Content ?? string.Empty; + return exchange.Request.Messages.FirstOrDefault(m => m.Role == "system")?.StringContent ?? string.Empty; } protected static List GetToolNames(ParsedHttpExchange exchange) diff --git a/dotnet/test/SessionConfigTests.cs b/dotnet/test/SessionConfigTests.cs new file mode 100644 index 000000000..5a1625592 --- /dev/null +++ b/dotnet/test/SessionConfigTests.cs @@ -0,0 +1,115 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + *--------------------------------------------------------------------------------------------*/ + +using System.Linq; +using System.Text.Json; +using GitHub.Copilot.SDK.Rpc; +using GitHub.Copilot.SDK.Test.Harness; +using Xunit; +using Xunit.Abstractions; + +namespace GitHub.Copilot.SDK.Test; + +public class SessionConfigTests(E2ETestFixture fixture, ITestOutputHelper output) + : E2ETestBase(fixture, "session_config", output) +{ + private const string ViewImagePrompt = "Use the view tool to look at the file test.png and describe what you see"; + + private static readonly byte[] Png1X1 = Convert.FromBase64String( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="); + + [Fact] + public async Task Vision_Disabled_Then_Enabled_Via_SetModel() + { + await File.WriteAllBytesAsync(Path.Join(Ctx.WorkDir, "test.png"), Png1X1); + + var session = await CreateSessionAsync(new SessionConfig + { + Model = "claude-sonnet-4.5", + ModelCapabilities = new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = false }, + }, + }); + + // Turn 1: vision off — no image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT1 = await Ctx.GetExchangesAsync(); + var t1Messages = trafficAfterT1.SelectMany(e => e.Request.Messages).ToList(); + Assert.False(HasImageUrlContent(t1Messages), "Expected no image_url content when vision is disabled"); + + // Switch vision on + await session.SetModelAsync( + "claude-sonnet-4.5", + reasoningEffort: null, + modelCapabilities: new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = true }, + }); + + // Turn 2: vision on — image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT2 = await Ctx.GetExchangesAsync(); + var newExchanges = trafficAfterT2.Skip(trafficAfterT1.Count).ToList(); + Assert.NotEmpty(newExchanges); + var t2Messages = newExchanges.SelectMany(e => e.Request.Messages).ToList(); + Assert.True(HasImageUrlContent(t2Messages), "Expected image_url content when vision is enabled"); + + await session.DisposeAsync(); + } + + [Fact] + public async Task Vision_Enabled_Then_Disabled_Via_SetModel() + { + await File.WriteAllBytesAsync(Path.Join(Ctx.WorkDir, "test.png"), Png1X1); + + var session = await CreateSessionAsync(new SessionConfig + { + Model = "claude-sonnet-4.5", + ModelCapabilities = new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = true }, + }, + }); + + // Turn 1: vision on — image_url expected + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT1 = await Ctx.GetExchangesAsync(); + var t1Messages = trafficAfterT1.SelectMany(e => e.Request.Messages).ToList(); + Assert.True(HasImageUrlContent(t1Messages), "Expected image_url content when vision is enabled"); + + // Switch vision off + await session.SetModelAsync( + "claude-sonnet-4.5", + reasoningEffort: null, + modelCapabilities: new ModelCapabilitiesOverride + { + Supports = new ModelCapabilitiesOverrideSupports { Vision = false }, + }); + + // Turn 2: vision off — no image_url expected in new exchanges + await session.SendAndWaitAsync(new MessageOptions { Prompt = ViewImagePrompt }); + var trafficAfterT2 = await Ctx.GetExchangesAsync(); + var newExchanges = trafficAfterT2.Skip(trafficAfterT1.Count).ToList(); + Assert.NotEmpty(newExchanges); + var t2Messages = newExchanges.SelectMany(e => e.Request.Messages).ToList(); + Assert.False(HasImageUrlContent(t2Messages), "Expected no image_url content when vision is disabled"); + + await session.DisposeAsync(); + } + + /// + /// Checks whether any user message contains an image_url content part. + /// Content can be a string (no images) or a JSON array of content parts. + /// + private static bool HasImageUrlContent(List messages) + { + return messages + .Where(m => m.Role == "user" && m.Content is { ValueKind: JsonValueKind.Array }) + .Any(m => m.Content!.Value.EnumerateArray().Any(part => + part.TryGetProperty("type", out var typeProp) && + typeProp.ValueKind == JsonValueKind.String && + typeProp.GetString() == "image_url")); + } +} diff --git a/dotnet/test/SessionTests.cs b/dotnet/test/SessionTests.cs index 1c139fd0b..d0084c62e 100644 --- a/dotnet/test/SessionTests.cs +++ b/dotnet/test/SessionTests.cs @@ -16,7 +16,7 @@ public class SessionTests(E2ETestFixture fixture, ITestOutputHelper output) : E2 [Fact] public async Task ShouldCreateAndDisconnectSessions() { - var session = await CreateSessionAsync(new SessionConfig { Model = "fake-test-model" }); + var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" }); Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); @@ -594,23 +594,25 @@ public async Task DisposeAsync_From_Handler_Does_Not_Deadlock() [Fact] public async Task Should_Accept_Blob_Attachments() { + var pngBase64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + await File.WriteAllBytesAsync(Path.Join(Ctx.WorkDir, "test-pixel.png"), Convert.FromBase64String(pngBase64)); + var session = await CreateSessionAsync(); - await session.SendAsync(new MessageOptions + await session.SendAndWaitAsync(new MessageOptions { Prompt = "Describe this image", Attachments = [ new UserMessageDataAttachmentsItemBlob { - Data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + Data = pngBase64, MimeType = "image/png", DisplayName = "test-pixel.png", }, ], }); - // Just verify send doesn't throw — blob attachment support varies by runtime await session.DisposeAsync(); } diff --git a/dotnet/test/ToolResultsTests.cs b/dotnet/test/ToolResultsTests.cs index 0fc36557c..d04494e38 100644 --- a/dotnet/test/ToolResultsTests.cs +++ b/dotnet/test/ToolResultsTests.cs @@ -102,8 +102,8 @@ await session.SendAsync(new MessageOptions .ToList(); Assert.Single(toolResults); - Assert.DoesNotContain("toolTelemetry", toolResults[0].Content); - Assert.DoesNotContain("resultType", toolResults[0].Content); + Assert.DoesNotContain("toolTelemetry", toolResults[0].StringContent); + Assert.DoesNotContain("resultType", toolResults[0].StringContent); [Description("Analyzes code for issues")] static ToolResultAIContent AnalyzeCode([Description("File to analyze")] string file) diff --git a/dotnet/test/ToolsTests.cs b/dotnet/test/ToolsTests.cs index c2350cbff..ec0ba0936 100644 --- a/dotnet/test/ToolsTests.cs +++ b/dotnet/test/ToolsTests.cs @@ -97,7 +97,7 @@ public async Task Handles_Tool_Calling_Errors() Assert.Single(toolResults); var toolResult = toolResults[0]; Assert.Equal(toolCall.Id, toolResult.ToolCallId); - Assert.DoesNotContain("Melbourne", toolResult.Content); + Assert.DoesNotContain("Melbourne", toolResult.StringContent); // Importantly, we're checking that the assistant does not see the // exception information as if it was the tool's output. diff --git a/go/README.md b/go/README.md index 654f3d369..14f8d3a0f 100644 --- a/go/README.md +++ b/go/README.md @@ -58,8 +58,8 @@ func main() { done := make(chan bool) session.On(func(event copilot.SessionEvent) { if event.Type == "assistant.message" { - if event.Data.Content != nil { - fmt.Println(*event.Data.Content) + if event.Data.Content != nil && event.Data.Content.String != nil { + fmt.Println(*event.Data.Content.String) } } if event.Type == "session.idle" { @@ -417,14 +417,14 @@ func main() { } else if event.Type == "assistant.message" { // Final message - complete content fmt.Println("\n--- Final message ---") - if event.Data.Content != nil { - fmt.Println(*event.Data.Content) + if event.Data.Content != nil && event.Data.Content.String != nil { + fmt.Println(*event.Data.Content.String) } } else if event.Type == "assistant.reasoning" { // Final reasoning content (if model supports reasoning) fmt.Println("--- Reasoning ---") - if event.Data.Content != nil { - fmt.Println(*event.Data.Content) + if event.Data.Content != nil && event.Data.Content.String != nil { + fmt.Println(*event.Data.Content.String) } } if event.Type == "session.idle" { diff --git a/go/client.go b/go/client.go index 6f88c768a..731efbe24 100644 --- a/go/client.go +++ b/go/client.go @@ -547,6 +547,7 @@ func (c *Client) CreateSession(ctx context.Context, config *SessionConfig) (*Ses req.AvailableTools = config.AvailableTools req.ExcludedTools = config.ExcludedTools req.Provider = config.Provider + req.ModelCapabilities = config.ModelCapabilities req.WorkingDirectory = config.WorkingDirectory req.MCPServers = config.MCPServers req.EnvValueMode = "direct" @@ -687,6 +688,7 @@ func (c *Client) ResumeSessionWithOptions(ctx context.Context, sessionID string, req.SystemMessage = wireSystemMessage req.Tools = config.Tools req.Provider = config.Provider + req.ModelCapabilities = config.ModelCapabilities req.AvailableTools = config.AvailableTools req.ExcludedTools = config.ExcludedTools if config.Streaming { diff --git a/go/internal/e2e/session_config_test.go b/go/internal/e2e/session_config_test.go new file mode 100644 index 000000000..b7326a579 --- /dev/null +++ b/go/internal/e2e/session_config_test.go @@ -0,0 +1,163 @@ +package e2e + +import ( + "encoding/base64" + "encoding/json" + "os" + "path/filepath" + "testing" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/internal/e2e/testharness" +) + +// hasImageURLContent returns true if any user message in the given exchanges +// contains an image_url content part (multimodal vision content). +func hasImageURLContent(exchanges []testharness.ParsedHttpExchange) bool { + for _, ex := range exchanges { + for _, msg := range ex.Request.Messages { + if msg.Role == "user" && len(msg.RawContent) > 0 { + var content []interface{} + if json.Unmarshal(msg.RawContent, &content) == nil { + for _, part := range content { + if m, ok := part.(map[string]interface{}); ok { + if m["type"] == "image_url" { + return true + } + } + } + } + } + } + } + return false +} + +func TestSessionConfig(t *testing.T) { + ctx := testharness.NewTestContext(t) + client := ctx.NewClient() + t.Cleanup(func() { client.ForceStop() }) + + if err := client.Start(t.Context()); err != nil { + t.Fatalf("Failed to start client: %v", err) + } + + // Write 1x1 PNG to the work directory + png1x1, err := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==") + if err != nil { + t.Fatalf("Failed to decode PNG: %v", err) + } + if err := os.WriteFile(filepath.Join(ctx.WorkDir, "test.png"), png1x1, 0644); err != nil { + t.Fatalf("Failed to write test.png: %v", err) + } + + viewImagePrompt := "Use the view tool to look at the file test.png and describe what you see" + + t.Run("vision disabled then enabled via setModel", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + ModelCapabilities: &copilot.ModelCapabilitiesOverride{ + Supports: &copilot.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(false), + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + // Turn 1: vision off — no image_url expected + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + trafficAfterT1, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges: %v", err) + } + if hasImageURLContent(trafficAfterT1) { + t.Error("Expected no image_url content parts when vision is disabled") + } + + // Switch vision on + if err := session.SetModel(t.Context(), "claude-sonnet-4.5", &copilot.SetModelOptions{ + ModelCapabilities: &copilot.ModelCapabilitiesOverride{ + Supports: &copilot.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(true), + }, + }, + }); err != nil { + t.Fatalf("SetModel returned error: %v", err) + } + + // Turn 2: vision on — image_url expected in new exchanges + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send second message: %v", err) + } + + trafficAfterT2, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges after turn 2: %v", err) + } + newExchanges := trafficAfterT2[len(trafficAfterT1):] + if !hasImageURLContent(newExchanges) { + t.Error("Expected image_url content parts when vision is enabled") + } + }) + + t.Run("vision enabled then disabled via setModel", func(t *testing.T) { + ctx.ConfigureForTest(t) + + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + ModelCapabilities: &copilot.ModelCapabilitiesOverride{ + Supports: &copilot.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(true), + }, + }, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + + // Turn 1: vision on — image_url expected + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send message: %v", err) + } + + trafficAfterT1, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges: %v", err) + } + if !hasImageURLContent(trafficAfterT1) { + t.Error("Expected image_url content parts when vision is enabled") + } + + // Switch vision off + if err := session.SetModel(t.Context(), "claude-sonnet-4.5", &copilot.SetModelOptions{ + ModelCapabilities: &copilot.ModelCapabilitiesOverride{ + Supports: &copilot.ModelCapabilitiesOverrideSupports{ + Vision: copilot.Bool(false), + }, + }, + }); err != nil { + t.Fatalf("SetModel returned error: %v", err) + } + + // Turn 2: vision off — no image_url expected in new exchanges + if _, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: viewImagePrompt}); err != nil { + t.Fatalf("Failed to send second message: %v", err) + } + + trafficAfterT2, err := ctx.GetExchanges() + if err != nil { + t.Fatalf("Failed to get exchanges after turn 2: %v", err) + } + newExchanges := trafficAfterT2[len(trafficAfterT1):] + if hasImageURLContent(newExchanges) { + t.Error("Expected no image_url content parts when vision is disabled") + } + }) +} diff --git a/go/internal/e2e/session_test.go b/go/internal/e2e/session_test.go index caab5255e..35824819a 100644 --- a/go/internal/e2e/session_test.go +++ b/go/internal/e2e/session_test.go @@ -1,6 +1,9 @@ package e2e import ( + "encoding/base64" + "os" + "path/filepath" "regexp" "strings" "sync" @@ -20,7 +23,7 @@ func TestSession(t *testing.T) { t.Run("should create and disconnect sessions", func(t *testing.T) { ctx.ConfigureForTest(t) - session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{OnPermissionRequest: copilot.PermissionHandler.ApproveAll, Model: "fake-test-model"}) + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{OnPermissionRequest: copilot.PermissionHandler.ApproveAll, Model: "claude-sonnet-4.5"}) if err != nil { t.Fatalf("Failed to create session: %v", err) } @@ -43,8 +46,8 @@ func TestSession(t *testing.T) { t.Errorf("Expected session.start sessionId to match") } - if messages[0].Data.SelectedModel == nil || *messages[0].Data.SelectedModel != "fake-test-model" { - t.Errorf("Expected selectedModel to be 'fake-test-model', got %v", messages[0].Data.SelectedModel) + if messages[0].Data.SelectedModel == nil || *messages[0].Data.SelectedModel != "claude-sonnet-4.5" { + t.Errorf("Expected selectedModel to be 'claude-sonnet-4.5', got %v", messages[0].Data.SelectedModel) } if err := session.Disconnect(); err != nil { @@ -1052,6 +1055,13 @@ func TestSessionBlobAttachment(t *testing.T) { t.Run("should accept blob attachments", func(t *testing.T) { ctx.ConfigureForTest(t) + // Write the image to disk so the model can view it + data := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" + pngBytes, _ := base64.StdEncoding.DecodeString(data) + if err := os.WriteFile(filepath.Join(ctx.WorkDir, "test-pixel.png"), pngBytes, 0644); err != nil { + t.Fatalf("Failed to write test image: %v", err) + } + session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, }) @@ -1059,10 +1069,9 @@ func TestSessionBlobAttachment(t *testing.T) { t.Fatalf("Failed to create session: %v", err) } - data := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" mimeType := "image/png" displayName := "test-pixel.png" - _, err = session.Send(t.Context(), copilot.MessageOptions{ + _, err = session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "Describe this image", Attachments: []copilot.Attachment{ { @@ -1077,7 +1086,6 @@ func TestSessionBlobAttachment(t *testing.T) { t.Fatalf("Send with blob attachment failed: %v", err) } - // Just verify send doesn't error — blob attachment support varies by runtime session.Disconnect() }) } diff --git a/go/internal/e2e/testharness/proxy.go b/go/internal/e2e/testharness/proxy.go index 91f8a8e0a..0caf19403 100644 --- a/go/internal/e2e/testharness/proxy.go +++ b/go/internal/e2e/testharness/proxy.go @@ -172,10 +172,35 @@ type ChatCompletionRequest struct { // ChatCompletionMessage represents a message in the chat completion request. type ChatCompletionMessage struct { - Role string `json:"role"` - Content string `json:"content,omitempty"` - ToolCallID string `json:"tool_call_id,omitempty"` - ToolCalls []ToolCall `json:"tool_calls,omitempty"` + Role string `json:"role"` + Content string `json:"content,omitempty"` + RawContent json.RawMessage `json:"-"` + ToolCallID string `json:"tool_call_id,omitempty"` + ToolCalls []ToolCall `json:"tool_calls,omitempty"` +} + +// UnmarshalJSON handles Content being either a plain string or an array of +// content parts (e.g. multimodal messages with image_url entries). +func (m *ChatCompletionMessage) UnmarshalJSON(data []byte) error { + type Alias ChatCompletionMessage + aux := &struct { + Content json.RawMessage `json:"content,omitempty"` + *Alias + }{ + Alias: (*Alias)(m), + } + if err := json.Unmarshal(data, aux); err != nil { + return err + } + m.RawContent = aux.Content + m.Content = "" + if len(aux.Content) > 0 { + var s string + if json.Unmarshal(aux.Content, &s) == nil { + m.Content = s + } + } + return nil } // ToolCall represents a tool call in an assistant message. diff --git a/go/rpc/generated_rpc.go b/go/rpc/generated_rpc.go index 3e7b336b7..6eee90963 100644 --- a/go/rpc/generated_rpc.go +++ b/go/rpc/generated_rpc.go @@ -33,7 +33,7 @@ type Model struct { // Billing information Billing *Billing `json:"billing,omitempty"` // Model capabilities and limits - Capabilities Capabilities `json:"capabilities"` + Capabilities ModelCapabilities `json:"capabilities"` // Default reasoning effort level (only present if model supports reasoning effort) DefaultReasoningEffort *string `json:"defaultReasoningEffort,omitempty"` // Model identifier (e.g., "claude-sonnet-4.5") @@ -53,25 +53,37 @@ type Billing struct { } // Model capabilities and limits -type Capabilities struct { +type ModelCapabilities struct { // Token limits for prompts, outputs, and context window - Limits Limits `json:"limits"` + Limits ModelCapabilitiesLimits `json:"limits"` // Feature flags indicating what the model supports - Supports Supports `json:"supports"` + Supports ModelCapabilitiesSupports `json:"supports"` } // Token limits for prompts, outputs, and context window -type Limits struct { +type ModelCapabilitiesLimits struct { // Maximum total context window size in tokens MaxContextWindowTokens float64 `json:"max_context_window_tokens"` // Maximum number of output/completion tokens MaxOutputTokens *float64 `json:"max_output_tokens,omitempty"` // Maximum number of prompt/input tokens MaxPromptTokens *float64 `json:"max_prompt_tokens,omitempty"` + // Vision-specific limits + Vision *ModelCapabilitiesLimitsVision `json:"vision,omitempty"` +} + +// Vision-specific limits +type ModelCapabilitiesLimitsVision struct { + // Maximum image size in bytes + MaxPromptImageSize float64 `json:"max_prompt_image_size"` + // Maximum number of images per prompt + MaxPromptImages float64 `json:"max_prompt_images"` + // MIME types the model accepts + SupportedMediaTypes []string `json:"supported_media_types"` } // Feature flags indicating what the model supports -type Supports struct { +type ModelCapabilitiesSupports struct { // Whether this model supports reasoning effort configuration ReasoningEffort *bool `json:"reasoningEffort,omitempty"` // Whether this model supports vision/image input @@ -234,12 +246,46 @@ type SessionModelSwitchToResult struct { } type SessionModelSwitchToParams struct { + // Override individual model capabilities resolved by the runtime + ModelCapabilities *ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` // Model identifier to switch to ModelID string `json:"modelId"` // Reasoning effort level to use for the model ReasoningEffort *string `json:"reasoningEffort,omitempty"` } +// Override individual model capabilities resolved by the runtime +type ModelCapabilitiesOverride struct { + // Token limits for prompts, outputs, and context window + Limits *ModelCapabilitiesOverrideLimits `json:"limits,omitempty"` + // Feature flags indicating what the model supports + Supports *ModelCapabilitiesOverrideSupports `json:"supports,omitempty"` +} + +// Token limits for prompts, outputs, and context window +type ModelCapabilitiesOverrideLimits struct { + // Maximum total context window size in tokens + MaxContextWindowTokens *float64 `json:"max_context_window_tokens,omitempty"` + MaxOutputTokens *float64 `json:"max_output_tokens,omitempty"` + MaxPromptTokens *float64 `json:"max_prompt_tokens,omitempty"` + Vision *ModelCapabilitiesOverrideLimitsVision `json:"vision,omitempty"` +} + +type ModelCapabilitiesOverrideLimitsVision struct { + // Maximum image size in bytes + MaxPromptImageSize *float64 `json:"max_prompt_image_size,omitempty"` + // Maximum number of images per prompt + MaxPromptImages *float64 `json:"max_prompt_images,omitempty"` + // MIME types the model accepts + SupportedMediaTypes []string `json:"supported_media_types,omitempty"` +} + +// Feature flags indicating what the model supports +type ModelCapabilitiesOverrideSupports struct { + ReasoningEffort *bool `json:"reasoningEffort,omitempty"` + Vision *bool `json:"vision,omitempty"` +} + type SessionModeGetResult struct { // The current agent mode. Mode Mode `json:"mode"` @@ -981,6 +1027,9 @@ func (a *ModelApi) SwitchTo(ctx context.Context, params *SessionModelSwitchToPar if params.ReasoningEffort != nil { req["reasoningEffort"] = *params.ReasoningEffort } + if params.ModelCapabilities != nil { + req["modelCapabilities"] = *params.ModelCapabilities + } } raw, err := a.client.Request("session.model.switchTo", req) if err != nil { diff --git a/go/samples/chat.go b/go/samples/chat.go index 4d5e98d7d..677aafdfe 100644 --- a/go/samples/chat.go +++ b/go/samples/chat.go @@ -37,7 +37,7 @@ func main() { switch event.Type { case copilot.SessionEventTypeAssistantReasoning: if event.Data.Content != nil { - output = fmt.Sprintf("[reasoning: %s]", *event.Data.Content) + output = fmt.Sprintf("[reasoning: %s]", *event.Data.Content.String) } case copilot.SessionEventTypeToolExecutionStart: if event.Data.ToolName != nil { @@ -66,7 +66,7 @@ func main() { reply, _ := session.SendAndWait(ctx, copilot.MessageOptions{Prompt: input}) content := "" if reply != nil && reply.Data.Content != nil { - content = *reply.Data.Content + content = *reply.Data.Content.String } fmt.Printf("\nAssistant: %s\n\n", content) } diff --git a/go/session.go b/go/session.go index 225f2bf5e..f7b4a852c 100644 --- a/go/session.go +++ b/go/session.go @@ -1229,6 +1229,9 @@ func (s *Session) Abort(ctx context.Context) error { type SetModelOptions struct { // ReasoningEffort sets the reasoning effort level for the new model (e.g., "low", "medium", "high", "xhigh"). ReasoningEffort *string + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride } // SetModel changes the model for this session. @@ -1246,6 +1249,7 @@ func (s *Session) SetModel(ctx context.Context, model string, opts *SetModelOpti params := &rpc.SessionModelSwitchToParams{ModelID: model} if opts != nil { params.ReasoningEffort = opts.ReasoningEffort + params.ModelCapabilities = opts.ModelCapabilities } _, err := s.RPC.Model.SwitchTo(ctx, params) if err != nil { diff --git a/go/types.go b/go/types.go index 9f23dcb85..ff9b4aed3 100644 --- a/go/types.go +++ b/go/types.go @@ -3,6 +3,8 @@ package copilot import ( "context" "encoding/json" + + "github.com/github/copilot-sdk/go/rpc" ) // ConnectionState represents the client connection state @@ -475,6 +477,9 @@ type SessionConfig struct { Streaming bool // Provider configures a custom model provider (BYOK) Provider *ProviderConfig + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride // MCPServers configures MCP servers for the session MCPServers map[string]MCPServerConfig // CustomAgents configures custom agents for the session @@ -650,6 +655,9 @@ type ResumeSessionConfig struct { ExcludedTools []string // Provider configures a custom model provider Provider *ProviderConfig + // ModelCapabilities overrides individual model capabilities resolved by the runtime. + // Only non-nil fields are applied over the runtime-resolved capabilities. + ModelCapabilities *rpc.ModelCapabilitiesOverride // ReasoningEffort level for models that support it. // Valid values: "low", "medium", "high", "xhigh" ReasoningEffort string @@ -765,6 +773,15 @@ type ModelCapabilities struct { Limits ModelLimits `json:"limits"` } +// Type aliases for model capabilities overrides, re-exported from the rpc +// package for ergonomic use without requiring a separate rpc import. +type ( + ModelCapabilitiesOverride = rpc.ModelCapabilitiesOverride + ModelCapabilitiesOverrideSupports = rpc.ModelCapabilitiesOverrideSupports + ModelCapabilitiesOverrideLimits = rpc.ModelCapabilitiesOverrideLimits + ModelCapabilitiesOverrideLimitsVision = rpc.ModelCapabilitiesOverrideLimitsVision +) + // ModelPolicy contains model policy state type ModelPolicy struct { State string `json:"state"` @@ -851,32 +868,33 @@ type SessionLifecycleHandler func(event SessionLifecycleEvent) // createSessionRequest is the request for session.create type createSessionRequest struct { - Model string `json:"model,omitempty"` - SessionID string `json:"sessionId,omitempty"` - ClientName string `json:"clientName,omitempty"` - ReasoningEffort string `json:"reasoningEffort,omitempty"` - Tools []Tool `json:"tools,omitempty"` - SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` - AvailableTools []string `json:"availableTools"` - ExcludedTools []string `json:"excludedTools,omitempty"` - Provider *ProviderConfig `json:"provider,omitempty"` - RequestPermission *bool `json:"requestPermission,omitempty"` - RequestUserInput *bool `json:"requestUserInput,omitempty"` - Hooks *bool `json:"hooks,omitempty"` - WorkingDirectory string `json:"workingDirectory,omitempty"` - Streaming *bool `json:"streaming,omitempty"` - MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` - EnvValueMode string `json:"envValueMode,omitempty"` - CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` - Agent string `json:"agent,omitempty"` - ConfigDir string `json:"configDir,omitempty"` - SkillDirectories []string `json:"skillDirectories,omitempty"` - DisabledSkills []string `json:"disabledSkills,omitempty"` - InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` - Commands []wireCommand `json:"commands,omitempty"` - RequestElicitation *bool `json:"requestElicitation,omitempty"` - Traceparent string `json:"traceparent,omitempty"` - Tracestate string `json:"tracestate,omitempty"` + Model string `json:"model,omitempty"` + SessionID string `json:"sessionId,omitempty"` + ClientName string `json:"clientName,omitempty"` + ReasoningEffort string `json:"reasoningEffort,omitempty"` + Tools []Tool `json:"tools,omitempty"` + SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` + AvailableTools []string `json:"availableTools"` + ExcludedTools []string `json:"excludedTools,omitempty"` + Provider *ProviderConfig `json:"provider,omitempty"` + ModelCapabilities *rpc.ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` + RequestPermission *bool `json:"requestPermission,omitempty"` + RequestUserInput *bool `json:"requestUserInput,omitempty"` + Hooks *bool `json:"hooks,omitempty"` + WorkingDirectory string `json:"workingDirectory,omitempty"` + Streaming *bool `json:"streaming,omitempty"` + MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` + EnvValueMode string `json:"envValueMode,omitempty"` + CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` + Agent string `json:"agent,omitempty"` + ConfigDir string `json:"configDir,omitempty"` + SkillDirectories []string `json:"skillDirectories,omitempty"` + DisabledSkills []string `json:"disabledSkills,omitempty"` + InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` + Commands []wireCommand `json:"commands,omitempty"` + RequestElicitation *bool `json:"requestElicitation,omitempty"` + Traceparent string `json:"traceparent,omitempty"` + Tracestate string `json:"tracestate,omitempty"` } // wireCommand is the wire representation of a command (name + description only, no handler). @@ -894,33 +912,34 @@ type createSessionResponse struct { // resumeSessionRequest is the request for session.resume type resumeSessionRequest struct { - SessionID string `json:"sessionId"` - ClientName string `json:"clientName,omitempty"` - Model string `json:"model,omitempty"` - ReasoningEffort string `json:"reasoningEffort,omitempty"` - Tools []Tool `json:"tools,omitempty"` - SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` - AvailableTools []string `json:"availableTools"` - ExcludedTools []string `json:"excludedTools,omitempty"` - Provider *ProviderConfig `json:"provider,omitempty"` - RequestPermission *bool `json:"requestPermission,omitempty"` - RequestUserInput *bool `json:"requestUserInput,omitempty"` - Hooks *bool `json:"hooks,omitempty"` - WorkingDirectory string `json:"workingDirectory,omitempty"` - ConfigDir string `json:"configDir,omitempty"` - DisableResume *bool `json:"disableResume,omitempty"` - Streaming *bool `json:"streaming,omitempty"` - MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` - EnvValueMode string `json:"envValueMode,omitempty"` - CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` - Agent string `json:"agent,omitempty"` - SkillDirectories []string `json:"skillDirectories,omitempty"` - DisabledSkills []string `json:"disabledSkills,omitempty"` - InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` - Commands []wireCommand `json:"commands,omitempty"` - RequestElicitation *bool `json:"requestElicitation,omitempty"` - Traceparent string `json:"traceparent,omitempty"` - Tracestate string `json:"tracestate,omitempty"` + SessionID string `json:"sessionId"` + ClientName string `json:"clientName,omitempty"` + Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoningEffort,omitempty"` + Tools []Tool `json:"tools,omitempty"` + SystemMessage *SystemMessageConfig `json:"systemMessage,omitempty"` + AvailableTools []string `json:"availableTools"` + ExcludedTools []string `json:"excludedTools,omitempty"` + Provider *ProviderConfig `json:"provider,omitempty"` + ModelCapabilities *rpc.ModelCapabilitiesOverride `json:"modelCapabilities,omitempty"` + RequestPermission *bool `json:"requestPermission,omitempty"` + RequestUserInput *bool `json:"requestUserInput,omitempty"` + Hooks *bool `json:"hooks,omitempty"` + WorkingDirectory string `json:"workingDirectory,omitempty"` + ConfigDir string `json:"configDir,omitempty"` + DisableResume *bool `json:"disableResume,omitempty"` + Streaming *bool `json:"streaming,omitempty"` + MCPServers map[string]MCPServerConfig `json:"mcpServers,omitempty"` + EnvValueMode string `json:"envValueMode,omitempty"` + CustomAgents []CustomAgentConfig `json:"customAgents,omitempty"` + Agent string `json:"agent,omitempty"` + SkillDirectories []string `json:"skillDirectories,omitempty"` + DisabledSkills []string `json:"disabledSkills,omitempty"` + InfiniteSessions *InfiniteSessionConfig `json:"infiniteSessions,omitempty"` + Commands []wireCommand `json:"commands,omitempty"` + RequestElicitation *bool `json:"requestElicitation,omitempty"` + Traceparent string `json:"traceparent,omitempty"` + Tracestate string `json:"tracestate,omitempty"` } // resumeSessionResponse is the response from session.resume diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index 98ed1f0c7..e51474b78 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -9,7 +9,7 @@ "version": "0.1.8", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, @@ -663,26 +663,26 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.17.tgz", - "integrity": "sha512-RTJ+kEKOdidjuOs8ozsoBdz+94g7tFJIEu5kz1P2iwJhsL+iIA5rtn9/jXOF0hAI3CLSXKZoSd66cqHrn4rb1A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.20-1.tgz", + "integrity": "sha512-a34M4P6XcKFy1sDubqn54qakQxeWwA44vKaOh3oNZT8vgna9R4ap2NYGnM8fn7XDAdlJ9QgW6Xt7dfPGwKkt/A==", "license": "SEE LICENSE IN LICENSE.md", "bin": { "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.17", - "@github/copilot-darwin-x64": "1.0.17", - "@github/copilot-linux-arm64": "1.0.17", - "@github/copilot-linux-x64": "1.0.17", - "@github/copilot-win32-arm64": "1.0.17", - "@github/copilot-win32-x64": "1.0.17" + "@github/copilot-darwin-arm64": "1.0.20-1", + "@github/copilot-darwin-x64": "1.0.20-1", + "@github/copilot-linux-arm64": "1.0.20-1", + "@github/copilot-linux-x64": "1.0.20-1", + "@github/copilot-win32-arm64": "1.0.20-1", + "@github/copilot-win32-x64": "1.0.20-1" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.17.tgz", - "integrity": "sha512-LSv66P8611y/UjTESnaHLYqLl9kA9yBYsaocZPQoOsvMgCmktgaBgUWq+KMpLMicaFN0jBAE5F0Ve7dW6N9X3A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.20-1.tgz", + "integrity": "sha512-tip/KyjhRQG7OMAR8rBWrFcPk3XFQQlajozIMPxEA7+qwgMBOlaGcO0iuDEdF5vAtYXhUPPAI/tbuUqkueoJEA==", "cpu": [ "arm64" ], @@ -696,9 +696,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.17.tgz", - "integrity": "sha512-yqRS0/8kYTGl4VvfJ/QOtHTeYF+DnAWNUReZgt2U0AEP3zgj4z4hxSH7D2PsO/488L4KsBmmcnJr13HmBGiT/w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.20-1.tgz", + "integrity": "sha512-d/Etng6335TF1Dcw37XFtjKKZqQbqh9trXg5GhMySUamo4UolykylWJuhs+suCx2JJc1lGzPVAdGOxAvj+4P3Q==", "cpu": [ "x64" ], @@ -712,9 +712,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.17.tgz", - "integrity": "sha512-TOK0ma0A24zmQJslkGxUk+KnMFpiqquWEXB5sIv/5Ci45Qi7s0BRWTnqtiJ8Vahwb/wkja6KarHkLA27+ETGUA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.20-1.tgz", + "integrity": "sha512-ptwwVk/uMEoVdGTbhfC8CLtSCq3agnRKlD+iojabcg5K0y0HbaEGIaOeJle0uARpqeyLADgoUkMbth/wWQI2gQ==", "cpu": [ "arm64" ], @@ -728,9 +728,9 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.17.tgz", - "integrity": "sha512-4Yum3uaAuTM/SiNtzchsO/G/144Bi/Z4FEcearW6WsGDvS6cRwSJeudOM0y4aoy4BHcv8+yw7YuXH5BHC3SAiA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.20-1.tgz", + "integrity": "sha512-sUuR5uVR1/Ndew/pSEQP4vLy2iohW+PMD96R+gzJkF77soe+PfFR7R6Py1VWmwAK1MDblyilDfMcusYLXK48LA==", "cpu": [ "x64" ], @@ -744,9 +744,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.17.tgz", - "integrity": "sha512-I1ferbfQ0aS149WyEUw6XS1sFixwTUUm13BPBQ3yMzD8G2SaoxTsdYdlhZpkVfkfh/rUYyvMKKi9VNxoVYOlDA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.20-1.tgz", + "integrity": "sha512-gk4belEoOHfQH2pJf0GPh2t1N4suIg1mhwJQHveGi5av22XZzYjY7yarNom+YCqc692MAuYsfNF0wXXSij3wBg==", "cpu": [ "arm64" ], @@ -760,9 +760,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.17.tgz", - "integrity": "sha512-kjiOxY9ibS+rPp9XFpPdfdYzluEL3SHN8R5/fnA7RO+kZEJ4FDKWJjAiec3tgVkEHQT3UwNuVa/u3TdfYNF15w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.20-1.tgz", + "integrity": "sha512-ypRD1iawRw8a0qzhp4fq4ZqvqL86mk2UZNWyuTM8HOe2o3+SrZbveXpEk7gUYJ4ShLhqLVywJHs4+4yPkv5p+A==", "cpu": [ "x64" ], diff --git a/nodejs/package.json b/nodejs/package.json index 99681ec3f..55e058ea6 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -56,7 +56,7 @@ "author": "GitHub", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, diff --git a/nodejs/src/client.ts b/nodejs/src/client.ts index 23aac99a3..e61afcacf 100644 --- a/nodejs/src/client.ts +++ b/nodejs/src/client.ts @@ -712,6 +712,7 @@ export class CopilotClient { availableTools: config.availableTools, excludedTools: config.excludedTools, provider: config.provider, + modelCapabilities: config.modelCapabilities, requestPermission: true, requestUserInput: !!config.onUserInputRequest, requestElicitation: !!config.onElicitationRequest, @@ -847,6 +848,7 @@ export class CopilotClient { description: cmd.description, })), provider: config.provider, + modelCapabilities: config.modelCapabilities, requestPermission: true, requestUserInput: !!config.onUserInputRequest, requestElicitation: !!config.onElicitationRequest, diff --git a/nodejs/src/generated/rpc.ts b/nodejs/src/generated/rpc.ts index 4f87c14f2..a72c07b9a 100644 --- a/nodejs/src/generated/rpc.ts +++ b/nodejs/src/generated/rpc.ts @@ -40,41 +40,7 @@ export interface ModelsListResult { * Display name */ name: string; - /** - * Model capabilities and limits - */ - capabilities: { - /** - * Feature flags indicating what the model supports - */ - supports: { - /** - * Whether this model supports vision/image input - */ - vision?: boolean; - /** - * Whether this model supports reasoning effort configuration - */ - reasoningEffort?: boolean; - }; - /** - * Token limits for prompts, outputs, and context window - */ - limits: { - /** - * Maximum number of prompt/input tokens - */ - max_prompt_tokens?: number; - /** - * Maximum number of output/completion tokens - */ - max_output_tokens?: number; - /** - * Maximum total context window size in tokens - */ - max_context_window_tokens: number; - }; - }; + capabilities: ModelCapabilities; /** * Policy state (if applicable) */ @@ -107,6 +73,61 @@ export interface ModelsListResult { defaultReasoningEffort?: string; }[]; } +/** + * Model capabilities and limits + */ +export interface ModelCapabilities { + supports: ModelCapabilitiesSupports; + limits: ModelCapabilitiesLimits; +} +/** + * Feature flags indicating what the model supports + */ +export interface ModelCapabilitiesSupports { + /** + * Whether this model supports vision/image input + */ + vision?: boolean; + /** + * Whether this model supports reasoning effort configuration + */ + reasoningEffort?: boolean; +} +/** + * Token limits for prompts, outputs, and context window + */ +export interface ModelCapabilitiesLimits { + /** + * Maximum number of prompt/input tokens + */ + max_prompt_tokens?: number; + /** + * Maximum number of output/completion tokens + */ + max_output_tokens?: number; + /** + * Maximum total context window size in tokens + */ + max_context_window_tokens: number; + vision?: ModelCapabilitiesLimitsVision; +} +/** + * Vision-specific limits + */ +export interface ModelCapabilitiesLimitsVision { + /** + * MIME types the model accepts + */ + supported_media_types: string[]; + /** + * Maximum number of images per prompt + */ + max_prompt_images: number; + /** + * Maximum image size in bytes + */ + max_prompt_image_size: number; +} export interface ToolsListResult { /** @@ -396,6 +417,47 @@ export interface SessionModelSwitchToParams { * Reasoning effort level to use for the model */ reasoningEffort?: string; + modelCapabilities?: ModelCapabilitiesOverride; +} +/** + * Override individual model capabilities resolved by the runtime + */ +export interface ModelCapabilitiesOverride { + supports?: ModelCapabilitiesOverrideSupports; + limits?: ModelCapabilitiesOverrideLimits; +} +/** + * Feature flags indicating what the model supports + */ +export interface ModelCapabilitiesOverrideSupports { + vision?: boolean; + reasoningEffort?: boolean; +} +/** + * Token limits for prompts, outputs, and context window + */ +export interface ModelCapabilitiesOverrideLimits { + max_prompt_tokens?: number; + max_output_tokens?: number; + /** + * Maximum total context window size in tokens + */ + max_context_window_tokens?: number; + vision?: ModelCapabilitiesOverrideLimitsVision; +} +export interface ModelCapabilitiesOverrideLimitsVision { + /** + * MIME types the model accepts + */ + supported_media_types?: string[]; + /** + * Maximum number of images per prompt + */ + max_prompt_images?: number; + /** + * Maximum image size in bytes + */ + max_prompt_image_size?: number; } export interface SessionModeGetResult { diff --git a/nodejs/src/generated/session-events.ts b/nodejs/src/generated/session-events.ts index 642c933cd..0c0389ad0 100644 --- a/nodejs/src/generated/session-events.ts +++ b/nodejs/src/generated/session-events.ts @@ -270,44 +270,9 @@ export type SessionEvent = ephemeral: true; type: "session.idle"; /** - * Payload indicating the agent is idle; includes any background tasks still in flight + * Payload indicating the session is fully idle with no background tasks in flight */ data: { - /** - * Background tasks still running when the agent became idle - */ - backgroundTasks?: { - /** - * Currently running background agents - */ - agents: { - /** - * Unique identifier of the background agent - */ - agentId: string; - /** - * Type of the background agent - */ - agentType: string; - /** - * Human-readable description of the agent task - */ - description?: string; - }[]; - /** - * Currently running background shell commands - */ - shells: { - /** - * Unique identifier of the background shell - */ - shellId: string; - /** - * Human-readable description of the shell command - */ - description?: string; - }[]; - }; /** * True when the preceding agentic loop was cancelled via abort signal */ @@ -2979,13 +2944,21 @@ export type SessionEvent = ephemeral: true; type: "user_input.completed"; /** - * User input request completion notification signaling UI dismissal + * User input request completion with the user's response */ data: { /** * Request ID of the resolved user input request; clients should dismiss any UI for this request */ requestId: string; + /** + * The user's answer to the input request + */ + answer?: string; + /** + * Whether the answer was typed as free-form text rather than selected from choices + */ + wasFreeform?: boolean; }; } | { @@ -3069,13 +3042,23 @@ export type SessionEvent = ephemeral: true; type: "elicitation.completed"; /** - * Elicitation request completion notification signaling UI dismissal + * Elicitation request completion with the user's response */ data: { /** * Request ID of the resolved elicitation request; clients should dismiss any UI for this request */ requestId: string; + /** + * The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" (dismissed) + */ + action?: "accept" | "decline" | "cancel"; + /** + * The submitted form data when action is 'accept'; keys match the requested schema fields + */ + content?: { + [k: string]: string | number | boolean | string[]; + }; }; } | { @@ -3490,13 +3473,29 @@ export type SessionEvent = ephemeral: true; type: "exit_plan_mode.completed"; /** - * Plan mode exit completion notification signaling UI dismissal + * Plan mode exit completion with the user's approval decision and optional feedback */ data: { /** * Request ID of the resolved exit plan mode request; clients should dismiss any UI for this request */ requestId: string; + /** + * Whether the plan was approved by the user + */ + approved?: boolean; + /** + * Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only') + */ + selectedAction?: string; + /** + * Whether edits should be auto-approved without confirmation + */ + autoApproveEdits?: boolean; + /** + * Free-form feedback from the user if they requested changes to the plan + */ + feedback?: string; }; } | { diff --git a/nodejs/src/index.ts b/nodejs/src/index.ts index dc754a778..3fab122db 100644 --- a/nodejs/src/index.ts +++ b/nodejs/src/index.ts @@ -36,6 +36,7 @@ export type { MessageOptions, ModelBilling, ModelCapabilities, + ModelCapabilitiesOverride, ModelInfo, ModelPolicy, PermissionHandler, diff --git a/nodejs/src/session.ts b/nodejs/src/session.ts index 0bd5ad7b8..ffb2c045a 100644 --- a/nodejs/src/session.ts +++ b/nodejs/src/session.ts @@ -24,6 +24,7 @@ import type { PermissionRequest, PermissionRequestResult, ReasoningEffort, + ModelCapabilitiesOverride, SectionTransformFn, SessionCapabilities, SessionEvent, @@ -1029,7 +1030,13 @@ export class CopilotSession { * await session.setModel("claude-sonnet-4.6", { reasoningEffort: "high" }); * ``` */ - async setModel(model: string, options?: { reasoningEffort?: ReasoningEffort }): Promise { + async setModel( + model: string, + options?: { + reasoningEffort?: ReasoningEffort; + modelCapabilities?: ModelCapabilitiesOverride; + } + ): Promise { await this.rpc.model.switchTo({ modelId: model, ...options }); } diff --git a/nodejs/src/types.ts b/nodejs/src/types.ts index c20bf00db..13367631f 100644 --- a/nodejs/src/types.ts +++ b/nodejs/src/types.ts @@ -1065,6 +1065,9 @@ export interface SessionConfig { */ reasoningEffort?: ReasoningEffort; + /** Per-property overrides for model capabilities, deep-merged over runtime defaults. */ + modelCapabilities?: ModelCapabilitiesOverride; + /** * Override the default configuration directory location. * When specified, the session will use this directory for storing config and state. @@ -1214,6 +1217,7 @@ export type ResumeSessionConfig = Pick< | "availableTools" | "excludedTools" | "provider" + | "modelCapabilities" | "streaming" | "reasoningEffort" | "onPermissionRequest" @@ -1465,6 +1469,16 @@ export interface ModelCapabilities { }; } +/** Recursively makes all properties optional, preserving arrays as-is. */ +type DeepPartial = T extends readonly (infer U)[] + ? DeepPartial[] + : T extends object + ? { [K in keyof T]?: DeepPartial } + : T; + +/** Deep-partial override for model capabilities — every property at any depth is optional. */ +export type ModelCapabilitiesOverride = DeepPartial; + /** * Model policy state */ diff --git a/nodejs/test/e2e/session.test.ts b/nodejs/test/e2e/session.test.ts index 1dc7c0109..6153d4e4c 100644 --- a/nodejs/test/e2e/session.test.ts +++ b/nodejs/test/e2e/session.test.ts @@ -11,7 +11,7 @@ describe("Sessions", async () => { it("should create and disconnect sessions", async () => { const session = await client.createSession({ onPermissionRequest: approveAll, - model: "fake-test-model", + model: "claude-sonnet-4.5", }); expect(session.sessionId).toMatch(/^[a-f0-9-]+$/); @@ -20,7 +20,7 @@ describe("Sessions", async () => { expect(sessionStartEvents).toMatchObject([ { type: "session.start", - data: { sessionId: session.sessionId, selectedModel: "fake-test-model" }, + data: { sessionId: session.sessionId, selectedModel: "claude-sonnet-4.5" }, }, ]); diff --git a/nodejs/test/e2e/session_config.test.ts b/nodejs/test/e2e/session_config.test.ts index e27421ebf..a4c66ef6f 100644 --- a/nodejs/test/e2e/session_config.test.ts +++ b/nodejs/test/e2e/session_config.test.ts @@ -5,7 +5,7 @@ import { approveAll } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; describe("Session Configuration", async () => { - const { copilotClient: client, workDir } = await createSdkTestContext(); + const { copilotClient: client, workDir, openAiEndpoint } = await createSdkTestContext(); it("should use workingDirectory for tool execution", async () => { const subDir = join(workDir, "subproject"); @@ -44,21 +44,25 @@ describe("Session Configuration", async () => { }); it("should accept blob attachments", async () => { + // Write the image to disk so the model can view it if it tries + const pngBase64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + await writeFile(join(workDir, "pixel.png"), Buffer.from(pngBase64, "base64")); + const session = await client.createSession({ onPermissionRequest: approveAll }); - await session.send({ - prompt: "Describe this image", + await session.sendAndWait({ + prompt: "What color is this pixel? Reply in one word.", attachments: [ { type: "blob", - data: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + data: pngBase64, mimeType: "image/png", - displayName: "test-pixel.png", + displayName: "pixel.png", }, ], }); - // Just verify send doesn't throw — blob attachment support varies by runtime await session.disconnect(); }); @@ -67,12 +71,86 @@ describe("Session Configuration", async () => { const session = await client.createSession({ onPermissionRequest: approveAll }); - await session.send({ + await session.sendAndWait({ prompt: "Summarize the attached file", attachments: [{ type: "file", path: join(workDir, "attached.txt") }], }); - // Just verify send doesn't throw — attachment support varies by runtime + await session.disconnect(); + }); + + const PNG_1X1 = Buffer.from( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==", + "base64" + ); + const VIEW_IMAGE_PROMPT = + "Use the view tool to look at the file test.png and describe what you see"; + + function hasImageUrlContent(messages: Array<{ role: string; content: unknown }>): boolean { + return messages.some( + (m) => + m.role === "user" && + Array.isArray(m.content) && + m.content.some((p: { type: string }) => p.type === "image_url") + ); + } + + it("vision disabled then enabled via setModel", async () => { + await writeFile(join(workDir, "test.png"), PNG_1X1); + + const session = await client.createSession({ + onPermissionRequest: approveAll, + modelCapabilities: { supports: { vision: false } }, + }); + + // Turn 1: vision off — no image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT1 = await openAiEndpoint.getExchanges(); + const t1Messages = trafficAfterT1.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t1Messages)).toBe(false); + + // Switch vision on (re-specify same model with updated capabilities) + await session.setModel("claude-sonnet-4.5", { + modelCapabilities: { supports: { vision: true } }, + }); + + // Turn 2: vision on — image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT2 = await openAiEndpoint.getExchanges(); + // Only check exchanges added after turn 1 + const newExchanges = trafficAfterT2.slice(trafficAfterT1.length); + const t2Messages = newExchanges.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t2Messages)).toBe(true); + + await session.disconnect(); + }); + + it("vision enabled then disabled via setModel", async () => { + await writeFile(join(workDir, "test.png"), PNG_1X1); + + const session = await client.createSession({ + onPermissionRequest: approveAll, + modelCapabilities: { supports: { vision: true } }, + }); + + // Turn 1: vision on — image_url expected + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT1 = await openAiEndpoint.getExchanges(); + const t1Messages = trafficAfterT1.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t1Messages)).toBe(true); + + // Switch vision off + await session.setModel("claude-sonnet-4.5", { + modelCapabilities: { supports: { vision: false } }, + }); + + // Turn 2: vision off — no image_url expected in new exchanges + await session.sendAndWait({ prompt: VIEW_IMAGE_PROMPT }); + const trafficAfterT2 = await openAiEndpoint.getExchanges(); + const newExchanges = trafficAfterT2.slice(trafficAfterT1.length); + const t2Messages = newExchanges.flatMap((e) => e.request.messages ?? []); + expect(hasImageUrlContent(t2Messages)).toBe(false); + await session.disconnect(); }); }); diff --git a/python/copilot/__init__.py b/python/copilot/__init__.py index 5a89909c5..db9f150c8 100644 --- a/python/copilot/__init__.py +++ b/python/copilot/__init__.py @@ -4,7 +4,15 @@ JSON-RPC based SDK for programmatic control of GitHub Copilot CLI """ -from .client import CopilotClient, ExternalServerConfig, SubprocessConfig +from .client import ( + CopilotClient, + ExternalServerConfig, + ModelCapabilitiesOverride, + ModelLimitsOverride, + ModelSupportsOverride, + ModelVisionLimitsOverride, + SubprocessConfig, +) from .session import ( CommandContext, CommandDefinition, @@ -33,6 +41,10 @@ "ElicitationResult", "ExternalServerConfig", "InputOptions", + "ModelCapabilitiesOverride", + "ModelLimitsOverride", + "ModelSupportsOverride", + "ModelVisionLimitsOverride", "SessionCapabilities", "SessionUiApi", "SessionUiCapabilities", diff --git a/python/copilot/client.py b/python/copilot/client.py index 356a5fd59..df6756cfe 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -384,6 +384,66 @@ def to_dict(self) -> dict: return result +@dataclass +class ModelVisionLimitsOverride: + supported_media_types: list[str] | None = None + max_prompt_images: int | None = None + max_prompt_image_size: int | None = None + + +@dataclass +class ModelLimitsOverride: + max_prompt_tokens: int | None = None + max_output_tokens: int | None = None + max_context_window_tokens: int | None = None + vision: ModelVisionLimitsOverride | None = None + + +@dataclass +class ModelSupportsOverride: + vision: bool | None = None + reasoning_effort: bool | None = None + + +@dataclass +class ModelCapabilitiesOverride: + supports: ModelSupportsOverride | None = None + limits: ModelLimitsOverride | None = None + + +def _capabilities_to_dict(caps: ModelCapabilitiesOverride) -> dict: + result: dict = {} + if caps.supports is not None: + s: dict = {} + if caps.supports.vision is not None: + s["vision"] = caps.supports.vision + if caps.supports.reasoning_effort is not None: + s["reasoningEffort"] = caps.supports.reasoning_effort + if s: + result["supports"] = s + if caps.limits is not None: + lim: dict = {} + if caps.limits.max_prompt_tokens is not None: + lim["max_prompt_tokens"] = caps.limits.max_prompt_tokens + if caps.limits.max_output_tokens is not None: + lim["max_output_tokens"] = caps.limits.max_output_tokens + if caps.limits.max_context_window_tokens is not None: + lim["max_context_window_tokens"] = caps.limits.max_context_window_tokens + if caps.limits.vision is not None: + v: dict = {} + if caps.limits.vision.supported_media_types is not None: + v["supported_media_types"] = caps.limits.vision.supported_media_types + if caps.limits.vision.max_prompt_images is not None: + v["max_prompt_images"] = caps.limits.vision.max_prompt_images + if caps.limits.vision.max_prompt_image_size is not None: + v["max_prompt_image_size"] = caps.limits.vision.max_prompt_image_size + if v: + lim["vision"] = v + if lim: + result["limits"] = lim + return result + + @dataclass class ModelPolicy: """Model policy state""" @@ -1107,6 +1167,7 @@ async def create_session( hooks: SessionHooks | None = None, working_directory: str | None = None, provider: ProviderConfig | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, streaming: bool | None = None, mcp_servers: dict[str, MCPServerConfig] | None = None, custom_agents: list[CustomAgentConfig] | None = None, @@ -1141,6 +1202,7 @@ async def create_session( hooks: Lifecycle hooks for the session. working_directory: Working directory for the session. provider: Provider configuration for Azure or custom endpoints. + model_capabilities: Override individual model capabilities resolved by the runtime. streaming: Whether to enable streaming responses. mcp_servers: MCP server configurations. custom_agents: Custom agent configurations. @@ -1247,6 +1309,10 @@ async def create_session( if provider: payload["provider"] = self._convert_provider_to_wire_format(provider) + # Add model capabilities override if provided + if model_capabilities: + payload["modelCapabilities"] = _capabilities_to_dict(model_capabilities) + # Add MCP servers configuration if provided if mcp_servers: payload["mcpServers"] = mcp_servers @@ -1346,6 +1412,7 @@ async def resume_session( hooks: SessionHooks | None = None, working_directory: str | None = None, provider: ProviderConfig | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, streaming: bool | None = None, mcp_servers: dict[str, MCPServerConfig] | None = None, custom_agents: list[CustomAgentConfig] | None = None, @@ -1380,6 +1447,7 @@ async def resume_session( hooks: Lifecycle hooks for the session. working_directory: Working directory for the session. provider: Provider configuration for Azure or custom endpoints. + model_capabilities: Override individual model capabilities resolved by the runtime. streaming: Whether to enable streaming responses. mcp_servers: MCP server configurations. custom_agents: Custom agent configurations. @@ -1455,6 +1523,8 @@ async def resume_session( payload["excludedTools"] = excluded_tools if provider: payload["provider"] = self._convert_provider_to_wire_format(provider) + if model_capabilities: + payload["modelCapabilities"] = _capabilities_to_dict(model_capabilities) if streaming is not None: payload["streaming"] = streaming diff --git a/python/copilot/generated/rpc.py b/python/copilot/generated/rpc.py index 7852d9984..93b80ee4f 100644 --- a/python/copilot/generated/rpc.py +++ b/python/copilot/generated/rpc.py @@ -10,8 +10,7 @@ from dataclasses import dataclass -from typing import Any, TypeVar, cast -from collections.abc import Callable +from typing import Any, TypeVar, Callable, cast from enum import Enum from uuid import UUID @@ -49,9 +48,9 @@ def from_union(fs, x): assert False -def from_bool(x: Any) -> bool: - assert isinstance(x, bool) - return x +def from_list(f: Callable[[Any], T], x: Any) -> list[T]: + assert isinstance(x, list) + return [f(y) for y in x] def to_class(c: type[T], x: Any) -> dict: @@ -59,9 +58,9 @@ def to_class(c: type[T], x: Any) -> dict: return cast(Any, x).to_dict() -def from_list(f: Callable[[Any], T], x: Any) -> list[T]: - assert isinstance(x, list) - return [f(y) for y in x] +def from_bool(x: Any) -> bool: + assert isinstance(x, bool) + return x def from_dict(f: Callable[[Any], T], x: Any) -> dict[str, T]: @@ -144,7 +143,36 @@ def to_dict(self) -> dict: @dataclass -class Limits: +class ModelCapabilitiesLimitsVision: + """Vision-specific limits""" + + max_prompt_image_size: float + """Maximum image size in bytes""" + + max_prompt_images: float + """Maximum number of images per prompt""" + + supported_media_types: list[str] + """MIME types the model accepts""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesLimitsVision': + assert isinstance(obj, dict) + max_prompt_image_size = from_float(obj.get("max_prompt_image_size")) + max_prompt_images = from_float(obj.get("max_prompt_images")) + supported_media_types = from_list(from_str, obj.get("supported_media_types")) + return ModelCapabilitiesLimitsVision(max_prompt_image_size, max_prompt_images, supported_media_types) + + def to_dict(self) -> dict: + result: dict = {} + result["max_prompt_image_size"] = to_float(self.max_prompt_image_size) + result["max_prompt_images"] = to_float(self.max_prompt_images) + result["supported_media_types"] = from_list(from_str, self.supported_media_types) + return result + + +@dataclass +class ModelCapabilitiesLimits: """Token limits for prompts, outputs, and context window""" max_context_window_tokens: float @@ -156,13 +184,17 @@ class Limits: max_prompt_tokens: float | None = None """Maximum number of prompt/input tokens""" + vision: ModelCapabilitiesLimitsVision | None = None + """Vision-specific limits""" + @staticmethod - def from_dict(obj: Any) -> 'Limits': + def from_dict(obj: Any) -> 'ModelCapabilitiesLimits': assert isinstance(obj, dict) max_context_window_tokens = from_float(obj.get("max_context_window_tokens")) max_output_tokens = from_union([from_float, from_none], obj.get("max_output_tokens")) max_prompt_tokens = from_union([from_float, from_none], obj.get("max_prompt_tokens")) - return Limits(max_context_window_tokens, max_output_tokens, max_prompt_tokens) + vision = from_union([ModelCapabilitiesLimitsVision.from_dict, from_none], obj.get("vision")) + return ModelCapabilitiesLimits(max_context_window_tokens, max_output_tokens, max_prompt_tokens, vision) def to_dict(self) -> dict: result: dict = {} @@ -171,11 +203,13 @@ def to_dict(self) -> dict: result["max_output_tokens"] = from_union([to_float, from_none], self.max_output_tokens) if self.max_prompt_tokens is not None: result["max_prompt_tokens"] = from_union([to_float, from_none], self.max_prompt_tokens) + if self.vision is not None: + result["vision"] = from_union([lambda x: to_class(ModelCapabilitiesLimitsVision, x), from_none], self.vision) return result @dataclass -class Supports: +class ModelCapabilitiesSupports: """Feature flags indicating what the model supports""" reasoning_effort: bool | None = None @@ -185,11 +219,11 @@ class Supports: """Whether this model supports vision/image input""" @staticmethod - def from_dict(obj: Any) -> 'Supports': + def from_dict(obj: Any) -> 'ModelCapabilitiesSupports': assert isinstance(obj, dict) reasoning_effort = from_union([from_bool, from_none], obj.get("reasoningEffort")) vision = from_union([from_bool, from_none], obj.get("vision")) - return Supports(reasoning_effort, vision) + return ModelCapabilitiesSupports(reasoning_effort, vision) def to_dict(self) -> dict: result: dict = {} @@ -201,26 +235,26 @@ def to_dict(self) -> dict: @dataclass -class Capabilities: +class ModelCapabilities: """Model capabilities and limits""" - limits: Limits + limits: ModelCapabilitiesLimits """Token limits for prompts, outputs, and context window""" - supports: Supports + supports: ModelCapabilitiesSupports """Feature flags indicating what the model supports""" @staticmethod - def from_dict(obj: Any) -> 'Capabilities': + def from_dict(obj: Any) -> 'ModelCapabilities': assert isinstance(obj, dict) - limits = Limits.from_dict(obj.get("limits")) - supports = Supports.from_dict(obj.get("supports")) - return Capabilities(limits, supports) + limits = ModelCapabilitiesLimits.from_dict(obj.get("limits")) + supports = ModelCapabilitiesSupports.from_dict(obj.get("supports")) + return ModelCapabilities(limits, supports) def to_dict(self) -> dict: result: dict = {} - result["limits"] = to_class(Limits, self.limits) - result["supports"] = to_class(Supports, self.supports) + result["limits"] = to_class(ModelCapabilitiesLimits, self.limits) + result["supports"] = to_class(ModelCapabilitiesSupports, self.supports) return result @@ -250,7 +284,7 @@ def to_dict(self) -> dict: @dataclass class Model: - capabilities: Capabilities + capabilities: ModelCapabilities """Model capabilities and limits""" id: str @@ -274,7 +308,7 @@ class Model: @staticmethod def from_dict(obj: Any) -> 'Model': assert isinstance(obj, dict) - capabilities = Capabilities.from_dict(obj.get("capabilities")) + capabilities = ModelCapabilities.from_dict(obj.get("capabilities")) id = from_str(obj.get("id")) name = from_str(obj.get("name")) billing = from_union([Billing.from_dict, from_none], obj.get("billing")) @@ -285,7 +319,7 @@ def from_dict(obj: Any) -> 'Model': def to_dict(self) -> dict: result: dict = {} - result["capabilities"] = to_class(Capabilities, self.capabilities) + result["capabilities"] = to_class(ModelCapabilities, self.capabilities) result["id"] = from_str(self.id) result["name"] = from_str(self.name) if self.billing is not None: @@ -475,7 +509,7 @@ class ServerValue: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -561,7 +595,7 @@ class MCPConfigAddParamsConfig: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -652,7 +686,7 @@ class MCPConfigUpdateParamsConfig: command: str | None = None cwd: str | None = None env: dict[str, str] | None = None - filter_mapping: dict[str | FilterMappingEnum] | FilterMappingEnum | None = None + filter_mapping: dict[str, FilterMappingEnum] | FilterMappingEnum | None = None is_default_server: bool | None = None timeout: float | None = None tools: list[str] | None = None @@ -839,11 +873,126 @@ def to_dict(self) -> dict: return result +@dataclass +class ModelCapabilitiesOverrideLimitsVision: + max_prompt_image_size: float | None = None + """Maximum image size in bytes""" + + max_prompt_images: float | None = None + """Maximum number of images per prompt""" + + supported_media_types: list[str] | None = None + """MIME types the model accepts""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideLimitsVision': + assert isinstance(obj, dict) + max_prompt_image_size = from_union([from_float, from_none], obj.get("max_prompt_image_size")) + max_prompt_images = from_union([from_float, from_none], obj.get("max_prompt_images")) + supported_media_types = from_union([lambda x: from_list(from_str, x), from_none], obj.get("supported_media_types")) + return ModelCapabilitiesOverrideLimitsVision(max_prompt_image_size, max_prompt_images, supported_media_types) + + def to_dict(self) -> dict: + result: dict = {} + if self.max_prompt_image_size is not None: + result["max_prompt_image_size"] = from_union([to_float, from_none], self.max_prompt_image_size) + if self.max_prompt_images is not None: + result["max_prompt_images"] = from_union([to_float, from_none], self.max_prompt_images) + if self.supported_media_types is not None: + result["supported_media_types"] = from_union([lambda x: from_list(from_str, x), from_none], self.supported_media_types) + return result + + +@dataclass +class ModelCapabilitiesOverrideLimits: + """Token limits for prompts, outputs, and context window""" + + max_context_window_tokens: float | None = None + """Maximum total context window size in tokens""" + + max_output_tokens: float | None = None + max_prompt_tokens: float | None = None + vision: ModelCapabilitiesOverrideLimitsVision | None = None + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideLimits': + assert isinstance(obj, dict) + max_context_window_tokens = from_union([from_float, from_none], obj.get("max_context_window_tokens")) + max_output_tokens = from_union([from_float, from_none], obj.get("max_output_tokens")) + max_prompt_tokens = from_union([from_float, from_none], obj.get("max_prompt_tokens")) + vision = from_union([ModelCapabilitiesOverrideLimitsVision.from_dict, from_none], obj.get("vision")) + return ModelCapabilitiesOverrideLimits(max_context_window_tokens, max_output_tokens, max_prompt_tokens, vision) + + def to_dict(self) -> dict: + result: dict = {} + if self.max_context_window_tokens is not None: + result["max_context_window_tokens"] = from_union([to_float, from_none], self.max_context_window_tokens) + if self.max_output_tokens is not None: + result["max_output_tokens"] = from_union([to_float, from_none], self.max_output_tokens) + if self.max_prompt_tokens is not None: + result["max_prompt_tokens"] = from_union([to_float, from_none], self.max_prompt_tokens) + if self.vision is not None: + result["vision"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideLimitsVision, x), from_none], self.vision) + return result + + +@dataclass +class ModelCapabilitiesOverrideSupports: + """Feature flags indicating what the model supports""" + + reasoning_effort: bool | None = None + vision: bool | None = None + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverrideSupports': + assert isinstance(obj, dict) + reasoning_effort = from_union([from_bool, from_none], obj.get("reasoningEffort")) + vision = from_union([from_bool, from_none], obj.get("vision")) + return ModelCapabilitiesOverrideSupports(reasoning_effort, vision) + + def to_dict(self) -> dict: + result: dict = {} + if self.reasoning_effort is not None: + result["reasoningEffort"] = from_union([from_bool, from_none], self.reasoning_effort) + if self.vision is not None: + result["vision"] = from_union([from_bool, from_none], self.vision) + return result + + +@dataclass +class ModelCapabilitiesOverride: + """Override individual model capabilities resolved by the runtime""" + + limits: ModelCapabilitiesOverrideLimits | None = None + """Token limits for prompts, outputs, and context window""" + + supports: ModelCapabilitiesOverrideSupports | None = None + """Feature flags indicating what the model supports""" + + @staticmethod + def from_dict(obj: Any) -> 'ModelCapabilitiesOverride': + assert isinstance(obj, dict) + limits = from_union([ModelCapabilitiesOverrideLimits.from_dict, from_none], obj.get("limits")) + supports = from_union([ModelCapabilitiesOverrideSupports.from_dict, from_none], obj.get("supports")) + return ModelCapabilitiesOverride(limits, supports) + + def to_dict(self) -> dict: + result: dict = {} + if self.limits is not None: + result["limits"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideLimits, x), from_none], self.limits) + if self.supports is not None: + result["supports"] = from_union([lambda x: to_class(ModelCapabilitiesOverrideSupports, x), from_none], self.supports) + return result + + @dataclass class SessionModelSwitchToParams: model_id: str """Model identifier to switch to""" + model_capabilities: ModelCapabilitiesOverride | None = None + """Override individual model capabilities resolved by the runtime""" + reasoning_effort: str | None = None """Reasoning effort level to use for the model""" @@ -851,12 +1000,15 @@ class SessionModelSwitchToParams: def from_dict(obj: Any) -> 'SessionModelSwitchToParams': assert isinstance(obj, dict) model_id = from_str(obj.get("modelId")) + model_capabilities = from_union([ModelCapabilitiesOverride.from_dict, from_none], obj.get("modelCapabilities")) reasoning_effort = from_union([from_str, from_none], obj.get("reasoningEffort")) - return SessionModelSwitchToParams(model_id, reasoning_effort) + return SessionModelSwitchToParams(model_id, model_capabilities, reasoning_effort) def to_dict(self) -> dict: result: dict = {} result["modelId"] = from_str(self.model_id) + if self.model_capabilities is not None: + result["modelCapabilities"] = from_union([lambda x: to_class(ModelCapabilitiesOverride, x), from_none], self.model_capabilities) if self.reasoning_effort is not None: result["reasoningEffort"] = from_union([from_str, from_none], self.reasoning_effort) return result diff --git a/python/copilot/generated/session_events.py b/python/copilot/generated/session_events.py index 9b4267829..361718ebb 100644 --- a/python/copilot/generated/session_events.py +++ b/python/copilot/generated/session_events.py @@ -78,6 +78,15 @@ def from_int(x: Any) -> int: return x +class Action(Enum): + """The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" + (dismissed) + """ + ACCEPT = "accept" + CANCEL = "cancel" + DECLINE = "decline" + + class AgentMode(Enum): """The agent mode that was active when this message was sent""" @@ -88,7 +97,7 @@ class AgentMode(Enum): @dataclass -class DataAgent: +class Agent: description: str """Description of what the agent does""" @@ -114,7 +123,7 @@ class DataAgent: """Model override for this agent, if set""" @staticmethod - def from_dict(obj: Any) -> 'DataAgent': + def from_dict(obj: Any) -> 'Agent': assert isinstance(obj, dict) description = from_str(obj.get("description")) display_name = from_str(obj.get("displayName")) @@ -124,7 +133,7 @@ def from_dict(obj: Any) -> 'DataAgent': tools = from_list(from_str, obj.get("tools")) user_invocable = from_bool(obj.get("userInvocable")) model = from_union([from_str, from_none], obj.get("model")) - return DataAgent(description, display_name, id, name, source, tools, user_invocable, model) + return Agent(description, display_name, id, name, source, tools, user_invocable, model) def to_dict(self) -> dict: result: dict = {} @@ -363,85 +372,6 @@ def to_dict(self) -> dict: return result -@dataclass -class BackgroundTasksAgent: - """A background agent task""" - - agent_id: str - """Unique identifier of the background agent""" - - agent_type: str - """Type of the background agent""" - - description: str | None = None - """Human-readable description of the agent task""" - - @staticmethod - def from_dict(obj: Any) -> 'BackgroundTasksAgent': - assert isinstance(obj, dict) - agent_id = from_str(obj.get("agentId")) - agent_type = from_str(obj.get("agentType")) - description = from_union([from_str, from_none], obj.get("description")) - return BackgroundTasksAgent(agent_id, agent_type, description) - - def to_dict(self) -> dict: - result: dict = {} - result["agentId"] = from_str(self.agent_id) - result["agentType"] = from_str(self.agent_type) - if self.description is not None: - result["description"] = from_union([from_str, from_none], self.description) - return result - - -@dataclass -class Shell: - """A background shell command""" - - shell_id: str - """Unique identifier of the background shell""" - - description: str | None = None - """Human-readable description of the shell command""" - - @staticmethod - def from_dict(obj: Any) -> 'Shell': - assert isinstance(obj, dict) - shell_id = from_str(obj.get("shellId")) - description = from_union([from_str, from_none], obj.get("description")) - return Shell(shell_id, description) - - def to_dict(self) -> dict: - result: dict = {} - result["shellId"] = from_str(self.shell_id) - if self.description is not None: - result["description"] = from_union([from_str, from_none], self.description) - return result - - -@dataclass -class BackgroundTasks: - """Background tasks still running when the agent became idle""" - - agents: list[BackgroundTasksAgent] - """Currently running background agents""" - - shells: list[Shell] - """Currently running background shell commands""" - - @staticmethod - def from_dict(obj: Any) -> 'BackgroundTasks': - assert isinstance(obj, dict) - agents = from_list(BackgroundTasksAgent.from_dict, obj.get("agents")) - shells = from_list(Shell.from_dict, obj.get("shells")) - return BackgroundTasks(agents, shells) - - def to_dict(self) -> dict: - result: dict = {} - result["agents"] = from_list(lambda x: to_class(BackgroundTasksAgent, x), self.agents) - result["shells"] = from_list(lambda x: to_class(Shell, x), self.shells) - return result - - @dataclass class CodeChanges: """Aggregate code change metrics for the session""" @@ -1384,7 +1314,7 @@ class ContentType(Enum): @dataclass -class Content: +class ContentElement: """A content block within a tool result, which may be text, terminal output, image, audio, or a resource @@ -1448,7 +1378,7 @@ class Content: """The embedded resource contents, either text or base64-encoded binary""" @staticmethod - def from_dict(obj: Any) -> 'Content': + def from_dict(obj: Any) -> 'ContentElement': assert isinstance(obj, dict) type = ContentType(obj.get("type")) text = from_union([from_str, from_none], obj.get("text")) @@ -1463,7 +1393,7 @@ def from_dict(obj: Any) -> 'Content': title = from_union([from_str, from_none], obj.get("title")) uri = from_union([from_str, from_none], obj.get("uri")) resource = from_union([Resource.from_dict, from_none], obj.get("resource")) - return Content(type, text, cwd, exit_code, data, mime_type, description, icons, name, size, title, uri, resource) + return ContentElement(type, text, cwd, exit_code, data, mime_type, description, icons, name, size, title, uri, resource) def to_dict(self) -> dict: result: dict = {} @@ -1516,7 +1446,7 @@ class Result: """Concise tool result text sent to the LLM for chat completion, potentially truncated for token efficiency """ - contents: list[Content] | None = None + contents: list[ContentElement] | None = None """Structured content blocks (text, images, audio, resources) returned by the tool in their native format """ @@ -1531,7 +1461,7 @@ class Result: def from_dict(obj: Any) -> 'Result': assert isinstance(obj, dict) content = from_union([from_str, from_none], obj.get("content")) - contents = from_union([lambda x: from_list(Content.from_dict, x), from_none], obj.get("contents")) + contents = from_union([lambda x: from_list(ContentElement.from_dict, x), from_none], obj.get("contents")) detailed_content = from_union([from_str, from_none], obj.get("detailedContent")) kind = from_union([ResultKind, from_none], obj.get("kind")) return Result(content, contents, detailed_content, kind) @@ -1541,7 +1471,7 @@ def to_dict(self) -> dict: if self.content is not None: result["content"] = from_union([from_str, from_none], self.content) if self.contents is not None: - result["contents"] = from_union([lambda x: from_list(lambda x: to_class(Content, x), x), from_none], self.contents) + result["contents"] = from_union([lambda x: from_list(lambda x: to_class(ContentElement, x), x), from_none], self.contents) if self.detailed_content is not None: result["detailedContent"] = from_union([from_str, from_none], self.detailed_content) if self.kind is not None: @@ -1779,7 +1709,7 @@ class Data: Error details for timeline display including message and optional diagnostic information - Payload indicating the agent is idle; includes any background tasks still in flight + Payload indicating the session is fully idle with no background tasks in flight Session title change payload containing the new display title @@ -1877,12 +1807,12 @@ class Data: User input request notification with question and optional predefined choices - User input request completion notification signaling UI dismissal + User input request completion with the user's response Elicitation request; may be form-based (structured input) or URL-based (browser redirect) - Elicitation request completion notification signaling UI dismissal + Elicitation request completion with the user's response Sampling request from an MCP server; contains the server name and a requestId for correlation @@ -1909,7 +1839,7 @@ class Data: Plan approval request with plan content and available user actions - Plan mode exit completion notification signaling UI dismissal + Plan mode exit completion with the user's approval decision and optional feedback """ already_in_use: bool | None = None """Whether the session was already in use by another client at start time @@ -1999,9 +1929,6 @@ class Data: aborted: bool | None = None """True when the preceding agentic loop was cancelled via abort signal""" - background_tasks: BackgroundTasks | None = None - """Background tasks still running when the agent became idle""" - title: str | None = None """The new display title for the session""" @@ -2273,7 +2200,7 @@ class Data: attachments: list[Attachment] | None = None """Files, selections, or GitHub references attached to the message""" - content: str | None = None + content: str | dict[str, float | bool | list[str] | str] | None = None """The user's message text as displayed in the timeline The complete extended thinking text from the model @@ -2285,6 +2212,8 @@ class Data: The system or developer prompt text The notification text, typically wrapped in XML tags + + The submitted form data when action is 'accept'; keys match the requested schema fields """ interaction_id: str | None = None """CAPI interaction ID for correlating this user message with its turn @@ -2547,6 +2476,12 @@ class Data: question: str | None = None """The question or prompt to present to the user""" + answer: str | None = None + """The user's answer to the input request""" + + was_freeform: bool | None = None + """Whether the answer was typed as free-form text rather than selected from choices""" + elicitation_source: str | None = None """The source that initiated the request (MCP server name, or absent for agent-initiated)""" @@ -2557,6 +2492,10 @@ class Data: requested_schema: RequestedSchema | None = None """JSON Schema describing the form fields to present to the user (form mode only)""" + action: Action | None = None + """The user action: "accept" (submitted form), "decline" (explicitly refused), or "cancel" + (dismissed) + """ mcp_request_id: float | str | None = None """The JSON-RPC request ID from the MCP protocol""" @@ -2605,10 +2544,22 @@ class Data: recommended_action: str | None = None """The recommended action for the user to take""" + approved: bool | None = None + """Whether the plan was approved by the user""" + + auto_approve_edits: bool | None = None + """Whether edits should be auto-approved without confirmation""" + + feedback: str | None = None + """Free-form feedback from the user if they requested changes to the plan""" + + selected_action: str | None = None + """Which action the user selected (e.g. 'autopilot', 'interactive', 'exit_only')""" + skills: list[Skill] | None = None """Array of resolved skill metadata""" - agents: list[DataAgent] | None = None + agents: list[Agent] | None = None """Array of loaded custom agent metadata""" errors: list[str] | None = None @@ -2648,7 +2599,6 @@ def from_dict(obj: Any) -> 'Data': status_code = from_union([from_int, from_none], obj.get("statusCode")) url = from_union([from_str, from_none], obj.get("url")) aborted = from_union([from_bool, from_none], obj.get("aborted")) - background_tasks = from_union([BackgroundTasks.from_dict, from_none], obj.get("backgroundTasks")) title = from_union([from_str, from_none], obj.get("title")) info_type = from_union([from_str, from_none], obj.get("infoType")) warning_type = from_union([from_str, from_none], obj.get("warningType")) @@ -2709,7 +2659,7 @@ def from_dict(obj: Any) -> 'Data': tokens_removed = from_union([from_float, from_none], obj.get("tokensRemoved")) agent_mode = from_union([AgentMode, from_none], obj.get("agentMode")) attachments = from_union([lambda x: from_list(Attachment.from_dict, x), from_none], obj.get("attachments")) - content = from_union([from_str, from_none], obj.get("content")) + content = from_union([from_str, lambda x: from_dict(lambda x: from_union([from_float, from_bool, lambda x: from_list(from_str, x), from_str], x), x), from_none], obj.get("content")) interaction_id = from_union([from_str, from_none], obj.get("interactionId")) source = from_union([from_str, from_none], obj.get("source")) transformed_content = from_union([from_str, from_none], obj.get("transformedContent")) @@ -2773,9 +2723,12 @@ def from_dict(obj: Any) -> 'Data': allow_freeform = from_union([from_bool, from_none], obj.get("allowFreeform")) choices = from_union([lambda x: from_list(from_str, x), from_none], obj.get("choices")) question = from_union([from_str, from_none], obj.get("question")) + answer = from_union([from_str, from_none], obj.get("answer")) + was_freeform = from_union([from_bool, from_none], obj.get("wasFreeform")) elicitation_source = from_union([from_str, from_none], obj.get("elicitationSource")) mode = from_union([Mode, from_none], obj.get("mode")) requested_schema = from_union([RequestedSchema.from_dict, from_none], obj.get("requestedSchema")) + action = from_union([Action, from_none], obj.get("action")) mcp_request_id = from_union([from_float, from_str, from_none], obj.get("mcpRequestId")) server_name = from_union([from_str, from_none], obj.get("serverName")) server_url = from_union([from_str, from_none], obj.get("serverUrl")) @@ -2790,14 +2743,18 @@ def from_dict(obj: Any) -> 'Data': actions = from_union([lambda x: from_list(from_str, x), from_none], obj.get("actions")) plan_content = from_union([from_str, from_none], obj.get("planContent")) recommended_action = from_union([from_str, from_none], obj.get("recommendedAction")) + approved = from_union([from_bool, from_none], obj.get("approved")) + auto_approve_edits = from_union([from_bool, from_none], obj.get("autoApproveEdits")) + feedback = from_union([from_str, from_none], obj.get("feedback")) + selected_action = from_union([from_str, from_none], obj.get("selectedAction")) skills = from_union([lambda x: from_list(Skill.from_dict, x), from_none], obj.get("skills")) - agents = from_union([lambda x: from_list(DataAgent.from_dict, x), from_none], obj.get("agents")) + agents = from_union([lambda x: from_list(Agent.from_dict, x), from_none], obj.get("agents")) errors = from_union([lambda x: from_list(from_str, x), from_none], obj.get("errors")) warnings = from_union([lambda x: from_list(from_str, x), from_none], obj.get("warnings")) servers = from_union([lambda x: from_list(Server.from_dict, x), from_none], obj.get("servers")) status = from_union([ServerStatus, from_none], obj.get("status")) extensions = from_union([lambda x: from_list(Extension.from_dict, x), from_none], obj.get("extensions")) - return Data(already_in_use, context, copilot_version, producer, reasoning_effort, remote_steerable, selected_model, session_id, start_time, version, event_count, resume_time, error_type, message, provider_call_id, stack, status_code, url, aborted, background_tasks, title, info_type, warning_type, new_model, previous_model, previous_reasoning_effort, new_mode, previous_mode, operation, path, handoff_time, host, remote_session_id, repository, source_type, summary, messages_removed_during_truncation, performed_by, post_truncation_messages_length, post_truncation_tokens_in_messages, pre_truncation_messages_length, pre_truncation_tokens_in_messages, token_limit, tokens_removed_during_truncation, events_removed, up_to_event_id, code_changes, conversation_tokens, current_model, current_tokens, error_reason, model_metrics, session_start_time, shutdown_type, system_tokens, tool_definitions_tokens, total_api_duration_ms, total_premium_requests, base_commit, branch, cwd, git_root, head_commit, host_type, is_initial, messages_length, checkpoint_number, checkpoint_path, compaction_tokens_used, error, messages_removed, post_compaction_tokens, pre_compaction_messages_length, pre_compaction_tokens, request_id, success, summary_content, tokens_removed, agent_mode, attachments, content, interaction_id, source, transformed_content, turn_id, intent, reasoning_id, delta_content, total_response_size_bytes, encrypted_content, message_id, output_tokens, parent_tool_call_id, phase, reasoning_opaque, reasoning_text, tool_requests, api_call_id, cache_read_tokens, cache_write_tokens, copilot_usage, cost, duration, initiator, input_tokens, inter_token_latency_ms, model, quota_snapshots, ttft_ms, reason, arguments, tool_call_id, tool_name, mcp_server_name, mcp_tool_name, partial_output, progress_message, is_user_requested, result, tool_telemetry, allowed_tools, description, name, plugin_name, plugin_version, agent_description, agent_display_name, agent_name, duration_ms, total_tokens, total_tool_calls, tools, hook_invocation_id, hook_type, input, output, metadata, role, kind, permission_request, resolved_by_hook, allow_freeform, choices, question, elicitation_source, mode, requested_schema, mcp_request_id, server_name, server_url, static_client_config, traceparent, tracestate, command, args, command_name, commands, ui, actions, plan_content, recommended_action, skills, agents, errors, warnings, servers, status, extensions) + return Data(already_in_use, context, copilot_version, producer, reasoning_effort, remote_steerable, selected_model, session_id, start_time, version, event_count, resume_time, error_type, message, provider_call_id, stack, status_code, url, aborted, title, info_type, warning_type, new_model, previous_model, previous_reasoning_effort, new_mode, previous_mode, operation, path, handoff_time, host, remote_session_id, repository, source_type, summary, messages_removed_during_truncation, performed_by, post_truncation_messages_length, post_truncation_tokens_in_messages, pre_truncation_messages_length, pre_truncation_tokens_in_messages, token_limit, tokens_removed_during_truncation, events_removed, up_to_event_id, code_changes, conversation_tokens, current_model, current_tokens, error_reason, model_metrics, session_start_time, shutdown_type, system_tokens, tool_definitions_tokens, total_api_duration_ms, total_premium_requests, base_commit, branch, cwd, git_root, head_commit, host_type, is_initial, messages_length, checkpoint_number, checkpoint_path, compaction_tokens_used, error, messages_removed, post_compaction_tokens, pre_compaction_messages_length, pre_compaction_tokens, request_id, success, summary_content, tokens_removed, agent_mode, attachments, content, interaction_id, source, transformed_content, turn_id, intent, reasoning_id, delta_content, total_response_size_bytes, encrypted_content, message_id, output_tokens, parent_tool_call_id, phase, reasoning_opaque, reasoning_text, tool_requests, api_call_id, cache_read_tokens, cache_write_tokens, copilot_usage, cost, duration, initiator, input_tokens, inter_token_latency_ms, model, quota_snapshots, ttft_ms, reason, arguments, tool_call_id, tool_name, mcp_server_name, mcp_tool_name, partial_output, progress_message, is_user_requested, result, tool_telemetry, allowed_tools, description, name, plugin_name, plugin_version, agent_description, agent_display_name, agent_name, duration_ms, total_tokens, total_tool_calls, tools, hook_invocation_id, hook_type, input, output, metadata, role, kind, permission_request, resolved_by_hook, allow_freeform, choices, question, answer, was_freeform, elicitation_source, mode, requested_schema, action, mcp_request_id, server_name, server_url, static_client_config, traceparent, tracestate, command, args, command_name, commands, ui, actions, plan_content, recommended_action, approved, auto_approve_edits, feedback, selected_action, skills, agents, errors, warnings, servers, status, extensions) def to_dict(self) -> dict: result: dict = {} @@ -2839,8 +2796,6 @@ def to_dict(self) -> dict: result["url"] = from_union([from_str, from_none], self.url) if self.aborted is not None: result["aborted"] = from_union([from_bool, from_none], self.aborted) - if self.background_tasks is not None: - result["backgroundTasks"] = from_union([lambda x: to_class(BackgroundTasks, x), from_none], self.background_tasks) if self.title is not None: result["title"] = from_union([from_str, from_none], self.title) if self.info_type is not None: @@ -2962,7 +2917,7 @@ def to_dict(self) -> dict: if self.attachments is not None: result["attachments"] = from_union([lambda x: from_list(lambda x: to_class(Attachment, x), x), from_none], self.attachments) if self.content is not None: - result["content"] = from_union([from_str, from_none], self.content) + result["content"] = from_union([from_str, lambda x: from_dict(lambda x: from_union([to_float, from_bool, lambda x: from_list(from_str, x), from_str], x), x), from_none], self.content) if self.interaction_id is not None: result["interactionId"] = from_union([from_str, from_none], self.interaction_id) if self.source is not None: @@ -3089,12 +3044,18 @@ def to_dict(self) -> dict: result["choices"] = from_union([lambda x: from_list(from_str, x), from_none], self.choices) if self.question is not None: result["question"] = from_union([from_str, from_none], self.question) + if self.answer is not None: + result["answer"] = from_union([from_str, from_none], self.answer) + if self.was_freeform is not None: + result["wasFreeform"] = from_union([from_bool, from_none], self.was_freeform) if self.elicitation_source is not None: result["elicitationSource"] = from_union([from_str, from_none], self.elicitation_source) if self.mode is not None: result["mode"] = from_union([lambda x: to_enum(Mode, x), from_none], self.mode) if self.requested_schema is not None: result["requestedSchema"] = from_union([lambda x: to_class(RequestedSchema, x), from_none], self.requested_schema) + if self.action is not None: + result["action"] = from_union([lambda x: to_enum(Action, x), from_none], self.action) if self.mcp_request_id is not None: result["mcpRequestId"] = from_union([to_float, from_str, from_none], self.mcp_request_id) if self.server_name is not None: @@ -3123,10 +3084,18 @@ def to_dict(self) -> dict: result["planContent"] = from_union([from_str, from_none], self.plan_content) if self.recommended_action is not None: result["recommendedAction"] = from_union([from_str, from_none], self.recommended_action) + if self.approved is not None: + result["approved"] = from_union([from_bool, from_none], self.approved) + if self.auto_approve_edits is not None: + result["autoApproveEdits"] = from_union([from_bool, from_none], self.auto_approve_edits) + if self.feedback is not None: + result["feedback"] = from_union([from_str, from_none], self.feedback) + if self.selected_action is not None: + result["selectedAction"] = from_union([from_str, from_none], self.selected_action) if self.skills is not None: result["skills"] = from_union([lambda x: from_list(lambda x: to_class(Skill, x), x), from_none], self.skills) if self.agents is not None: - result["agents"] = from_union([lambda x: from_list(lambda x: to_class(DataAgent, x), x), from_none], self.agents) + result["agents"] = from_union([lambda x: from_list(lambda x: to_class(Agent, x), x), from_none], self.agents) if self.errors is not None: result["errors"] = from_union([lambda x: from_list(from_str, x), from_none], self.errors) if self.warnings is not None: @@ -3236,7 +3205,7 @@ class SessionEvent: Error details for timeline display including message and optional diagnostic information - Payload indicating the agent is idle; includes any background tasks still in flight + Payload indicating the session is fully idle with no background tasks in flight Session title change payload containing the new display title @@ -3334,12 +3303,12 @@ class SessionEvent: User input request notification with question and optional predefined choices - User input request completion notification signaling UI dismissal + User input request completion with the user's response Elicitation request; may be form-based (structured input) or URL-based (browser redirect) - Elicitation request completion notification signaling UI dismissal + Elicitation request completion with the user's response Sampling request from an MCP server; contains the server name and a requestId for correlation @@ -3366,7 +3335,7 @@ class SessionEvent: Plan approval request with plan content and available user actions - Plan mode exit completion notification signaling UI dismissal + Plan mode exit completion with the user's approval decision and optional feedback """ id: UUID """Unique event identifier (UUID v4), generated when the event is emitted""" diff --git a/python/copilot/session.py b/python/copilot/session.py index 9bf384fbe..59ec8532b 100644 --- a/python/copilot/session.py +++ b/python/copilot/session.py @@ -17,7 +17,7 @@ from collections.abc import Awaitable, Callable from dataclasses import dataclass from types import TracebackType -from typing import Any, Literal, NotRequired, Required, TypedDict, cast +from typing import TYPE_CHECKING, Any, Literal, NotRequired, Required, TypedDict, cast from ._jsonrpc import JsonRpcError, ProcessExitedError from ._telemetry import get_trace_context, trace_context @@ -41,6 +41,9 @@ SessionUIHandlePendingElicitationParams, SessionUIHandlePendingElicitationParamsResult, ) +from .generated.rpc import ( + ModelCapabilitiesOverride as _RpcModelCapabilitiesOverride, +) from .generated.session_events import ( PermissionRequest, SessionEvent, @@ -49,6 +52,9 @@ ) from .tools import Tool, ToolHandler, ToolInvocation, ToolResult +if TYPE_CHECKING: + from .client import ModelCapabilitiesOverride + # Re-export SessionEvent under an alias used internally SessionEventTypeAlias = SessionEvent @@ -1882,7 +1888,13 @@ async def abort(self) -> None: """ await self._client.request("session.abort", {"sessionId": self.session_id}) - async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> None: + async def set_model( + self, + model: str, + *, + reasoning_effort: str | None = None, + model_capabilities: ModelCapabilitiesOverride | None = None, + ) -> None: """ Change the model for this session. @@ -1893,6 +1905,7 @@ async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> model: Model ID to switch to (e.g., "gpt-4.1", "claude-sonnet-4"). reasoning_effort: Optional reasoning effort level for the new model (e.g., "low", "medium", "high", "xhigh"). + model_capabilities: Override individual model capabilities resolved by the runtime. Raises: Exception: If the session has been destroyed or the connection fails. @@ -1901,10 +1914,18 @@ async def set_model(self, model: str, *, reasoning_effort: str | None = None) -> >>> await session.set_model("gpt-4.1") >>> await session.set_model("claude-sonnet-4.6", reasoning_effort="high") """ + rpc_caps = None + if model_capabilities is not None: + from .client import _capabilities_to_dict + + rpc_caps = _RpcModelCapabilitiesOverride.from_dict( + _capabilities_to_dict(model_capabilities) + ) await self.rpc.model.switch_to( SessionModelSwitchToParams( model_id=model, reasoning_effort=reasoning_effort, + model_capabilities=rpc_caps, ) ) diff --git a/python/e2e/test_session.py b/python/e2e/test_session.py index c78b93ce1..1a249b516 100644 --- a/python/e2e/test_session.py +++ b/python/e2e/test_session.py @@ -1,5 +1,6 @@ """E2E Session Tests""" +import base64 import os import pytest @@ -17,7 +18,7 @@ class TestSessions: async def test_should_create_and_disconnect_sessions(self, ctx: E2ETestContext): session = await ctx.client.create_session( - on_permission_request=PermissionHandler.approve_all, model="fake-test-model" + on_permission_request=PermissionHandler.approve_all, model="claude-sonnet-4.5" ) assert session.session_id @@ -25,7 +26,7 @@ async def test_should_create_and_disconnect_sessions(self, ctx: E2ETestContext): assert len(messages) > 0 assert messages[0].type.value == "session.start" assert messages[0].data.session_id == session.session_id - assert messages[0].data.selected_model == "fake-test-model" + assert messages[0].data.selected_model == "claude-sonnet-4.5" await session.disconnect() @@ -611,18 +612,21 @@ def on_event(event): assert event.data.reasoning_effort == "high" async def test_should_accept_blob_attachments(self, ctx: E2ETestContext): - session = await ctx.client.create_session( - on_permission_request=PermissionHandler.approve_all - ) - - # 1x1 transparent PNG pixel, base64-encoded + # Write the image to disk so the model can view it pixel_png = ( "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAY" "AAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhg" "GAWjR9awAAAABJRU5ErkJggg==" ) + png_path = os.path.join(ctx.work_dir, "test-pixel.png") + with open(png_path, "wb") as f: + f.write(base64.b64decode(pixel_png)) - await session.send( + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all + ) + + await session.send_and_wait( "Describe this image", attachments=[ { @@ -634,7 +638,6 @@ async def test_should_accept_blob_attachments(self, ctx: E2ETestContext): ], ) - # Just verify send doesn't throw — blob attachment support varies by runtime await session.disconnect() diff --git a/python/e2e/test_session_config.py b/python/e2e/test_session_config.py new file mode 100644 index 000000000..e9c203b79 --- /dev/null +++ b/python/e2e/test_session_config.py @@ -0,0 +1,99 @@ +"""E2E tests for session configuration including model capabilities overrides.""" + +import base64 +import os + +import pytest + +from copilot import ModelCapabilitiesOverride, ModelSupportsOverride +from copilot.session import PermissionHandler + +from .testharness import E2ETestContext + +pytestmark = pytest.mark.asyncio(loop_scope="module") + + +def has_image_url_content(exchanges: list[dict]) -> bool: + """Check if any exchange contains an image_url content part in user messages.""" + for ex in exchanges: + for msg in ex.get("request", {}).get("messages", []): + if msg.get("role") == "user" and isinstance(msg.get("content"), list): + if any(p.get("type") == "image_url" for p in msg["content"]): + return True + return False + + +PNG_1X1 = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==" +) +VIEW_IMAGE_PROMPT = "Use the view tool to look at the file test.png and describe what you see" + + +class TestSessionConfig: + """Tests for session configuration including model capabilities overrides.""" + + async def test_vision_disabled_then_enabled_via_setmodel(self, ctx: E2ETestContext): + png_path = os.path.join(ctx.work_dir, "test.png") + with open(png_path, "wb") as f: + f.write(PNG_1X1) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=False) + ), + ) + + # Turn 1: vision off — no image_url expected + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t1 = await ctx.get_exchanges() + assert not has_image_url_content(traffic_after_t1) + + # Switch vision on + await session.set_model( + "claude-sonnet-4.5", + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=True) + ), + ) + + # Turn 2: vision on — image_url expected in new exchanges + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t2 = await ctx.get_exchanges() + new_exchanges = traffic_after_t2[len(traffic_after_t1) :] + assert has_image_url_content(new_exchanges) + + await session.disconnect() + + async def test_vision_enabled_then_disabled_via_setmodel(self, ctx: E2ETestContext): + png_path = os.path.join(ctx.work_dir, "test.png") + with open(png_path, "wb") as f: + f.write(PNG_1X1) + + session = await ctx.client.create_session( + on_permission_request=PermissionHandler.approve_all, + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=True) + ), + ) + + # Turn 1: vision on — image_url expected + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t1 = await ctx.get_exchanges() + assert has_image_url_content(traffic_after_t1) + + # Switch vision off + await session.set_model( + "claude-sonnet-4.5", + model_capabilities=ModelCapabilitiesOverride( + supports=ModelSupportsOverride(vision=False) + ), + ) + + # Turn 2: vision off — no image_url expected in new exchanges + await session.send_and_wait(VIEW_IMAGE_PROMPT) + traffic_after_t2 = await ctx.get_exchanges() + new_exchanges = traffic_after_t2[len(traffic_after_t1) :] + assert not has_image_url_content(new_exchanges) + + await session.disconnect() diff --git a/scripts/codegen/csharp.ts b/scripts/codegen/csharp.ts index 304324421..d60cfbb96 100644 --- a/scripts/codegen/csharp.ts +++ b/scripts/codegen/csharp.ts @@ -620,7 +620,7 @@ function resolveRpcType(schema: JSONSchema7, isRequired: boolean, parentClassNam return isRequired ? enumName : `${enumName}?`; } if (schema.type === "object" && schema.properties) { - const className = `${parentClassName}${propName}`; + const className = (schema.title as string) ?? `${parentClassName}${propName}`; classes.push(emitRpcClass(className, schema, "public", classes)); return isRequired ? className : `${className}?`; } diff --git a/scripts/codegen/python.ts b/scripts/codegen/python.ts index 0340cf1f1..71e44943f 100644 --- a/scripts/codegen/python.ts +++ b/scripts/codegen/python.ts @@ -57,14 +57,36 @@ function replaceBalancedBrackets(code: string, prefix: string, replacer: (inner: return result; } +/** Split a string by commas, but only at the top bracket depth (ignores commas inside [...]) */ +function splitTopLevelCommas(s: string): string[] { + const parts: string[] = []; + let depth = 0; + let start = 0; + for (let i = 0; i < s.length; i++) { + if (s[i] === "[") depth++; + else if (s[i] === "]") depth--; + else if (s[i] === "," && depth === 0) { + parts.push(s.slice(start, i)); + start = i + 1; + } + } + parts.push(s.slice(start)); + return parts; +} + function modernizePython(code: string): string { // Replace Optional[X] with X | None (handles arbitrarily nested brackets) code = replaceBalancedBrackets(code, "Optional", (inner) => `${inner} | None`); - // Replace Union[X, Y] with X | Y - code = replaceBalancedBrackets(code, "Union", (inner) => { - return inner.split(",").map((s: string) => s.trim()).join(" | "); - }); + // Replace Union[X, Y] with X | Y (split only at top-level commas, not inside brackets) + // Run iteratively to handle nested Union inside Dict/List + let prev = ""; + while (prev !== code) { + prev = code; + code = replaceBalancedBrackets(code, "Union", (inner) => { + return splitTopLevelCommas(inner).map((s: string) => s.trim()).join(" | "); + }); + } // Replace List[X] with list[X] code = code.replace(/\bList\[/g, "list["); diff --git a/test/harness/package-lock.json b/test/harness/package-lock.json index 5d055e680..67e294c83 100644 --- a/test/harness/package-lock.json +++ b/test/harness/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "ISC", "devDependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "openai": "^6.17.0", @@ -462,27 +462,27 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.17.tgz", - "integrity": "sha512-RTJ+kEKOdidjuOs8ozsoBdz+94g7tFJIEu5kz1P2iwJhsL+iIA5rtn9/jXOF0hAI3CLSXKZoSd66cqHrn4rb1A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.20-1.tgz", + "integrity": "sha512-a34M4P6XcKFy1sDubqn54qakQxeWwA44vKaOh3oNZT8vgna9R4ap2NYGnM8fn7XDAdlJ9QgW6Xt7dfPGwKkt/A==", "dev": true, "license": "SEE LICENSE IN LICENSE.md", "bin": { "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.17", - "@github/copilot-darwin-x64": "1.0.17", - "@github/copilot-linux-arm64": "1.0.17", - "@github/copilot-linux-x64": "1.0.17", - "@github/copilot-win32-arm64": "1.0.17", - "@github/copilot-win32-x64": "1.0.17" + "@github/copilot-darwin-arm64": "1.0.20-1", + "@github/copilot-darwin-x64": "1.0.20-1", + "@github/copilot-linux-arm64": "1.0.20-1", + "@github/copilot-linux-x64": "1.0.20-1", + "@github/copilot-win32-arm64": "1.0.20-1", + "@github/copilot-win32-x64": "1.0.20-1" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.17.tgz", - "integrity": "sha512-LSv66P8611y/UjTESnaHLYqLl9kA9yBYsaocZPQoOsvMgCmktgaBgUWq+KMpLMicaFN0jBAE5F0Ve7dW6N9X3A==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.20-1.tgz", + "integrity": "sha512-tip/KyjhRQG7OMAR8rBWrFcPk3XFQQlajozIMPxEA7+qwgMBOlaGcO0iuDEdF5vAtYXhUPPAI/tbuUqkueoJEA==", "cpu": [ "arm64" ], @@ -497,9 +497,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.17.tgz", - "integrity": "sha512-yqRS0/8kYTGl4VvfJ/QOtHTeYF+DnAWNUReZgt2U0AEP3zgj4z4hxSH7D2PsO/488L4KsBmmcnJr13HmBGiT/w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.20-1.tgz", + "integrity": "sha512-d/Etng6335TF1Dcw37XFtjKKZqQbqh9trXg5GhMySUamo4UolykylWJuhs+suCx2JJc1lGzPVAdGOxAvj+4P3Q==", "cpu": [ "x64" ], @@ -514,9 +514,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.17.tgz", - "integrity": "sha512-TOK0ma0A24zmQJslkGxUk+KnMFpiqquWEXB5sIv/5Ci45Qi7s0BRWTnqtiJ8Vahwb/wkja6KarHkLA27+ETGUA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.20-1.tgz", + "integrity": "sha512-ptwwVk/uMEoVdGTbhfC8CLtSCq3agnRKlD+iojabcg5K0y0HbaEGIaOeJle0uARpqeyLADgoUkMbth/wWQI2gQ==", "cpu": [ "arm64" ], @@ -531,9 +531,9 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.17.tgz", - "integrity": "sha512-4Yum3uaAuTM/SiNtzchsO/G/144Bi/Z4FEcearW6WsGDvS6cRwSJeudOM0y4aoy4BHcv8+yw7YuXH5BHC3SAiA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.20-1.tgz", + "integrity": "sha512-sUuR5uVR1/Ndew/pSEQP4vLy2iohW+PMD96R+gzJkF77soe+PfFR7R6Py1VWmwAK1MDblyilDfMcusYLXK48LA==", "cpu": [ "x64" ], @@ -548,9 +548,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.17.tgz", - "integrity": "sha512-I1ferbfQ0aS149WyEUw6XS1sFixwTUUm13BPBQ3yMzD8G2SaoxTsdYdlhZpkVfkfh/rUYyvMKKi9VNxoVYOlDA==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.20-1.tgz", + "integrity": "sha512-gk4belEoOHfQH2pJf0GPh2t1N4suIg1mhwJQHveGi5av22XZzYjY7yarNom+YCqc692MAuYsfNF0wXXSij3wBg==", "cpu": [ "arm64" ], @@ -565,9 +565,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.17", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.17.tgz", - "integrity": "sha512-kjiOxY9ibS+rPp9XFpPdfdYzluEL3SHN8R5/fnA7RO+kZEJ4FDKWJjAiec3tgVkEHQT3UwNuVa/u3TdfYNF15w==", + "version": "1.0.20-1", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.20-1.tgz", + "integrity": "sha512-ypRD1iawRw8a0qzhp4fq4ZqvqL86mk2UZNWyuTM8HOe2o3+SrZbveXpEk7gUYJ4ShLhqLVywJHs4+4yPkv5p+A==", "cpu": [ "x64" ], diff --git a/test/harness/package.json b/test/harness/package.json index 257caf35c..48f43e856 100644 --- a/test/harness/package.json +++ b/test/harness/package.json @@ -11,7 +11,7 @@ "test": "vitest run" }, "devDependencies": { - "@github/copilot": "^1.0.17", + "@github/copilot": "^1.0.20-1", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "openai": "^6.17.0", diff --git a/test/harness/replayingCapiProxy.ts b/test/harness/replayingCapiProxy.ts index 53d8c2b07..03dcd190f 100644 --- a/test/harness/replayingCapiProxy.ts +++ b/test/harness/replayingCapiProxy.ts @@ -341,6 +341,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy { state.testInfo, state.workDir, state.toolResultNormalizers, + state.storedData, ); return; } @@ -375,36 +376,100 @@ async function writeCapturesToDisk( } } +/** + * Produces a human-readable explanation of why no stored conversation matched + * a given request. For each stored conversation it reports the first reason + * matching failed, mirroring the logic in {@link findAssistantIndexAfterPrefix}. + */ +function diagnoseMatchFailure( + requestMessages: NormalizedMessage[], + rawMessages: unknown[], + storedData: NormalizedData | undefined, +): string { + const lines: string[] = []; + lines.push(`Request has ${requestMessages.length} normalized messages (${rawMessages.length} raw).`); + + if (!storedData || storedData.conversations.length === 0) { + lines.push("No stored conversations to match against."); + return lines.join("\n"); + } + + for (let c = 0; c < storedData.conversations.length; c++) { + const saved = storedData.conversations[c].messages; + + // Same check as findAssistantIndexAfterPrefix: request must be a strict prefix + if (requestMessages.length >= saved.length) { + lines.push( + `Conversation ${c} (${saved.length} messages): ` + + `skipped — request has ${requestMessages.length} messages, need fewer than ${saved.length}.`, + ); + continue; + } + + // Find the first message that doesn't match + let mismatchIndex = -1; + for (let i = 0; i < requestMessages.length; i++) { + if (JSON.stringify(requestMessages[i]) !== JSON.stringify(saved[i])) { + mismatchIndex = i; + break; + } + } + + if (mismatchIndex >= 0) { + const raw = mismatchIndex < rawMessages.length + ? JSON.stringify(rawMessages[mismatchIndex]).slice(0, 300) + : "(no raw message)"; + lines.push( + `Conversation ${c} (${saved.length} messages): mismatch at message ${mismatchIndex}:`, + ` request: ${JSON.stringify(requestMessages[mismatchIndex]).slice(0, 200)}`, + ` saved: ${JSON.stringify(saved[mismatchIndex]).slice(0, 200)}`, + ` raw (pre-normalization): ${raw}`, + ); + } else { + // Prefix matched, but the next saved message isn't an assistant turn + const nextRole = saved[requestMessages.length]?.role ?? "(end of conversation)"; + lines.push( + `Conversation ${c} (${saved.length} messages): ` + + `prefix matched, but next saved message is "${nextRole}" (need "assistant").`, + ); + } + } + + return lines.join("\n"); +} + async function exitWithNoMatchingRequestError( options: PerformRequestOptions, testInfo: { file: string; line?: number } | undefined, workDir: string, toolResultNormalizers: ToolResultNormalizer[], + storedData?: NormalizedData, ) { - const parts: string[] = []; - if (testInfo?.file) parts.push(`file=${testInfo.file}`); - if (typeof testInfo?.line === "number") parts.push(`line=${testInfo.line}`); - const header = parts.length ? ` ${parts.join(",")}` : ""; - - let finalMessageInfo: string; + let diagnostics: string; try { - const normalized = await parseAndNormalizeRequest( - options.body, - workDir, - toolResultNormalizers, - ); - const normalizedMessages = normalized.conversations[0]?.messages ?? []; - finalMessageInfo = JSON.stringify( - normalizedMessages[normalizedMessages.length - 1], - ); - } catch { - finalMessageInfo = `(unable to parse request body: ${options.body?.slice(0, 200) ?? "empty"})`; + const normalized = await parseAndNormalizeRequest(options.body, workDir, toolResultNormalizers); + const requestMessages = normalized.conversations[0]?.messages ?? []; + + let rawMessages: unknown[] = []; + try { + rawMessages = (JSON.parse(options.body ?? "{}") as { messages?: unknown[] }).messages ?? []; + } catch { /* non-JSON body */ } + + diagnostics = diagnoseMatchFailure(requestMessages, rawMessages, storedData); + } catch (e) { + diagnostics = `(unable to parse request for diagnostics: ${e})`; } const errorMessage = - `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}. ` + - `Final message: ${finalMessageInfo}`; - process.stderr.write(`::error${header}::${errorMessage}\n`); + `No cached response found for ${options.requestOptions.method} ${options.requestOptions.path}.\n${diagnostics}`; + + // Format as GitHub Actions annotation when test location is available + const annotation = [ + testInfo?.file ? `file=${testInfo.file}` : "", + typeof testInfo?.line === "number" ? `line=${testInfo.line}` : "", + ].filter(Boolean).join(","); + process.stderr.write(`::error${annotation ? ` ${annotation}` : ""}::${errorMessage}\n`); + options.onError(new Error(errorMessage)); } @@ -688,6 +753,18 @@ function transformOpenAIRequestMessage( content = "${system}"; } else if (m.role === "user" && typeof m.content === "string") { content = normalizeUserMessage(m.content); + } else if (m.role === "user" && Array.isArray(m.content)) { + // Multimodal user messages have array content with text and image_url parts. + // Extract and normalize text parts; represent image_url parts as a stable marker. + const parts: string[] = []; + for (const part of m.content) { + if (typeof part === "object" && part.type === "text" && typeof part.text === "string") { + parts.push(normalizeUserMessage(part.text)); + } else if (typeof part === "object" && part.type === "image_url") { + parts.push("[image]"); + } + } + content = parts.join("\n") || undefined; } else if (m.role === "tool" && typeof m.content === "string") { // If it's a JSON tool call result, normalize the whitespace and property ordering. // For successful tool results wrapped in {resultType, textResultForLlm}, unwrap to diff --git a/test/snapshots/session/should_accept_blob_attachments.yaml b/test/snapshots/session/should_accept_blob_attachments.yaml index 89e5d47ed..fe584aa8b 100644 --- a/test/snapshots/session/should_accept_blob_attachments.yaml +++ b/test/snapshots/session/should_accept_blob_attachments.yaml @@ -5,4 +5,72 @@ conversations: - role: system content: ${system} - role: user - content: Describe this image + content: |- + Describe this image + test-pixel.png + [image] + - role: assistant + content: I'll view the image file to describe it for you. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test-pixel.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: |- + Describe this image + test-pixel.png + [image] + - role: assistant + content: I'll view the image file to describe it for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test-pixel.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: user + content: |- + Image file at path ${workdir}/test-pixel.png + [image] + - role: assistant + content: >- + This is an extremely small image - it appears to be essentially a **single white pixel** or a very tiny white + square on a transparent background. The image is minimal in size and content, likely just a few pixels in + dimension. It's the kind of test image that might be used for: + + + - Testing image loading/rendering functionality + + - Placeholder purposes + + - Minimal file size requirements + + - Image processing pipeline validation + + + The file name "test-pixel.png" confirms this is indeed a test image consisting of just a single pixel or very + small pixel cluster. diff --git a/test/snapshots/session_config/should_accept_blob_attachments.yaml b/test/snapshots/session_config/should_accept_blob_attachments.yaml index 89e5d47ed..672ca74d4 100644 --- a/test/snapshots/session_config/should_accept_blob_attachments.yaml +++ b/test/snapshots/session_config/should_accept_blob_attachments.yaml @@ -5,4 +5,23 @@ conversations: - role: system content: ${system} - role: user - content: Describe this image + content: |- + What color is this pixel? Reply in one word. + pixel.png + [image] + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: view + arguments: '{"path":"${workdir}/pixel.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Viewed image file successfully. + - role: user + content: |- + Image file at path ${workdir}/pixel.png + [image] + - role: assistant + content: Red diff --git a/test/snapshots/session_config/should_accept_message_attachments.yaml b/test/snapshots/session_config/should_accept_message_attachments.yaml index 3ea9f830a..2a345b4b3 100644 --- a/test/snapshots/session_config/should_accept_message_attachments.yaml +++ b/test/snapshots/session_config/should_accept_message_attachments.yaml @@ -13,3 +13,53 @@ conversations: * ${workdir}/attached.txt (1 lines) + - role: assistant + content: I'll read the attached file and summarize it for you. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Reading attached file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/attached.txt"}' + - messages: + - role: system + content: ${system} + - role: user + content: |- + Summarize the attached file + + + + + * ${workdir}/attached.txt (1 lines) + + - role: assistant + content: I'll read the attached file and summarize it for you. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Reading attached file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/attached.txt"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: 1. This file is attached + - role: assistant + content: The attached file contains a single line of text that simply states "This file is attached" - it's a minimal + test file confirming its attachment status. diff --git a/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml new file mode 100644 index 000000000..b9e414328 --- /dev/null +++ b/test/snapshots/session_config/vision_disabled_then_enabled_via_setmodel.yaml @@ -0,0 +1,120 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: The image shows a **GitHub Copilot logo** - a hexagonal icon with an orange/coral colored background containing + a stylized white design that represents the Copilot brand mark. The logo has the characteristic GitHub Copilot + appearance with its geometric hexagon shape and minimal design aesthetic. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: The image shows a **GitHub Copilot logo** - a hexagonal icon with an orange/coral colored background containing + a stylized white design that represents the Copilot brand mark. The logo has the characteristic GitHub Copilot + appearance with its geometric hexagon shape and minimal design aesthetic. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_2 + content: Intent logged + - role: tool + tool_call_id: toolcall_3 + content: Viewed image file successfully. + - role: user + content: |- + Image file at path ${workdir}/test.png + [image] + - role: assistant + content: The image appears to be a **very small, mostly blank white or off-white rectangular image**. There doesn't seem + to be any visible content, text, or graphics - just a plain light-colored background. It looks like either an + empty/blank image file or a very minimalistic placeholder image. diff --git a/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml new file mode 100644 index 000000000..29b201818 --- /dev/null +++ b/test/snapshots/session_config/vision_enabled_then_disabled_via_setmodel.yaml @@ -0,0 +1,167 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to examine the test.png file. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to examine the test.png file. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: user + content: |- + Image file at path ${workdir}/test.png + [image] + - role: assistant + content: >- + Looking at the test.png image, I can see what appears to be a solid beige or tan colored square. It's a very + simple image - just a single uniform color with no visible patterns, text, or other elements. The color is a + light peachy-tan shade, similar to a skin tone or sand color. + + + This looks like it could be a test image or placeholder, possibly just a single pixel or small solid color + block that's been created for testing purposes. + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to examine the test.png file. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: >- + Looking at the test.png image, I can see what appears to be a solid beige or tan colored square. It's a very + simple image - just a single uniform color with no visible patterns, text, or other elements. The color is a + light peachy-tan shade, similar to a skin tone or sand color. + + + This looks like it could be a test image or placeholder, possibly just a single pixel or small solid color + block that's been created for testing purposes. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - role: assistant + tool_calls: + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - messages: + - role: system + content: ${system} + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + content: I'll use the view tool to examine the test.png file. + tool_calls: + - id: toolcall_0 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_1 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_0 + content: Intent logged + - role: tool + tool_call_id: toolcall_1 + content: Viewed image file successfully. + - role: assistant + content: >- + Looking at the test.png image, I can see what appears to be a solid beige or tan colored square. It's a very + simple image - just a single uniform color with no visible patterns, text, or other elements. The color is a + light peachy-tan shade, similar to a skin tone or sand color. + + + This looks like it could be a test image or placeholder, possibly just a single pixel or small solid color + block that's been created for testing purposes. + - role: user + content: Use the view tool to look at the file test.png and describe what you see + - role: assistant + tool_calls: + - id: toolcall_2 + type: function + function: + name: report_intent + arguments: '{"intent":"Viewing image file"}' + - id: toolcall_3 + type: function + function: + name: view + arguments: '{"path":"${workdir}/test.png"}' + - role: tool + tool_call_id: toolcall_2 + content: Intent logged + - role: tool + tool_call_id: toolcall_3 + content: Viewed image file successfully. + - role: assistant + content: I can see the test.png image again. It shows a solid, uniform beige or tan colored square - a simple test image + with just one flat color throughout. The color is a light peachy-tan or sand-like shade. There's no text, + patterns, gradients, or other visual elements - just a single solid color filling the entire image.