From 394437f58c109cdd203f3279b2dd2ec79e5acfe5 Mon Sep 17 00:00:00 2001 From: Leo Kettmeir Date: Tue, 9 Jun 2026 02:04:01 +0200 Subject: [PATCH] fix: ensure cache is written (#1440) --- README.md | 4 +- architecture.md | 22 +++++------ lb/local.ts | 4 ++ lb/main.ts | 33 ++++++++++------ lb/proxy.ts | 86 +++++++++++++++++++++++++++++++++++++++--- lb/proxy_test.ts | 98 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 217 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 9cca3ff6f..a47522759 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ This is the source code for https://jsr.io, the new JavaScript registry. Cloudflare Workers worker (the LB) - Module, package metadata, and npm tarballs are served directly from R2 - /api requests are proxied to the management API - - All other requests are proxied to the frontend Worker via a service - binding (no public URL hop) + - All other requests are proxied to the frontend Worker via a service binding + (no public URL hop) - Data is stored in PostgreSQL (using Google Cloud SQL) - The database is highly available - Not used for serving registry requests diff --git a/architecture.md b/architecture.md index dc3466e0f..b362ae604 100644 --- a/architecture.md +++ b/architecture.md @@ -131,15 +131,15 @@ JavaScript to the browser. In production the frontend ships as its own Cloudflare Worker: -- `frontend/build.ts` runs the Fresh vite build, mirrors the merged static - tree (`frontend/static/`, `_fresh/{client,static}/`, and `frontend/docs/*.md` - under `_jsr_docs/`) into `_fresh/assets/`, then bundles - `frontend/server.entry.ts` into `_fresh/worker.js` via `deno bundle`. -- `frontend/shim/deno.ts` polyfills the subset of `Deno.*` (env, file - reads, inspect, stat/open) needed by the Fresh server at runtime, with - file reads forwarded to the Workers ASSETS binding. -- Static files are served by the Workers Assets binding (asset-first - routing). Dynamic routes fall through to the worker. +- `frontend/build.ts` runs the Fresh vite build, mirrors the merged static tree + (`frontend/static/`, `_fresh/{client,static}/`, and `frontend/docs/*.md` under + `_jsr_docs/`) into `_fresh/assets/`, then bundles `frontend/server.entry.ts` + into `_fresh/worker.js` via `deno bundle`. +- `frontend/shim/deno.ts` polyfills the subset of `Deno.*` (env, file reads, + inspect, stat/open) needed by the Fresh server at runtime, with file reads + forwarded to the Workers ASSETS binding. +- Static files are served by the Workers Assets binding (asset-first routing). + Dynamic routes fall through to the worker. Environment variables (`API_ROOT`, `FRONTEND_ROOT`, the `ORAMA_*` keys, etc.) are configured as `plain_text` bindings on the worker by Terraform; no @@ -194,8 +194,8 @@ A Cloudflare Worker that acts as the edge router. See Key files: - `main.ts` — request routing logic -- `proxy.ts` — proxy to backends (Cloud Run for API, service binding for - the frontend Worker) and R2 +- `proxy.ts` — proxy to backends (Cloud Run for API, service binding for the + frontend Worker) and R2 - `headers.ts` — security headers, CORS, CSP - `bots.ts` — bot/crawler detection - `analytics.ts` — download tracking diff --git a/lb/local.ts b/lb/local.ts index e6b8535a0..ce37ad2ae 100755 --- a/lb/local.ts +++ b/lb/local.ts @@ -205,6 +205,10 @@ function handler(req: Request): Promise { ROOT_DOMAIN, API_DOMAIN, NPM_DOMAIN, + }, { + waitUntil(_promise: Promise) { + // do nothing + }, }); } diff --git a/lb/main.ts b/lb/main.ts index bf0fe966e..0ab2cf612 100644 --- a/lb/main.ts +++ b/lb/main.ts @@ -1,7 +1,7 @@ // Copyright 2024 the JSR authors. All rights reserved. MIT license. import type { WorkerEnv } from "./types.ts"; -import { proxyToBackend, proxyToR2 } from "./proxy.ts"; +import { type ExecutionCtx, proxyToBackend, proxyToR2 } from "./proxy.ts"; import { handleCORSPreflight, isCORSPreflight, @@ -22,9 +22,10 @@ export default { async fetch( request: Request, env: WorkerEnv, + ctx: ExecutionCtx, ): Promise { try { - const response = await route(request, env); + const response = await route(request, env, ctx); return response; } catch (error) { console.error("LB error:", error); @@ -42,16 +43,17 @@ export default { export async function route( request: Request, env: WorkerEnv, + ctx?: ExecutionCtx, ): Promise { const url = new URL(request.url); const hostname = url.hostname.toLowerCase(); if (hostname === env.API_DOMAIN) { - return await handleAPIRequest(request, env); + return await handleAPIRequest(request, env, true, ctx); } else if (hostname === env.NPM_DOMAIN) { - return await handleNPMRequest(request, env); + return await handleNPMRequest(request, env, ctx); } else if (hostname === env.ROOT_DOMAIN) { - return await handleRootRequest(request, env); + return await handleRootRequest(request, env, ctx); } else { return new Response(`Unknown hostname: ${hostname}`, { status: 404, @@ -66,6 +68,7 @@ export async function handleAPIRequest( request: Request, env: WorkerEnv, rewritePath: boolean = true, + ctx?: ExecutionCtx, ): Promise { if (isCORSPreflight(request)) { return handleCORSPreflight(API); @@ -75,6 +78,7 @@ export async function handleAPIRequest( request, env.REGISTRY_API_URL, rewritePath ? (path) => `/api${path}` : undefined, + ctx, ); setSecurityHeaders(response, API); @@ -89,6 +93,7 @@ export async function handleAPIRequest( export async function handleNPMRequest( request: Request, env: WorkerEnv, + ctx?: ExecutionCtx, ): Promise { if (isCORSPreflight(request)) { return handleCORSPreflight(NPM); @@ -104,6 +109,7 @@ export async function handleNPMRequest( } return path; }, + ctx, ); setSecurityHeaders(response, NPM); @@ -155,22 +161,23 @@ export async function handleNPMRequest( export async function handleRootRequest( request: Request, env: WorkerEnv, + ctx?: ExecutionCtx, ): Promise { const url = new URL(request.url); const path = url.pathname; if (isAPIRoute(path)) { - return await handleAPIRequest(request, env, false); + return await handleAPIRequest(request, env, false, ctx); } else if (isBot(request)) { - return await handleFrontendRoute(request, env, true); + return await handleFrontendRoute(request, env, true, ctx); } else if (path.startsWith("/@")) { if (canAccessModuleFile(request) && isModuleFilePath(path)) { - return await handleModuleFileRoute(request, env); + return await handleModuleFileRoute(request, env, ctx); } else { - return await handleFrontendRoute(request, env, false); + return await handleFrontendRoute(request, env, false, ctx); } } else { - return await handleFrontendRoute(request, env, false); + return await handleFrontendRoute(request, env, false, ctx); } } @@ -221,11 +228,12 @@ async function handleFrontendRoute( request: Request, env: WorkerEnv, isBot: boolean, + ctx?: ExecutionCtx, ): Promise { const limited = await rateLimitGuard(request, env); if (limited) return limited; - const response = await proxyToBackend(request, env.FRONTEND); + const response = await proxyToBackend(request, env.FRONTEND, undefined, ctx); setSecurityHeaders(response, FRONTEND); setDebugHeaders(response, { @@ -265,11 +273,14 @@ async function rateLimitGuard( async function handleModuleFileRoute( request: Request, env: WorkerEnv, + ctx?: ExecutionCtx, ): Promise { const url = new URL(request.url); const response = await proxyToR2( request, env.MODULES_BUCKET, + undefined, + ctx, ); setSecurityHeaders(response, MODULES); diff --git a/lb/proxy.ts b/lb/proxy.ts index f05ada7f0..c07b76149 100644 --- a/lb/proxy.ts +++ b/lb/proxy.ts @@ -2,6 +2,58 @@ import type { PartialBucket } from "./types.ts"; +// Minimal slice of Cloudflare's `ExecutionContext` we depend on. Cache writes +// (`Cache.put`) finish *after* the response is returned to the client; without +// registering them via `waitUntil`, the Workers runtime tears the invocation +// down first and silently drops the write — so nothing ever gets cached. See +// https://developers.cloudflare.com/workers/runtime-apis/cache/#put +export interface ExecutionCtx { + waitUntil(promise: Promise): void; +} + +// Persist a cache write so it survives past the response. With an execution +// context we hand it to `waitUntil`; without one (unit tests) we await it so +// the write still completes deterministically. A failed write (sync throw or +// async rejection) is logged and swallowed — caching is best-effort and must +// never break serving the response. +async function persistCacheWrite( + ctx: ExecutionCtx | undefined, + cache: Cache, + key: Request, + response: Response, +): Promise { + let write: Promise; + try { + write = cache.put(key, response); + } catch (error) { + console.error("cache write failed:", error); + return; + } + const guarded = write.catch((error) => { + console.error("cache write failed:", error); + }); + if (ctx) { + ctx.waitUntil(guarded); + } else { + await guarded; + } +} + +// Cache key for a bucket (R2) response. `caches.default` is shared across all +// backends, and a `/@scope/...` URL is served as EITHER a module file (bucket, +// JSON) or an HTML page (frontend) depending on request headers — keying both +// on the raw URL cross-serves HTML for module files (and vice versa). Bucket +// entries are namespaced under a synthetic, non-routable host (which no real +// request can ever target, so it can't be poisoned) keyed by the original host +// + path so module and npm buckets also stay distinct. +function bucketCacheKey(rawUrl: string): Request { + const u = new URL(rawUrl); + return new Request( + `https://bucket-cache.jsr.internal/${u.host}${u.pathname}${u.search}`, + { method: "GET" }, + ); +} + // Proxies an inbound request to a backend. The backend can be either an // HTTP URL (Cloud Run API) or a service-binding Fetcher (frontend Worker). // In both cases the caller receives the same cache + header semantics. @@ -9,6 +61,7 @@ export async function proxyToBackend( request: Request, backend: string | { fetch: (req: Request) => Promise }, pathRewrite?: (path: string) => string, + ctx?: ExecutionCtx, ): Promise { const url = new URL(request.url); let path = url.pathname; @@ -62,6 +115,7 @@ export async function proxyToBackend( body: request.body, redirect: "manual", }, + ctx, ); const res = new Response(response.body, { @@ -92,6 +146,7 @@ export async function proxyToR2( request: Request, bucket: PartialBucket, pathRewrite?: (path: string) => string, + ctx?: ExecutionCtx, ): Promise { const url = new URL(request.url); let path = url.pathname; @@ -100,8 +155,14 @@ export async function proxyToR2( } const key = decodeURIComponent(path.slice(1)); - const cacheKey = new Request(request.url, { method: "GET" }); - const cached = await caches.default?.match(cacheKey); + const cacheKey = bucketCacheKey(request.url); + let cached: Response | undefined; + try { + cached = await caches.default?.match(cacheKey); + } catch (error) { + // A corrupt/unreadable cache entry must never break the request. + console.error("R2 cache match error:", error); + } if (cached) { if (request.method === "HEAD") { return new Response(null, { @@ -109,7 +170,13 @@ export async function proxyToR2( status: cached.status, }); } - return cached; + // Re-wrap: responses from `caches.default.match` have immutable headers, + // and callers (e.g. setSecurityHeaders) mutate the returned response's + // headers — mutating the cached response directly throws. + return new Response(cached.body, { + headers: cached.headers, + status: cached.status, + }); } try { @@ -142,7 +209,10 @@ export async function proxyToR2( } const response = new Response(object.body, { headers }); - caches.default?.put(cacheKey, response.clone()); + const cache = caches.default; + if (cache) { + await persistCacheWrite(ctx, cache, cacheKey, response.clone()); + } return response; } } catch (error) { @@ -159,6 +229,7 @@ async function cachedFetch( fetcher: (req: Request) => Promise, input: RequestInfo | URL, init?: RequestInit, + ctx?: ExecutionCtx, ): Promise { const req = new Request(input, init); @@ -178,14 +249,17 @@ async function cachedFetch( const res = await fetcher(req); - if (shouldCache && req.method === "GET" && res.ok) { + const cache = caches.default; + if (cache && shouldCache && req.method === "GET" && res.ok) { const cacheControl = res.headers.get("Cache-Control") ?? ""; if ( !cacheControl.includes("private") && !cacheControl.includes("no-store") ) { const cacheKey = new Request(req.url, { method: "GET" }); - caches.default?.put(cacheKey, res.clone()); + // `waitUntil` (or await in tests) so the write isn't dropped when the + // invocation ends — the cause of the lb caching nothing in production. + await persistCacheWrite(ctx, cache, cacheKey, res.clone()); } } diff --git a/lb/proxy_test.ts b/lb/proxy_test.ts index 2105177dc..30a377860 100644 --- a/lb/proxy_test.ts +++ b/lb/proxy_test.ts @@ -148,6 +148,72 @@ Deno.test("proxyToR2 handles HEAD requests with URL-encoded path", async () => { assertEquals(response.body, null); }); +Deno.test("proxyToR2 cache hit returns a fresh, mutable response", async () => { + // caches.default.match returns responses with immutable headers; callers + // (setSecurityHeaders etc.) mutate the returned response, so proxyToR2 must + // re-wrap rather than return the cached object directly. + const stored = new Response("cached-body", { + status: 200, + headers: { "content-type": "application/json" }, + }); + (globalThis as any).caches = { + default: { + match: () => Promise.resolve(stored), + put: () => Promise.resolve(), + }, + }; + + try { + const req = new Request("https://npm.jsr.io/@jsr/whatever"); + const res = await proxyToR2(req, createFakeBucket({})); + + assertEquals(res !== stored, true); // not the cached object + res.headers.set("x-test", "1"); // must not throw + assertEquals(res.headers.get("x-test"), "1"); + assertEquals(await res.text(), "cached-body"); + } finally { + (globalThis as any).caches = { default: undefined }; + } +}); + +Deno.test("proxyToR2 namespaces cache keys away from the raw URL", async () => { + // The frontend caches `/@scope/...` navigations under the raw URL; bucket + // responses for the same URL must use a distinct key to avoid cross-serving. + const matchKeys: string[] = []; + const putKeys: string[] = []; + (globalThis as any).caches = { + default: { + match: (req: Request) => { + matchKeys.push(req.url); + return Promise.resolve(undefined); + }, + put: (req: Request) => { + putKeys.push(req.url); + return Promise.resolve(); + }, + }, + }; + + try { + const bucket = createFakeBucket({ + "@jsr/std__yaml": { body: "{}", contentType: "application/json" }, + }); + const res = await proxyToR2( + new Request("https://npm.jsr.io/@jsr/std__yaml"), + bucket, + ); + + assertEquals(res.status, 200); + assertEquals( + matchKeys[0], + "https://bucket-cache.jsr.internal/npm.jsr.io/@jsr/std__yaml", + ); + assertEquals(putKeys[0], matchKeys[0]); // match and put use the same key + } finally { + (globalThis as any).caches = { default: undefined }; + } +}); + // --- proxyToBackend tests --- /** In-memory Cache stub that records put/match calls for assertions. */ @@ -239,6 +305,38 @@ Deno.test("proxyToBackend caches anonymous GET with URL-only key", async () => { } }); +Deno.test("proxyToBackend registers the cache write with ctx.waitUntil", async () => { + const cache = createFakeCache(); + (globalThis as any).caches = { default: cache }; + + // Without this, the Workers runtime tears the invocation down before the + // async Cache.put completes and the write is silently dropped — the bug that + // left the lb caching nothing in production. + const waited: Promise[] = []; + const ctx = { waitUntil: (p: Promise) => waited.push(p) }; + + const restore = setupFetchStub( + new Response('{"ok":true}', { + status: 200, + headers: { "Cache-Control": "public, max-age=30, s-maxage=300" }, + }), + ); + + try { + const request = new Request("https://jsr.io/api/packages", { + method: "GET", + }); + await proxyToBackend(request, BACKEND_URL, undefined, ctx); + + assertEquals(waited.length, 1); + await Promise.all(waited); + assertEquals(cache.putCalls.length, 1); + } finally { + restore(); + (globalThis as any).caches = { default: undefined }; + } +}); + Deno.test("proxyToBackend serves cached response on second GET", async () => { const cache = createFakeCache(); (globalThis as any).caches = { default: cache };