diff --git a/docs/packages/cli.mdx b/docs/packages/cli.mdx
index 82192cbfa..ee5df7c11 100644
--- a/docs/packages/cli.mdx
+++ b/docs/packages/cli.mdx
@@ -305,6 +305,12 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
     # Adjust speech speed
     npx hyperframes tts "Slow and clear" --speed 0.8
 
+    # Generate Spanish speech (lang auto-detected from the `e` voice prefix)
+    npx hyperframes tts "La reunión empieza a las nueve" --voice ef_dora --output es.wav
+
+    # Override the phonemizer (read French text with an American English voice)
+    npx hyperframes tts "Bonjour le monde" --voice af_heart --lang fr-fr
+
     # Read text from a file
     npx hyperframes tts script.txt
 
@@ -317,9 +323,14 @@ This is suppressed in CI environments, non-TTY shells, and when `HYPERFRAMES_NO_
     | `--output, -o` | Output file path (default: `speech.wav` in current directory) |
     | `--voice, -v` | Voice ID (run `--list` to see options) |
     | `--speed, -s` | Speech speed multiplier (default: 1.0) |
+    | `--lang, -l` | Phonemizer locale (`en-us`, `en-gb`, `es`, `fr-fr`, `hi`, `it`, `pt-br`, `ja`, `zh`). When omitted, inferred from the voice ID prefix. |
     | `--list` | List available voices and exit |
     | `--json` | Output result as JSON |
 
+    <Tip>
+      Voice IDs encode the phonemizer language in their first letter (`a`=American English, `b`=British English, `e`=Spanish, `f`=French, `h`=Hindi, `i`=Italian, `j`=Japanese, `p`=Brazilian Portuguese, `z`=Mandarin). `--lang` is only needed when you want to override that — for example, giving English text a French phonemizer for a stylized accent.
+    </Tip>
+
     <Tip>
       Combine `tts` with `transcribe` to generate narration and word-level timestamps for captions in a single workflow: generate the audio with `tts`, then transcribe the output with `transcribe` to get word-level timing.
     </Tip>
diff --git a/packages/cli/src/commands/tts.ts b/packages/cli/src/commands/tts.ts
index d64f38721..bf0bc03c1 100644
--- a/packages/cli/src/commands/tts.ts
+++ b/packages/cli/src/commands/tts.ts
@@ -7,15 +7,32 @@ export const examples: Example[] = [
   ["Choose a voice", 'hyperframes tts "Hello world" --voice am_adam'],
   ["Save to a specific file", 'hyperframes tts "Intro" --voice bf_emma --output narration.wav'],
   ["Adjust speech speed", 'hyperframes tts "Slow and clear" --speed 0.8'],
+  [
+    "Generate Spanish speech",
+    'hyperframes tts "La reunión empieza a las nueve" --voice ef_dora --output es.wav',
+  ],
+  [
+    "Override phonemizer language",
+    'hyperframes tts "Ciao a tutti" --voice af_heart --lang it --output accented.wav',
+  ],
   ["Read text from a file", "hyperframes tts script.txt"],
   ["List available voices", "hyperframes tts --list"],
 ];
 import { resolve, extname } from "node:path";
 import * as clack from "@clack/prompts";
 import { c } from "../ui/colors.js";
-import { DEFAULT_VOICE, BUNDLED_VOICES } from "../tts/manager.js";
+import { errorBox } from "../ui/format.js";
+import {
+  DEFAULT_VOICE,
+  BUNDLED_VOICES,
+  SUPPORTED_LANGS,
+  inferLangFromVoiceId,
+  isSupportedLang,
+  type SupportedLang,
+} from "../tts/manager.js";
 
 const voiceList = BUNDLED_VOICES.map((v) => `${v.id} (${v.label})`).join(", ");
+const langList = SUPPORTED_LANGS.join(", ");
 
 export default defineCommand({
   meta: {
@@ -43,6 +60,11 @@ export default defineCommand({
       description: "Speech speed multiplier (default: 1.0)",
       alias: "s",
     },
+    lang: {
+      type: "string",
+      description: `Phonemizer language (auto-detected from voice prefix when omitted). Options: ${langList}`,
+      alias: "l",
+    },
     list: {
       type: "boolean",
       description: "List available voices and exit",
@@ -94,15 +116,37 @@ export default defineCommand({
       process.exit(1);
     }
 
+    const inferredLang = inferLangFromVoiceId(voice);
+    let lang: SupportedLang = inferredLang;
+    if (args.lang != null) {
+      const requested = String(args.lang).toLowerCase();
+      if (!isSupportedLang(requested)) {
+        errorBox("Invalid --lang", `Got "${args.lang}". Must be one of: ${langList}.`);
+        process.exit(1);
+      }
+      lang = requested;
+    }
+
+    // Mismatched voice/lang is a valid stylization (English text, French
+    // phonemization for accent), so this is a hint, not an error.
+    if (!args.json && args.lang != null && lang !== inferredLang) {
+      console.log(
+        c.dim(
+          `  Note: voice "${voice}" is ${inferredLang}, rendering with --lang ${lang} instead.`,
+        ),
+      );
+    }
+
     // ── Synthesize ────────────────────────────────────────────────────
     const { synthesize } = await import("../tts/synthesize.js");
     const spin = args.json ? null : clack.spinner();
-    spin?.start(`Generating speech with ${c.accent(voice)}...`);
+    spin?.start(`Generating speech with ${c.accent(voice)} (${lang})...`);
 
     try {
       const result = await synthesize(text, output, {
         voice,
         speed,
+        lang,
         onProgress: spin ? (msg) => spin.message(msg) : undefined,
       });
 
@@ -112,6 +156,8 @@ export default defineCommand({
             ok: true,
             voice,
             speed,
+            lang,
+            langApplied: result.langApplied,
             durationSeconds: result.durationSeconds,
             outputPath: result.outputPath,
           }),
@@ -122,6 +168,13 @@ export default defineCommand({
             `Generated ${c.accent(result.durationSeconds.toFixed(1) + "s")} of speech → ${c.accent(result.outputPath)}`,
           ),
         );
+        // Warn whenever the locale was silently dropped — including a locale
+        // auto-inferred from a non-English voice, not just an explicit --lang.
+        if (!result.langApplied && (args.lang != null || lang !== "en-us")) {
+          const note =
+            "  Note: installed kokoro-onnx version does not support the `lang` kwarg; phonemization used Kokoro's default.";
+          console.log(c.dim(note));
+        }
       }
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);
@@ -140,23 +193,29 @@ export default defineCommand({
 // ---------------------------------------------------------------------------
 
 function listVoices(json: boolean): void {
+  const rows = BUNDLED_VOICES.map((v) => ({ ...v, defaultLang: inferLangFromVoiceId(v.id) }));
+
   if (json) {
-    console.log(JSON.stringify(BUNDLED_VOICES));
+    console.log(JSON.stringify(rows));
     return;
   }
 
   console.log(`\n${c.bold("Available voices")} (Kokoro-82M)\n`);
   console.log(
-    `  ${c.dim("ID")}                ${c.dim("Name")}         ${c.dim("Language")}   ${c.dim("Gender")}`,
+    `  ${c.dim("ID")}                ${c.dim("Name")}         ${c.dim("Language")}   ${c.dim("Lang code")}  ${c.dim("Gender")}`,
   );
-  console.log(`  ${c.dim("─".repeat(60))}`);
-  for (const v of BUNDLED_VOICES) {
-    const id = v.id.padEnd(18);
-    const label = v.label.padEnd(13);
-    const lang = v.language.padEnd(10);
-    console.log(`  ${c.accent(id)} ${label} ${lang} ${v.gender}`);
+  console.log(`  ${c.dim("─".repeat(72))}`);
+  for (const row of rows) {
+    const id = row.id.padEnd(18);
+    const label = row.label.padEnd(13);
+    const lang = row.language.padEnd(10);
+    const code = row.defaultLang.padEnd(10);
+    console.log(`  ${c.accent(id)} ${label} ${lang} ${code} ${row.gender}`);
   }
   console.log(
-    `\n  ${c.dim("Use any Kokoro voice ID — see https://github.com/thewh1teagle/kokoro-onnx for all 54 voices")}\n`,
+    `\n  ${c.dim("Use any Kokoro voice ID — see https://github.com/thewh1teagle/kokoro-onnx for all 54 voices")}`,
+  );
+  console.log(
+    `  ${c.dim("Override phonemizer with --lang <" + SUPPORTED_LANGS.join("|") + ">")}\n`,
   );
 }
diff --git a/packages/cli/src/tts/manager.test.ts b/packages/cli/src/tts/manager.test.ts
new file mode 100644
index 000000000..2ea4d9238
--- /dev/null
+++ b/packages/cli/src/tts/manager.test.ts
@@ -0,0 +1,61 @@
+import { describe, expect, it } from "vitest";
+import {
+  BUNDLED_VOICES,
+  SUPPORTED_LANGS,
+  inferLangFromVoiceId,
+  isSupportedLang,
+} from "./manager.js";
+
+describe("inferLangFromVoiceId", () => {
+  it.each([
+    ["af_heart", "en-us"],
+    ["am_adam", "en-us"],
+    ["bf_emma", "en-gb"],
+    ["bm_george", "en-gb"],
+    ["ef_dora", "es"],
+    ["ff_siwis", "fr-fr"],
+    ["hf_alpha", "hi"],
+    ["if_sara", "it"],
+    ["jf_alpha", "ja"],
+    ["pf_dora", "pt-br"],
+    ["zf_xiaobei", "zh"],
+  ])("maps voice %s to lang %s", (voiceId, expected) => {
+    expect(inferLangFromVoiceId(voiceId)).toBe(expected);
+  });
+
+  it("falls back to en-us for unknown prefixes", () => {
+    expect(inferLangFromVoiceId("xf_test")).toBe("en-us");
+    expect(inferLangFromVoiceId("")).toBe("en-us");
+  });
+
+  it("is case-insensitive on the prefix letter", () => {
+    expect(inferLangFromVoiceId("EF_dora")).toBe("es");
+    expect(inferLangFromVoiceId("ZF_xiaobei")).toBe("zh");
+  });
+});
+
+describe("isSupportedLang", () => {
+  it("accepts every value in SUPPORTED_LANGS", () => {
+    for (const lang of SUPPORTED_LANGS) {
+      expect(isSupportedLang(lang)).toBe(true);
+    }
+  });
+
+  it("rejects invalid or misspelled lang codes", () => {
+    expect(isSupportedLang("english")).toBe(false);
+    expect(isSupportedLang("de")).toBe(false);
+    expect(isSupportedLang("")).toBe(false);
+  });
+});
+
+describe("BUNDLED_VOICES", () => {
+  // --lang is user-facing, so the curated list must give users a working
+  // example for each showcased locale; hi, it and pt-br are not asserted here.
+  it("exposes a voice for each showcased non-English locale (es, fr-fr, ja, zh)", () => {
+    const langs = new Set(BUNDLED_VOICES.map((v) => inferLangFromVoiceId(v.id)));
+    expect(langs.has("es")).toBe(true);
+    expect(langs.has("fr-fr")).toBe(true);
+    expect(langs.has("ja")).toBe(true);
+    expect(langs.has("zh")).toBe(true);
+  });
+});
diff --git a/packages/cli/src/tts/manager.ts b/packages/cli/src/tts/manager.ts
index cc945bc21..fa7e3760f 100644
--- a/packages/cli/src/tts/manager.ts
+++ b/packages/cli/src/tts/manager.ts
@@ -17,6 +17,51 @@ const MODEL_URLS: Record<string, string> = {
 const VOICES_URL =
   "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin";
 
+// Locale codes accepted by Kokoro's phonemizer (misaki for English,
+// espeak-ng for everything else). Kept as a readonly tuple so the union
+// type below stays driven by this single source.
+export const SUPPORTED_LANGS = [
+  "en-us",
+  "en-gb",
+  "es",
+  "fr-fr",
+  "hi",
+  "it",
+  "pt-br",
+  "ja",
+  "zh",
+] as const;
+
+export type SupportedLang = (typeof SUPPORTED_LANGS)[number];
+
+// Kokoro voice IDs are `<lang><gender>_<name>` — the first letter is
+// language, the second is gender. See https://github.com/hexgrad/kokoro.
+const VOICE_PREFIX_LANG: Record<string, SupportedLang> = {
+  a: "en-us", // American English
+  b: "en-gb", // British English
+  e: "es", // Spanish
+  f: "fr-fr", // French
+  h: "hi", // Hindi
+  i: "it", // Italian
+  j: "ja", // Japanese
+  p: "pt-br", // Brazilian Portuguese
+  z: "zh", // Mandarin
+};
+
+/**
+ * Infer the phonemizer language from a Kokoro voice ID prefix.
+ * Unknown prefixes fall back to `en-us` — Kokoro's text frontend is
+ * English-trained, so that's the safe default.
+ */
+export function inferLangFromVoiceId(voiceId: string): SupportedLang {
+  const first = voiceId.charAt(0).toLowerCase();
+  return VOICE_PREFIX_LANG[first] ?? "en-us";
+}
+
+export function isSupportedLang(value: string): value is SupportedLang {
+  return (SUPPORTED_LANGS as readonly string[]).includes(value);
+}
+
 // ---------------------------------------------------------------------------
 // Voices — Kokoro ships 54 voices across 8 languages. We expose a curated
 // default set and allow users to specify any valid Kokoro voice ID.
@@ -38,6 +83,10 @@ export const BUNDLED_VOICES: VoiceInfo[] = [
   { id: "bf_emma", label: "Emma", language: "en-GB", gender: "female" },
   { id: "bf_isabella", label: "Isabella", language: "en-GB", gender: "female" },
   { id: "bm_george", label: "George", language: "en-GB", gender: "male" },
+  { id: "ef_dora", label: "Dora", language: "es", gender: "female" },
+  { id: "ff_siwis", label: "Siwis", language: "fr-FR", gender: "female" },
+  { id: "jf_alpha", label: "Alpha", language: "ja", gender: "female" },
+  { id: "zf_xiaobei", label: "Xiaobei", language: "zh", gender: "female" },
 ];
 
 export const DEFAULT_VOICE = "af_heart";
diff --git a/packages/cli/src/tts/synthesize.ts b/packages/cli/src/tts/synthesize.ts
index e2a5984fa..829417914 100644
--- a/packages/cli/src/tts/synthesize.ts
+++ b/packages/cli/src/tts/synthesize.ts
@@ -1,8 +1,14 @@
 import { execFileSync } from "node:child_process";
-import { existsSync, writeFileSync, mkdirSync } from "node:fs";
-import { join, dirname } from "node:path";
+import { existsSync, writeFileSync, mkdirSync, readdirSync, unlinkSync } from "node:fs";
+import { join, dirname, basename } from "node:path";
 import { homedir } from "node:os";
-import { ensureModel, ensureVoices, DEFAULT_VOICE } from "./manager.js";
+import {
+  ensureModel,
+  ensureVoices,
+  DEFAULT_VOICE,
+  inferLangFromVoiceId,
+  type SupportedLang,
+} from "./manager.js";
 
 // ---------------------------------------------------------------------------
 // Python runtime detection
@@ -54,8 +60,11 @@ function hasPythonPackage(python: string, pkg: string): boolean {
 // Inline Python script for Kokoro synthesis
 // ---------------------------------------------------------------------------
 
+// Kokoro-onnx added the `lang=` kwarg to `Kokoro.create()` in a later release.
+// We pass it conditionally so older installs that only accept `voice=`/`speed=`
+// continue to work (falling back to Kokoro's default phonemization).
 const SYNTH_SCRIPT = `
-import sys, json
+import sys, json, inspect
 
 model_path = sys.argv[1]
 voices_path = sys.argv[2]
@@ -63,12 +72,19 @@ text = sys.argv[3]
 voice = sys.argv[4]
 speed = float(sys.argv[5])
 output_path = sys.argv[6]
+lang = sys.argv[7] if len(sys.argv) > 7 else ""
 
 import kokoro_onnx
 import soundfile as sf
 
 model = kokoro_onnx.Kokoro(model_path, voices_path)
-samples, sample_rate = model.create(text, voice=voice, speed=speed)
+
+kwargs = {"voice": voice, "speed": speed}
+supports_lang = "lang" in inspect.signature(model.create).parameters
+if lang and supports_lang:
+    kwargs["lang"] = lang
+
+samples, sample_rate = model.create(text, **kwargs)
 sf.write(output_path, samples, sample_rate)
 
 duration = len(samples) / sample_rate
@@ -76,17 +92,36 @@ print(json.dumps({
     "outputPath": output_path,
     "sampleRate": sample_rate,
     "durationSeconds": round(duration, 3),
+    "langApplied": bool(lang and supports_lang),
 }))
 `;
 
-// Cache the script to avoid rewriting it on every invocation
+// Cache the script to avoid rewriting it on every invocation.
+// The filename carries a version suffix so older installs automatically
+// upgrade when the script body changes (e.g., adding the `lang` kwarg).
 const SCRIPT_DIR = join(homedir(), ".cache", "hyperframes", "tts");
-const SCRIPT_PATH = join(SCRIPT_DIR, "synth.py");
+const SCRIPT_PATH = join(SCRIPT_DIR, "synth-v2.py");
 
 function ensureSynthScript(): string {
   if (!existsSync(SCRIPT_PATH)) {
     mkdirSync(SCRIPT_DIR, { recursive: true });
     writeFileSync(SCRIPT_PATH, SYNTH_SCRIPT);
+    // Best-effort: delete older versioned scripts left behind by previous
+    // CLI releases so users don't accumulate stale files in ~/.cache.
+    const currentName = basename(SCRIPT_PATH);
+    try {
+      for (const entry of readdirSync(SCRIPT_DIR)) {
+        if (entry !== currentName && /^synth(-v\d+)?\.py$/.test(entry)) {
+          try {
+            unlinkSync(join(SCRIPT_DIR, entry));
+          } catch {
+            // Ignore — orphan cleanup is best-effort.
+          }
+        }
+      }
+    } catch {
+      // Ignore — directory read is best-effort.
+    }
   }
   return SCRIPT_PATH;
 }
@@ -99,6 +134,12 @@ export interface SynthesizeOptions {
   model?: string;
   voice?: string;
   speed?: number;
+  /**
+   * Phonemizer locale. When omitted, inferred from the voice ID prefix
+   * (e.g., `ef_dora` → `es`). Pass explicitly to override — for example,
+   * reading English text with a French voice as a stylization.
+   */
+  lang?: SupportedLang;
   onProgress?: (message: string) => void;
 }
 
@@ -106,6 +147,8 @@ export interface SynthesizeResult {
   outputPath: string;
   sampleRate: number;
   durationSeconds: number;
+  /** False when the installed kokoro-onnx version does not support the `lang` kwarg. */
+  langApplied: boolean;
 }
 
 /**
@@ -118,6 +161,7 @@ export async function synthesize(
 ): Promise<SynthesizeResult> {
   const voice = options?.voice ?? DEFAULT_VOICE;
   const speed = options?.speed ?? 1.0;
+  const lang: SupportedLang = options?.lang ?? inferLangFromVoiceId(voice);
 
   // 1. Ensure Python 3 is available with kokoro-onnx
   options?.onProgress?.("Checking Python runtime...");
@@ -151,11 +195,11 @@ export async function synthesize(
   mkdirSync(dirname(outputPath), { recursive: true });
 
   // 5. Run synthesis
-  options?.onProgress?.(`Generating speech with voice ${voice}...`);
+  options?.onProgress?.(`Generating speech with voice ${voice} (${lang})...`);
   try {
     const stdout = execFileSync(
       python,
-      [scriptPath, modelPath, voicesPath, text, voice, String(speed), outputPath],
+      [scriptPath, modelPath, voicesPath, text, voice, String(speed), outputPath, lang],
       {
         encoding: "utf-8",
         timeout: 300_000,
@@ -170,13 +214,18 @@ export async function synthesize(
     // Parse the last line of stdout as JSON (in case Python printed warnings before it)
     const lines = stdout.trim().split("\n");
     const jsonLine = lines[lines.length - 1] ?? "";
-    const result: { outputPath: string; sampleRate: number; durationSeconds: number } =
-      JSON.parse(jsonLine);
+    const result: {
+      outputPath: string;
+      sampleRate: number;
+      durationSeconds: number;
+      langApplied: boolean;
+    } = JSON.parse(jsonLine);
 
     return {
       outputPath: result.outputPath,
       sampleRate: result.sampleRate,
       durationSeconds: result.durationSeconds,
+      langApplied: result.langApplied,
     };
   } catch (err: unknown) {
     // If the error is our own JSON parse failure but the file was created,
diff --git a/skills/hyperframes/references/tts.md b/skills/hyperframes/references/tts.md
index ee94993a7..c403564d8 100644
--- a/skills/hyperframes/references/tts.md
+++ b/skills/hyperframes/references/tts.md
@@ -16,6 +16,25 @@ Match voice to content. Default is `af_heart`.
 
 Run `npx hyperframes tts --list` for all 54 voices (8 languages).
 
+## Multilingual Phonemization
+
+Kokoro voice IDs encode language in the first letter: `a`=American English, `b`=British English, `e`=Spanish, `f`=French, `h`=Hindi, `i`=Italian, `j`=Japanese, `p`=Brazilian Portuguese, `z`=Mandarin. The CLI auto-detects the phonemizer locale from that prefix — you don't need to pass `--lang` when the voice matches the text.
+
+```bash
+npx hyperframes tts "La reunión empieza a las nueve" --voice ef_dora --output es.wav
+npx hyperframes tts "今日はいい天気ですね" --voice jf_alpha --output ja.wav
+```
+
+Use `--lang` only to override auto-detection (e.g. stylized accents):
+
+```bash
+npx hyperframes tts "Hello there" --voice af_heart --lang fr-fr --output accented.wav
+```
+
+Valid `--lang` codes: `en-us`, `en-gb`, `es`, `fr-fr`, `hi`, `it`, `pt-br`, `ja`, `zh`.
+
+Non-English phonemization requires `espeak-ng` installed system-wide (`brew install espeak-ng` on macOS, `apt-get install espeak-ng` on Debian/Ubuntu).
+
 ## Speed Tuning
 
 - **0.7-0.8** — Tutorial, complex content