From 306a6902cba839371f6602cf9ee16532b570d30c Mon Sep 17 00:00:00 2001
From: user <grayjk@gmail.com>
Date: Wed, 10 Dec 2025 12:57:27 -0500
Subject: [PATCH 1/4] Fix traceback color output with Unicode characters

Closes #130273
---
 Lib/test/test_traceback.py                    | 27 +++++++++++++++
 Lib/traceback.py                              | 33 +++++++++++++------
 ...-12-10-12-56-47.gh-issue-130273.iCfiY5.rst |  1 +
 3 files changed, 51 insertions(+), 10 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 96510eeec54640..03b63ab2b81894 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -5273,5 +5273,32 @@ def expected(t, m, fn, l, f, E, e, z):
         ]
         self.assertEqual(actual, expected(**colors))
 
+    def test_colorized_traceback_unicode(self):
+        try:
+            啊哈=1; 啊哈/0####
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                f"    啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
+                f"            {e}~~~~{z}{E}^{z}{e}~{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
+        try:
+            ééééé/0
+        except Exception as e:
+            exc = traceback.TracebackException.from_exception(e)
+
+        actual = "".join(exc.format(colorize=True)).splitlines()
+        def expected(t, m, fn, l, f, E, e, z):
+            return [
+                f"    {E}ééééé{z}/0",
+                f"    {E}^^^^^{z}",
+            ]
+        self.assertEqual(actual[2:4], expected(**colors))
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/traceback.py b/Lib/traceback.py
index f95d6bdbd016ac..7239513f436109 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -680,12 +680,12 @@ def output_line(lineno):
                         colorized_line_parts = []
                         colorized_carets_parts = []
 
-                        for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
+                        for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
                             caret_group = list(group)
-                            if color == "^":
+                            if "^" in color:
                                 colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
-                            elif color == "~":
+                            elif "~" in color:
                                 colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
                                 colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
                             else:
@@ -967,7 +967,24 @@ def setup_positions(expr, force_valid=True):
 
     return None
 
-_WIDE_CHAR_SPECIFIERS = "WF"
+
+def _lookahead(iterator, default):
+    forked = itertools.tee(iterator, 1)[0]
+    return next(forked, default)
+
+
+def _zip_display_width(line, carets):
+    line = itertools.tee(line, 1)[0]
+    carets = iter(carets)
+    for char in line:
+        char_width = _display_width(char)
+        next_char = _lookahead(line, "")
+        if next_char and char_width == _display_width(char + next_char):
+            next(line)
+            yield char + next_char, "".join(itertools.islice(carets, char_width))
+        else:
+            yield char, "".join(itertools.islice(carets, char_width))
+
 
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
@@ -981,13 +998,9 @@ def _display_width(line, offset=None):
     if line.isascii():
         return offset
 
-    import unicodedata
-
-    return sum(
-        2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
-        for char in line[:offset]
-    )
+    from _pyrepl.utils import wlen
 
+    return wlen(line[:offset])
 
 
 class _ExceptionPrintContext:
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst
new file mode 100644
index 00000000000000..981c84a9372d04
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst
@@ -0,0 +1 @@
+Fix :mod:`traceback` color output for source lines containing wide or combining Unicode characters.

From 8edad110695a22acb6ad9c6266826c4b05a28d1e Mon Sep 17 00:00:00 2001
From: user <grayjk@gmail.com>
Date: Wed, 10 Dec 2025 15:15:30 -0500
Subject: [PATCH 2/4] Move NEWS blurb from Core_and_Builtins to Library

---
 .../2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst}               | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename Misc/NEWS.d/next/{Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst => Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst} (100%)

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst b/Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst
similarity index 100%
rename from Misc/NEWS.d/next/Core_and_Builtins/2025-12-10-12-56-47.gh-issue-130273.iCfiY5.rst
rename to Misc/NEWS.d/next/Library/2025-12-10-15-15-09.gh-issue-130273.iCfiY5.rst

From 794703397a2490f0cb587df8ce1118ea1fcc0404 Mon Sep 17 00:00:00 2001
From: user <grayjk@gmail.com>
Date: Wed, 28 Jan 2026 18:04:33 -0500
Subject: [PATCH 3/4] Use unicodedata.iter_graphemes in _zip_display_width

---
 Lib/traceback.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/Lib/traceback.py b/Lib/traceback.py
index 7239513f436109..b1fd024884c907 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -968,22 +968,13 @@ def setup_positions(expr, force_valid=True):
     return None
 
 
-def _lookahead(iterator, default):
-    forked = itertools.tee(iterator, 1)[0]
-    return next(forked, default)
-
-
 def _zip_display_width(line, carets):
-    line = itertools.tee(line, 1)[0]
+    import unicodedata
     carets = iter(carets)
-    for char in line:
+    for char in unicodedata.iter_graphemes(line):
+        char = str(char)
         char_width = _display_width(char)
-        next_char = _lookahead(line, "")
-        if next_char and char_width == _display_width(char + next_char):
-            next(line)
-            yield char + next_char, "".join(itertools.islice(carets, char_width))
-        else:
-            yield char, "".join(itertools.islice(carets, char_width))
+        yield char, "".join(itertools.islice(carets, char_width))
 
 
 def _display_width(line, offset=None):

From 467656ee4ad09d73776cb60e635038c9dbf10dfa Mon Sep 17 00:00:00 2001
From: user <grayjk@gmail.com>
Date: Thu, 19 Feb 2026 10:45:04 -0500
Subject: [PATCH 4/4] Move wlen/str_width from _pyrepl.utils to traceback

---
 Lib/_pyrepl/utils.py               | 29 +------------------
 Lib/test/test_pyrepl/support.py    |  3 +-
 Lib/test/test_pyrepl/test_utils.py | 45 +-----------------------------
 Lib/test/test_traceback.py         | 45 ++++++++++++++++++++++++++++++
 Lib/traceback.py                   | 37 ++++++++++++++++++++++--
 5 files changed, 83 insertions(+), 76 deletions(-)

diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py
index 25d7ac1bd0b14e..d79c451949f946 100644
--- a/Lib/_pyrepl/utils.py
+++ b/Lib/_pyrepl/utils.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 import builtins
-import functools
 import keyword
 import re
 import token as T
@@ -11,12 +10,12 @@
 from collections import deque
 from io import StringIO
 from tokenize import TokenInfo as TI
+from traceback import _str_width as str_width, _wlen as wlen
 from typing import Iterable, Iterator, Match, NamedTuple, Self
 
 from .types import CharBuffer, CharWidths
 from .trace import trace
 
-ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
 ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
 ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
 IDENTIFIERS_AFTER = {"def", "class"}
@@ -59,32 +58,6 @@ class ColorSpan(NamedTuple):
     tag: str
 
 
-@functools.cache
-def str_width(c: str) -> int:
-    if ord(c) < 128:
-        return 1
-    # gh-139246 for zero-width joiner and combining characters
-    if unicodedata.combining(c):
-        return 0
-    category = unicodedata.category(c)
-    if category == "Cf" and c != "\u00ad":
-        return 0
-    w = unicodedata.east_asian_width(c)
-    if w in ("N", "Na", "H", "A"):
-        return 1
-    return 2
-
-
-def wlen(s: str) -> int:
-    if len(s) == 1 and s != "\x1a":
-        return str_width(s)
-    length = sum(str_width(i) for i in s)
-    # remove lengths of any escape sequences
-    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
-    ctrl_z_cnt = s.count("\x1a")
-    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
-
-
 def unbracket(s: str, including_content: bool = False) -> str:
     r"""Return `s` with \001 and \002 characters removed.
 
diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py
index 4f7f9d77933336..be9f207ef24c28 100644
--- a/Lib/test/test_pyrepl/support.py
+++ b/Lib/test/test_pyrepl/support.py
@@ -1,12 +1,13 @@
 from code import InteractiveConsole
 from functools import partial
+from traceback import ANSI_ESCAPE_SEQUENCE
 from typing import Iterable
 from unittest.mock import MagicMock
 
 from _pyrepl.console import Console, Event
 from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
 from _pyrepl.simple_interact import _strip_final_indent
-from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE
+from _pyrepl.utils import unbracket
 
 
 class ScreenEqualMixin:
diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py
index 3c55b6bdaeee9e..eddc81489b4eec 100644
--- a/Lib/test/test_pyrepl/test_utils.py
+++ b/Lib/test/test_pyrepl/test_utils.py
@@ -1,52 +1,9 @@
 from unittest import TestCase
 
-from _pyrepl.utils import str_width, wlen, prev_next_window, gen_colors
+from _pyrepl.utils import prev_next_window, gen_colors
 
 
 class TestUtils(TestCase):
-    def test_str_width(self):
-        characters = [
-            'a',
-            '1',
-            '_',
-            '!',
-            '\x1a',
-            '\u263A',
-            '\uffb9',
-            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
-            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
-            '\u00ad',
-        ]
-        for c in characters:
-            self.assertEqual(str_width(c), 1)
-
-        zero_width_characters = [
-            '\N{COMBINING ACUTE ACCENT}',
-            '\N{ZERO WIDTH JOINER}',
-        ]
-        for c in zero_width_characters:
-            with self.subTest(character=c):
-                self.assertEqual(str_width(c), 0)
-
-        characters = [chr(99989), chr(99999)]
-        for c in characters:
-            self.assertEqual(str_width(c), 2)
-
-    def test_wlen(self):
-        for c in ['a', 'b', '1', '!', '_']:
-            self.assertEqual(wlen(c), 1)
-        self.assertEqual(wlen('\x1a'), 2)
-
-        char_east_asian_width_N = chr(3800)
-        self.assertEqual(wlen(char_east_asian_width_N), 1)
-        char_east_asian_width_W = chr(4352)
-        self.assertEqual(wlen(char_east_asian_width_W), 2)
-
-        self.assertEqual(wlen('hello'), 5)
-        self.assertEqual(wlen('hello' + '\x1a'), 7)
-        self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
-        self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2)
-
     def test_prev_next_window(self):
         def gen_normal():
             yield 1
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index e3172069716834..0c55b200872fd7 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -28,6 +28,7 @@
 import traceback
 from functools import partial
 from pathlib import Path
+from traceback import _str_width, _wlen
 import _colorize
 
 MODULE_PREFIX = f'{__name__}.' if __name__ == '__main__' else ''
@@ -1787,6 +1788,50 @@ def f():
         ]
         self.assertEqual(result_lines, expected)
 
+    def test_str_width(self):
+        characters = [
+            'a',
+            '1',
+            '_',
+            '!',
+            '\x1a',
+            '\u263A',
+            '\uffb9',
+            '\N{LATIN SMALL LETTER E WITH ACUTE}',  # é
+            '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ
+            '\u00ad',
+        ]
+        for c in characters:
+            self.assertEqual(_str_width(c), 1)
+
+        zero_width_characters = [
+            '\N{COMBINING ACUTE ACCENT}',
+            '\N{ZERO WIDTH JOINER}',
+        ]
+        for c in zero_width_characters:
+            with self.subTest(character=c):
+                self.assertEqual(_str_width(c), 0)
+
+        characters = [chr(99989), chr(99999)]
+        for c in characters:
+            self.assertEqual(_str_width(c), 2)
+
+    def test_wlen(self):
+        for c in ['a', 'b', '1', '!', '_']:
+            self.assertEqual(_wlen(c), 1)
+        self.assertEqual(_wlen('\x1a'), 2)
+
+        char_east_asian_width_N = chr(3800)
+        self.assertEqual(_wlen(char_east_asian_width_N), 1)
+        char_east_asian_width_W = chr(4352)
+        self.assertEqual(_wlen(char_east_asian_width_W), 2)
+
+        self.assertEqual(_wlen('hello'), 5)
+        self.assertEqual(_wlen('hello' + '\x1a'), 7)
+        self.assertEqual(_wlen('e\N{COMBINING ACUTE ACCENT}'), 1)
+        self.assertEqual(_wlen('a\N{ZERO WIDTH JOINER}b'), 2)
+
+
 class TestKeywordTypoSuggestions(unittest.TestCase):
     TYPO_CASES = [
         ("with block ad something:\n  pass", "and"),
diff --git a/Lib/traceback.py b/Lib/traceback.py
index 2f3acec02dc951..05cb77b6cac900 100644
--- a/Lib/traceback.py
+++ b/Lib/traceback.py
@@ -1,8 +1,10 @@
 """Extract, format and print information about Python stack traces."""
 
 import collections.abc
+import functools
 import itertools
 import linecache
+import re
 import sys
 import textwrap
 import types
@@ -978,6 +980,37 @@ def _zip_display_width(line, carets):
         yield char, "".join(itertools.islice(carets, char_width))
 
 
+@functools.cache
+def _str_width(c: str) -> int:  # display columns taken by one code point
+    import unicodedata
+    if ord(c) < 128:  # ASCII code points always count as one column
+        return 1
+    # gh-139246 for zero-width joiner and combining characters
+    if unicodedata.combining(c):
+        return 0
+    category = unicodedata.category(c)
+    if category == "Cf" and c != "\u00ad":  # format chars, except U+00AD
+        return 0
+    w = unicodedata.east_asian_width(c)
+    if w in ("N", "Na", "H", "A"):  # neutral, narrow, halfwidth, ambiguous
+        return 1
+    return 2  # remaining classes: "W" (wide) and "F" (fullwidth)
+
+
+ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
+
+
+def _wlen(s: str) -> int:
+    """Return the display width of *s*, not counting ANSI escape sequences."""
+    if len(s) == 1 and s != "\x1a":
+        return _str_width(s)
+    length = sum(_str_width(i) for i in s)
+    # remove lengths of any escape sequences
+    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
+    ctrl_z_cnt = s.count("\x1a")
+    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
+
+
 def _display_width(line, offset=None):
     """Calculate the extra amount of width space the given source
     code segment might take if it were to be displayed on a fixed
@@ -990,9 +1023,7 @@ def _display_width(line, offset=None):
     if line.isascii():
         return offset
 
-    from _pyrepl.utils import wlen
-
-    return wlen(line[:offset])
+    return _wlen(line[:offset])
 
 
 class _ExceptionPrintContext: