From 09d6771702395dd664db81d850b7f4e9e871f765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:27:00 +0100 Subject: [PATCH 1/7] gh-146169: correctly handle re-entrant parsing calls in Expat handlers --- Lib/test/test_pyexpat.py | 46 +++++++++++++++++++ ...-03-28-10-27-46.gh-issue-146169.RBF1xp.rst | 3 ++ Modules/pyexpat.c | 6 +++ 3 files changed, 55 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index cace780f79f515..af3c7651ae96ac 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -276,6 +276,52 @@ def test_parse_again(self): self.assertEqual(expat.ErrorString(cm.exception.code), expat.errors.XML_ERROR_FINISHED) + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_reentrancy_with_encoding(self, encoding): + # See https://github.com/python/cpython/issues/146169. 
+ parser = expat.ParserCreate(encoding=encoding) + + CharacterDataHandler = lambda data: parser.Parse(data, False) + CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) + def StartElementHandler(name, attrs): + parser.CharacterDataHandler = CharacterDataHandler + parser.StartElementHandler = StartElementHandler + + payload = "x".encode(encoding) + msg = re.escape("cannot call Parse() from within a handler") + with self.assertRaisesRegex(RuntimeError, msg): + for i in range(len(payload)): + parser.Parse(payload[i:i+1], i == len(payload) - 1) + CharacterDataHandler.assert_called_once_with("x") + + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_reentrancy_allowed_for_external_parser(self, encoding): + parser = expat.ParserCreate(encoding=encoding) + subparser = parser.ExternalEntityParserCreate(None, encoding) + payload_extstr = '' + + def ExternalEntityRefHandler(*args): + subparser.Parse(payload_extstr, True) + return 1 # return an integer to indicate that parsing continues + ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) + + def StartElementHandler(*args): + parser.ExternalEntityRefHandler = ExternalEntityRefHandler + parser.StartElementHandler = StartElementHandler + + payload = f"""\ + + +&ext; +""".encode(encoding) + + # Check that external parsers can be called from parent's handlers. 
+ for i in range(len(payload)): + parser.Parse(payload[i:i+1], i == len(payload) - 1) + external_ref_args = ('ext', None, 'entity.file', None) + ExternalEntityRefHandler.assert_called_once_with(*external_ref_args) + + class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): # Tests that make sure we get errors when the namespace_separator value diff --git a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst new file mode 100644 index 00000000000000..23b954d53d77c2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst @@ -0,0 +1,3 @@ +:mod:`xml.parser.expat`: raise :exc:`RuntimeError` when an Expat handler +calls :meth:`parser.Parse ` on the parser +that called the handler. Patch by Bénédikt Tran. diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 31b883fe8bd548..86dad1849ad5f2 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -863,6 +863,12 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls, int rc; pyexpat_state *state = PyType_GetModuleState(cls); + if (self->in_callback) { + PyErr_SetString(PyExc_RuntimeError, + "cannot call Parse() from within a handler"); + return NULL; + } + if (PyUnicode_Check(data)) { view.buf = NULL; s = PyUnicode_AsUTF8AndSize(data, &slen); From ad69e1e5a87ae4f7b335284b5d17ad36079d3b9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 28 Mar 2026 11:56:55 +0100 Subject: [PATCH 2/7] Update Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst --- .../next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst index 23b954d53d77c2..6274004cb47374 100644 --- 
a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst +++ b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst @@ -1,3 +1,3 @@ -:mod:`xml.parser.expat`: raise :exc:`RuntimeError` when an Expat handler +:mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler calls :meth:`parser.Parse ` on the parser that called the handler. Patch by Bénédikt Tran. From 03266f4af776fd1e56272e3d6883e879d40b6291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 11 Apr 2026 15:42:03 +0200 Subject: [PATCH 3/7] forbid reentrant calls to ParseFile() --- Modules/pyexpat.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 86dad1849ad5f2..5ff6476d81253f 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -780,6 +780,18 @@ VOID_HANDLER(StartDoctypeDecl, VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()")) +/* check that the current function is not called from within a handler */ +#define CHECK_NOT_IN_HANDLER(PARSER, FUNCNAME) \ + do { \ + if (PARSER->in_callback) { \ + PyErr_SetString(PyExc_RuntimeError, \ + "cannot call " FUNCNAME "() " \ + "from within a handler"); \ + return NULL; \ + } \ + } while (0) + + /* ---------------------------------------------------------------- */ /*[clinic input] class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype" @@ -857,18 +869,15 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls, PyObject *data, int isfinal) /*[clinic end generated code: output=8faffe07fe1f862a input=053e0f047e55c05a]*/ { + // avoid re-entrant calls to XML_Parse() + CHECK_NOT_IN_HANDLER(self, "Parse"); + const char *s; Py_ssize_t slen; Py_buffer view; int rc; pyexpat_state *state = PyType_GetModuleState(cls); - if (self->in_callback) { - PyErr_SetString(PyExc_RuntimeError, - "cannot call Parse() from within a handler"); - return NULL; - } - 
if (PyUnicode_Check(data)) { view.buf = NULL; s = PyUnicode_AsUTF8AndSize(data, &slen); @@ -962,6 +971,9 @@ pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls, PyObject *file) /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/ { + // avoid re-entrant calls to XML_GetBuffer() or XML_ParseBuffer() + CHECK_NOT_IN_HANDLER(self, "ParseFile"); + int rv = 1; PyObject *readmethod = NULL; From f75a84cbbb497abb1b7820ccefe72ebbddfa81ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 11 Apr 2026 15:42:20 +0200 Subject: [PATCH 4/7] nit: improve tests writeup --- Lib/test/test_pyexpat.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index af3c7651ae96ac..6de81f9230c6f7 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -302,18 +302,19 @@ def test_parse_reentrancy_allowed_for_external_parser(self, encoding): def ExternalEntityRefHandler(*args): subparser.Parse(payload_extstr, True) - return 1 # return an integer to indicate that parsing continues + # return a nonzero integer to indicate that parsing continues + return 1 ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) def StartElementHandler(*args): parser.ExternalEntityRefHandler = ExternalEntityRefHandler parser.StartElementHandler = StartElementHandler - payload = f"""\ - - -&ext; -""".encode(encoding) + payload = textwrap.dedent(f"""\ + + + &ext; + """).encode(encoding) # Check that external parsers be called from parent's handlers. 
for i in range(len(payload)): From 7015918015b41d8ca49a9cb0ed0d021d4b24d07f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 11 Apr 2026 15:47:41 +0200 Subject: [PATCH 5/7] add tests for XML_ParseFile --- Lib/test/test_pyexpat.py | 49 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 6de81f9230c6f7..4bd7ce4b0f0100 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -281,7 +281,8 @@ def test_parse_reentrancy_with_encoding(self, encoding): # See https://github.com/python/cpython/issues/146169. parser = expat.ParserCreate(encoding=encoding) - CharacterDataHandler = lambda data: parser.Parse(data, False) + def CharacterDataHandler(data): + return parser.Parse(data, False) CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) def StartElementHandler(name, attrs): parser.CharacterDataHandler = CharacterDataHandler @@ -294,6 +295,25 @@ def StartElementHandler(name, attrs): parser.Parse(payload[i:i+1], i == len(payload) - 1) CharacterDataHandler.assert_called_once_with("x") + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_file_reentrancy_with_encoding(self, encoding): + # See https://github.com/python/cpython/issues/146169. 
+ parser = expat.ParserCreate(encoding=encoding) + + def CharacterDataHandler(data): + return parser.ParseFile(BytesIO(data.encode(encoding))) + CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) + def StartElementHandler(name, attrs): + parser.CharacterDataHandler = CharacterDataHandler + parser.StartElementHandler = StartElementHandler + + payload = "x".encode(encoding) + payload_buffer = BytesIO(payload) + msg = re.escape("cannot call ParseFile() from within a handler") + with self.assertRaisesRegex(RuntimeError, msg): + parser.ParseFile(payload_buffer) + CharacterDataHandler.assert_called_once_with("x") + @support.subTests("encoding", ("utf-8", "utf-16")) def test_parse_reentrancy_allowed_for_external_parser(self, encoding): parser = expat.ParserCreate(encoding=encoding) @@ -322,6 +342,33 @@ def StartElementHandler(*args): external_ref_args = ('ext', None, 'entity.file', None) ExternalEntityRefHandler.assert_called_once_with(*external_ref_args) + @support.subTests("encoding", ("utf-8", "utf-16")) + def test_parse_file_reentrancy_allowed_for_external_parser(self, encoding): + parser = expat.ParserCreate(encoding=encoding) + subparser = parser.ExternalEntityParserCreate(None, encoding) + payload_extstr = '' + + def ExternalEntityRefHandler(*args): + subparser.ParseFile(BytesIO(payload_extstr.encode(encoding))) + # return a nonzero integer to indicate that parsing continues + return 1 + ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) + + def StartElementHandler(*args): + parser.ExternalEntityRefHandler = ExternalEntityRefHandler + parser.StartElementHandler = StartElementHandler + + payload = textwrap.dedent(f"""\ + + + &ext; + """).encode(encoding) + + # Check that external parsers can be called from parent's handlers. 
+ parser.ParseFile(BytesIO(payload)) + external_ref_args = ('ext', None, 'entity.file', None) + ExternalEntityRefHandler.assert_called_once_with(*external_ref_args) + class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): From 2a08400d054f382063f0dbb5cd13b2b6222fd03a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 11 Apr 2026 15:54:19 +0200 Subject: [PATCH 6/7] Update Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst --- .../Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst index 6274004cb47374..d9460540a5a7d3 100644 --- a/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst +++ b/Misc/NEWS.d/next/Library/2026-03-28-10-27-46.gh-issue-146169.RBF1xp.rst @@ -1,3 +1,4 @@ :mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler -calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` on the parser +calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` or +:meth:`parser.ParseFile <xml.parsers.expat.xmlparser.ParseFile>` on the parser that called the handler. Patch by Bénédikt Tran. 
From 0488dee70e2ab3b52c64be07def8b6b99d27e121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:00:22 +0200 Subject: [PATCH 7/7] simplify test --- Lib/test/test_pyexpat.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 4bd7ce4b0f0100..9f853397af4d95 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -284,9 +284,7 @@ def test_parse_reentrancy_with_encoding(self, encoding): def CharacterDataHandler(data): return parser.Parse(data, False) CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) - def StartElementHandler(name, attrs): - parser.CharacterDataHandler = CharacterDataHandler - parser.StartElementHandler = StartElementHandler + parser.CharacterDataHandler = CharacterDataHandler payload = "x".encode(encoding) msg = re.escape("cannot call Parse() from within a handler") @@ -303,9 +301,7 @@ def test_parse_file_reentrancy_with_encoding(self, encoding): def CharacterDataHandler(data): return parser.ParseFile(BytesIO(data.encode(encoding))) CharacterDataHandler = mock.Mock(wraps=CharacterDataHandler) - def StartElementHandler(name, attrs): - parser.CharacterDataHandler = CharacterDataHandler - parser.StartElementHandler = StartElementHandler + parser.CharacterDataHandler = CharacterDataHandler payload = "x".encode(encoding) payload_buffer = BytesIO(payload) @@ -325,10 +321,7 @@ def ExternalEntityRefHandler(*args): # return a nonzero integer to indicate that parsing continues return 1 ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) - - def StartElementHandler(*args): - parser.ExternalEntityRefHandler = ExternalEntityRefHandler - parser.StartElementHandler = StartElementHandler + parser.ExternalEntityRefHandler = ExternalEntityRefHandler payload = textwrap.dedent(f"""\ @@ -353,10 +346,7 @@ def ExternalEntityRefHandler(*args): # return a 
nonzero integer to indicate that parsing continues return 1 ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler) - - def StartElementHandler(*args): - parser.ExternalEntityRefHandler = ExternalEntityRefHandler - parser.StartElementHandler = StartElementHandler + parser.ExternalEntityRefHandler = ExternalEntityRefHandler payload = textwrap.dedent(f"""\