diff --git a/nemo_text_processing/text_normalization/en/data/address/address_word.tsv b/nemo_text_processing/text_normalization/en/data/address/address_word.tsv index 2e9e71615..609b23a9f 100644 --- a/nemo_text_processing/text_normalization/en/data/address/address_word.tsv +++ b/nemo_text_processing/text_normalization/en/data/address/address_word.tsv @@ -4,6 +4,8 @@ expy Expressway fwy Freeway hwy Highway dr Drive +rd Road +road Road ct Court ave Avenue av Avenue diff --git a/nemo_text_processing/text_normalization/es/data/address/__init__.py b/nemo_text_processing/text_normalization/es/data/address/__init__.py new file mode 100644 index 000000000..9e3fb699d --- /dev/null +++ b/nemo_text_processing/text_normalization/es/data/address/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/es/data/address/apt_designator.tsv b/nemo_text_processing/text_normalization/es/data/address/apt_designator.tsv new file mode 100644 index 000000000..8275f42d3 --- /dev/null +++ b/nemo_text_processing/text_normalization/es/data/address/apt_designator.tsv @@ -0,0 +1,6 @@ +Apt. Apartamento +Apt. Apartamento +Dept. Departamento +Dept Departamento +Depto. 
def normalize_spanish_cardinal_for_us_address_street(fst: "pynini.FstLike") -> "pynini.FstLike":
    """
    Expands apocopated "one" forms in a Spanish cardinal FST for use in US address surfaces.

    Spanish cardinals are often apocopated (e.g. ``veintiún``). When digits are verbalized on their
    own, the cardinal FST has no view of the following street name, so the shortened forms are
    rewritten to the full ``…uno`` / ``… y uno`` forms. The intent matches ``strip_cardinal_apocope``,
    but these rewrites use empty left/right contexts and therefore apply anywhere in the output
    string, not only at its end.

    Args:
        fst: FST whose output side is a spoken Spanish cardinal.

    Returns:
        ``fst`` composed with the rewrite rules below (in order) and then passed through
        ``strip_cardinal_apocope``.
    """
    # Order is significant: each rule is composed onto the output of the previous one.
    # NOTE(review): the last rule has no right context, so an output that already contains
    # " y uno" would also match " y un" -- confirm the incoming cardinal never emits the full form.
    replacements = (
        ("veintiún", "veintiuno"),
        ("treintún", "treinta y uno"),
        (" y ún", " y uno"),
        (" y un", " y uno"),
    )

    normalized = fst
    for apocopated, full_form in replacements:
        rule = pynini.cdrewrite(pynini.cross(apocopated, full_form), "", "", NEMO_SIGMA)
        normalized = normalized @ rule

    return strip_cardinal_apocope(normalized)
class AddressUSSurfaceFst(GraphFst):
    """
    Surface FST for US-style postal addresses that appear inside Spanish sentences.

    The graph rewrites a written address directly into its spoken form: house, suite, unit and
    P.O. box numbers are read with the Spanish cardinal, ZIP codes are read digit by digit in
    Spanish, and street ordinals, street types and state names use English expansions loaded from
    the shared ``en/data/address/`` lexicons.

    Output is the spoken string stored in ``measure { units: "address_us_es" cardinal { integer: "..." } }``.
    Not registered in ``tokenize_and_classify``; consumed by
    :class:`~nemo_text_processing.text_normalization.es.taggers.measure.MeasureFst`.

    Args:
        cardinal: Spanish :class:`~nemo_text_processing.text_normalization.es.taggers.cardinal.CardinalFst`;
            its ``graph`` and ``graph_hundreds_component_at_least_one_none_zero_digit`` attributes are used.
        deterministic: forwarded to the ``GraphFst`` base class and to the English cardinal tagger.
            The English ordinal tagger and verbalizer are constructed with their own defaults.
    """

    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        super().__init__(name="address_us_es_surface", kind="classify", deterministic=deterministic)

        # --- Lexicons -------------------------------------------------------------------------
        graph_direction = pynini.string_file(get_abs_path("data/address/direction.tsv"))
        # Single-digit reader: the union of the zero and digit lexicons, inverted
        # (presumably the TSVs are stored word -> digit; confirm against the data files).
        graph_zip_digit = pynini.invert(
            pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
            | pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        ).optimize()
        # Digits read in isolation should say "uno", not the apocopated "un".
        graph_zip_digit @= pynini.cdrewrite(pynini.cross("un", "uno"), "", "", NEMO_SIGMA)
        graph_suite_designator = pynini.string_file(get_abs_path("data/address/suite_designator.tsv"))
        # NOTE(review): the apt_designator.tsv added with this change lists "Apt." twice and has no
        # dot-less "Apt" entry (unlike "Dept"/"Depto") -- confirm whether "Apt" was intended.
        graph_apt_designator = pynini.string_file(get_abs_path("data/address/apt_designator.tsv"))
        graph_unit_designator = pynini.string_file(get_abs_path("data/address/unit_designator.tsv"))
        graph_po_box = pynini.string_file(get_abs_path("data/address/po_box.tsv"))

        en_cardinal = EnCardinalFst(deterministic=deterministic)
        g = cardinal.graph

        # English ordinal (e.g. "42nd"): tag it, wrap the tag in `integer: "..."`, and verbalize it
        # immediately so that the result is already a plain spoken string.
        ordinal_en = pynini.compose(
            pynutil.insert('integer: "') + OrdinalTagger(cardinal=en_cardinal).graph + pynutil.insert('"'),
            OrdinalVerbalizer().graph,
        )

        # --- House number ---------------------------------------------------------------------
        # Candidate "paired" reading for 3-4 digit numbers: the leading 1-2 digits, an inserted
        # space, then the last two digits (an optional leading "0" is spoken as "cero").
        address_num = NEMO_DIGIT ** (1, 2) @ cardinal.graph_hundreds_component_at_least_one_none_zero_digit
        address_num += insert_space + NEMO_DIGIT**2 @ (
            pynini.closure(pynini.cross("0", "cero "), 0, 1)
            + cardinal.graph_hundreds_component_at_least_one_none_zero_digit
        )
        address_num = pynini.compose(NEMO_DIGIT ** (3, 4), address_num)
        # The paired reading takes priority where it accepts the input; everything else falls back
        # to the full Spanish cardinal.
        # NOTE(review): the address test cases expect e.g. "1234" -> "mil doscientos treinta y
        # cuatro", i.e. the full-cardinal reading -- confirm whether the paired branch ever matches.
        address_num = normalize_spanish_cardinal_for_us_address_street(
            plurals._priority_union(address_num, g, NEMO_SIGMA).optimize()
        )

        # --- Direction and street -------------------------------------------------------------
        # Optional " <direction>" with an optional trailing "." that is dropped.
        direction = pynini.closure(
            pynini.accep(NEMO_SPACE) + graph_direction + pynini.closure(pynutil.delete("."), 0, 1),
            0,
            1,
        )

        address_words = get_formats(en_get_abs_path("data/address/address_word.tsv"))
        # " " + (optional English ordinal | capitalized word of at least two letters) + " "
        # + any number of further capitalized words, each followed by a space + street-type word.
        street = (
            pynini.accep(NEMO_SPACE)
            + (pynini.closure(ordinal_en, 0, 1) | NEMO_UPPER + pynini.closure(NEMO_ALPHA, 1))
            + NEMO_SPACE
            + pynini.closure(NEMO_UPPER + pynini.closure(NEMO_ALPHA) + NEMO_SPACE)
            + address_words
        )

        # --- City / state / ZIP tail ----------------------------------------------------------
        # Exactly five digits, read one by one with spaces inserted between them. Both bounds are
        # given explicitly: `closure(x, 4)` alone would mean "four or more" and would let
        # arbitrarily long digit runs be read as a ZIP code.
        zip_five = (pynini.closure(graph_zip_digit + insert_space, 4, 4) + graph_zip_digit).optimize()

        # Optional ", <city>" where the city is one or more letters/spaces, passed through as-is.
        city = pynini.closure(NEMO_ALPHA | pynini.accep(NEMO_SPACE), 1)
        city = pynini.closure(pynini.accep(",") + pynini.accep(NEMO_SPACE) + city, 0, 1)

        states = load_labels(en_get_abs_path("data/address/state.tsv"))
        # Also accept the second column with a "." after its first character.
        states_extra = [(x, f"{y[0]}.{y[1:]}") for x, y in states]
        states.extend(states_extra)
        # Inverted so that the second column is the written input and the first the spoken output.
        state = pynini.closure(
            pynini.accep(",") + pynini.accep(NEMO_SPACE) + pynini.invert(pynini.string_map(states)), 0, 1
        )

        # Optional [","] + " " + ZIP.
        zip_code = pynini.closure(
            pynini.closure(pynini.accep(","), 0, 1) + pynini.accep(NEMO_SPACE) + zip_five,
            0,
            1,
        )
        tail = pynini.closure(city + state + zip_code, 0, 1).optimize()

        # --- Suite / apartment / unit ---------------------------------------------------------
        # Suite: 1-4 digits, unit: 1-3 digits, both read as Spanish cardinals.
        suite_num = normalize_spanish_cardinal_for_us_address_street((pynini.closure(NEMO_DIGIT, 1, 4) @ g).optimize())
        unit_num = normalize_spanish_cardinal_for_us_address_street((pynini.closure(NEMO_DIGIT, 1, 3) @ g).optimize())
        # Apartment: 1-4 characters, each a digit (read in Spanish) or an uppercase letter,
        # separated by inserted spaces (e.g. "5B" -> "cinco B").
        apt_char = graph_zip_digit | NEMO_UPPER
        apt_num = (apt_char + pynini.closure(insert_space + apt_char, 0, 3)).optimize()

        comma_sp = pynini.accep(",") + pynini.accep(NEMO_SPACE)
        suite = graph_suite_designator + pynini.closure(NEMO_SPACE, 0, 1) + suite_num
        apt = graph_apt_designator + pynini.closure(NEMO_SPACE, 0, 1) + apt_num
        # NOTE(review): no separator is consumed between the unit / P.O. box designator and its
        # number; presumably the lexicon entries carry the space themselves -- confirm in the TSVs.
        unit = graph_unit_designator + unit_num
        # Up to three ", <designator> <number>" segments.
        middle = pynini.closure(comma_sp + (suite | apt | unit), 0, 3).optimize()

        po_box = (
            graph_po_box
            + normalize_spanish_cardinal_for_us_address_street(pynini.closure(NEMO_DIGIT, 1, 4) @ g)
            + tail
        ).optimize()

        # --- Full address variants ------------------------------------------------------------
        standard = address_num + direction + street + middle + tail
        hyphen = pynini.accep("-")
        alpha_chars = NEMO_ALPHA | hyphen
        # Address followed by "." + 1-2 spaces + a capitalized word (letters or hyphens); the
        # small negative weight makes this path slightly preferred when it applies.
        standard_eos = (
            address_num
            + direction
            + street
            + middle
            + pynini.accep(".")
            + pynini.closure(NEMO_SPACE, 1, 2)
            + NEMO_UPPER
            + pynini.closure(alpha_chars)
        )
        standard |= pynutil.add_weight(standard_eos, -0.001)
        # Address without a tail, optionally followed by a "." that is deleted.
        standard |= address_num + direction + street + middle + pynini.closure(pynini.cross(".", ""), 0, 1)

        self.graph = (po_box | standard.optimize()).optimize()
deterministic=deterministic).graph + address_us_es = ( + pynutil.insert('units: "address_us_es" cardinal { integer: "') + + address_us_es_inner + + pynutil.insert('" } preserve_order: true') + ) + final_graph = ( subgraph_decimal | subgraph_cardinal @@ -210,6 +218,7 @@ def __init__(self, cardinal: GraphFst, decimal: GraphFst, fraction: GraphFst, de | cardinal_times | alpha_dash_decimal | math + | address_us_es ) final_graph = self.add_tokens(final_graph) diff --git a/nemo_text_processing/text_normalization/es/verbalizers/measure.py b/nemo_text_processing/text_normalization/es/verbalizers/measure.py index de877446d..cb15e7a12 100644 --- a/nemo_text_processing/text_normalization/es/verbalizers/measure.py +++ b/nemo_text_processing/text_normalization/es/verbalizers/measure.py @@ -23,6 +23,7 @@ delete_extra_space, delete_preserve_order, delete_space, + insert_space, ) from nemo_text_processing.text_normalization.es.graph_utils import ones from nemo_text_processing.text_normalization.es.utils import get_abs_path @@ -65,12 +66,20 @@ def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, de NEMO_WHITE_SPACE + "por" + pynini.closure(NEMO_NOT_QUOTE, 1), 0, 1 ) unit_masc |= "por" + pynini.closure(NEMO_NOT_QUOTE, 1) - unit_masc = pynutil.delete("units: \"") + (pynini.closure(NEMO_NOT_QUOTE) @ unit_masc) + pynutil.delete("\"") + unit_masc = ( + pynutil.delete("units: \"") + + (pynini.difference(pynini.closure(NEMO_NOT_QUOTE, 1), pynini.union("math", "address_us_es")) @ unit_masc) + + pynutil.delete("\"") + ) unit_fem = (unit_plural_fem | unit_singular_fem) + pynini.closure( NEMO_WHITE_SPACE + "por" + pynini.closure(NEMO_NOT_QUOTE, 1), 0, 1 ) - unit_fem = pynutil.delete("units: \"") + (pynini.closure(NEMO_NOT_QUOTE) @ unit_fem) + pynutil.delete("\"") + unit_fem = ( + pynutil.delete("units: \"") + + (pynini.difference(pynini.closure(NEMO_NOT_QUOTE, 1), pynini.union("math", "address_us_es")) @ unit_fem) + + pynutil.delete("\"") + ) graph_masc = 
(graph_cardinal_masc | graph_decimal_masc) + NEMO_WHITE_SPACE + unit_masc graph_masc |= graph_fraction_masc + NEMO_WHITE_SPACE + pynutil.insert("de ") + unit_masc @@ -96,7 +105,11 @@ def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, de graph @= pynini.cdrewrite(pynini.cross(ones, "uno"), "", NEMO_WHITE_SPACE + "por", NEMO_SIGMA) # To manage alphanumeric combonations ("a-8, 5x"), we let them use a weighted default path. - alpha_num_unit = pynutil.delete("units: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + alpha_num_unit = ( + pynutil.delete("units: \"") + + pynini.difference(pynini.closure(NEMO_NOT_QUOTE), pynini.union("math", "address_us_es")) + + pynutil.delete("\"") + ) graph_alpha_num = pynini.union( (graph_cardinal_masc | graph_decimal_masc) + NEMO_SPACE + alpha_num_unit, alpha_num_unit + delete_extra_space + (graph_cardinal_masc | graph_decimal_masc), @@ -106,8 +119,23 @@ def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, de pynutil.delete("units: \"math\"") + delete_space + graph_cardinal_masc + delete_space, -1 ) + preserve_order_tail = pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space + address_us_es = ( + pynutil.delete('units: "address_us_es" ') + + delete_space + + pynutil.delete("cardinal { integer: \"") + + delete_space + + pynini.closure(NEMO_NOT_QUOTE) + + pynutil.delete("\"") + + delete_space + + pynutil.delete("}") + + delete_space + + pynini.closure(preserve_order_tail) + ) + graph |= pynutil.add_weight(graph_alpha_num, 0.01) graph |= math + graph |= address_us_es graph += delete_preserve_order diff --git a/tests/nemo_text_processing/es/data_text_normalization/test_cases_address.txt b/tests/nemo_text_processing/es/data_text_normalization/test_cases_address.txt new file mode 100644 index 000000000..c0badfb20 --- /dev/null +++ b/tests/nemo_text_processing/es/data_text_normalization/test_cases_address.txt @@ -0,0 +1,10 @@ +Mi dirección es 
1234 Maple St., Springfield, IL 62704~Mi dirección es mil doscientos treinta y cuatro Maple Street, Springfield, Illinois seis dos siete cero cuatro +La oficina está ubicada en 567 Main St., Ste. 200, Dallas, TX 75201~La oficina está ubicada en quinientos sesenta y siete Main Street, Suite doscientos, Dallas, Texas siete cinco dos cero uno +Por favor envía el paquete a 890 Oak Ave., Apt. 5B, Brooklyn, NY 11201~Por favor envía el paquete a ochocientos noventa Oak Avenue, Apartamento cinco B, Brooklyn, New York uno uno dos cero uno +Vivo en 4321 Sunset Blvd., Los Angeles, CA 90028, cerca del centro~Vivo en cuatro mil trescientos veintiuno Sunset Boulevard, Los Angeles, California nueve cero cero dos ocho, cerca del centro +La nueva tienda abrirá en 100 Market Rd., San Francisco, CA 94105~La nueva tienda abrirá en cien Market Road, San Francisco, California nueve cuatro uno cero cinco +Su casa queda en 25 W 42nd St., New York, NY 10036~Su casa queda en veinticinco West forty second Street, New York, New York uno cero cero tres seis +El hospital se encuentra en 7890 Lincoln Dr., Miami, FL 33133~El hospital se encuentra en siete mil ochocientos noventa Lincoln Drive, Miami, Florida tres tres uno tres tres +Mándame la carta a P.O. Box 456, Austin, TX 78701, por favor~Mándame la carta a P.O. 
Box cuatrocientos cincuenta y seis, Austin, Texas siete ocho siete cero uno, por favor +La escuela de mis hijos está en 321 Elm St., Boston, MA 02108~La escuela de mis hijos está en trescientos veintiuno Elm Street, Boston, Massachusetts cero dos uno cero ocho +Nos mudamos a 9876 Pine Rd., Unit 12, Seattle, WA 98101 el mes pasado~Nos mudamos a nueve mil ochocientos setenta y seis Pine Road, Unit doce, Seattle, Washington nueve ocho uno cero uno el mes pasado diff --git a/tests/nemo_text_processing/es/data_text_normalization/test_cases_measure.txt b/tests/nemo_text_processing/es/data_text_normalization/test_cases_measure.txt index 092dcbc33..2f8d33fb4 100644 --- a/tests/nemo_text_processing/es/data_text_normalization/test_cases_measure.txt +++ b/tests/nemo_text_processing/es/data_text_normalization/test_cases_measure.txt @@ -29,4 +29,4 @@ a-5~a cinco -8° c~menos ocho grados centígrados 40 ° k~cuarenta grados kelvin 180 psi~ciento ochenta p s i -2 + 2 - 1 = 3~dos más dos menos uno es igual a tres \ No newline at end of file +2 + 2 - 1 = 3~dos más dos menos uno es igual a tres diff --git a/tests/nemo_text_processing/es/test_address.py b/tests/nemo_text_processing/es/test_address.py new file mode 100644 index 000000000..71d3d5097 --- /dev/null +++ b/tests/nemo_text_processing/es/test_address.py @@ -0,0 +1,47 @@ +# Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class TestAddress:
    """Checks Spanish text normalization of US-style addresses against the address test-case file."""

    normalizer_es = Normalizer(input_case='cased', lang='es', cache_dir=CACHE_DIR, overwrite_cache=False)

    # The audio-based normalizer is only built when audio-based tests are enabled.
    if RUN_AUDIO_BASED_TESTS:
        normalizer_with_audio_es = NormalizerWithAudio(
            input_case='cased', lang='es', cache_dir=CACHE_DIR, overwrite_cache=False
        )
    else:
        normalizer_with_audio_es = None

    # Spanish US-address code-switching is tagged by the measure class.
    @parameterized.expand(parse_test_case_file('es/data_text_normalization/test_cases_address.txt'))
    @pytest.mark.run_only_on('CPU')
    @pytest.mark.unit
    def test_norm(self, test_input, expected):
        """The deterministic output must equal ``expected``; audio-based options must contain it."""
        normalized = self.normalizer_es.normalize(test_input, verbose=False, punct_post_process=True)
        assert normalized == expected

        audio_normalizer = self.normalizer_with_audio_es
        if not audio_normalizer:
            return

        candidates = audio_normalizer.normalize(
            test_input,
            n_tagged=500,
            punct_post_process=False,
        )
        assert expected in candidates
# Runs the Spanish text-normalization address test cases through `runtest`.
# Expansions are quoted so a PROJECT_DIR containing spaces or glob characters
# is still passed to `runtest` as a single argument.
testTNAddress() {
  input="$PROJECT_DIR/es/data_text_normalization/test_cases_address.txt"
  runtest "$input"
}