# Origin:  commit b860b2f7ed1cd2f1ca66a95f3c063c4fc0f35a41
# Author:  Romain Gonçalves
# Date:    Thu, 8 Feb 2024 13:25:51 +0100
# Subject: feat(filter_plugins): normalize special character to ascii characters
# File:    filter_plugins/core.py
import unicodedata
from typing import Callable


def normalize_unicode_to_ansii(data: str, form: str = "NFD") -> str:
    """Return *data* reduced to plain ASCII characters.

    The text is first Unicode-normalized so that accented letters are split
    into a base letter followed by combining marks; every code point that is
    not ASCII (the combining marks, and any character that has no ASCII
    decomposition) is then dropped.

    Args:
        data: Text to convert.
        form: Unicode normalization form handed to ``unicodedata.normalize``.
            The default ``"NFD"`` only applies canonical decomposition, so
            compatibility characters such as the ``fi`` ligature are removed.
            Pass ``"NFKD"`` to have such characters replaced by their ASCII
            equivalent instead.

    Returns:
        A string made of ASCII characters only; it may be shorter than
        *data* and is empty when nothing in *data* maps to ASCII.

    Raises:
        TypeError: If *data* is not a ``str``.
        ValueError: If *form* is not a valid normalization form.
    """
    decomposed = unicodedata.normalize(form, data)
    # errors="ignore" silently discards whatever cannot be expressed in
    # ASCII; the resulting bytes are pure ASCII, so decode them as such.
    return decomposed.encode("ascii", errors="ignore").decode("ascii")


class FilterModule(object):
    """Registry of the filters defined in this module.

    Uses the ``FilterModule`` / ``filters()`` layout of Ansible filter plugins.
    """

    def filters(self) -> dict[str, Callable[..., str]]:
        """Return the mapping of filter name to the callable implementing it."""
        return {
            "normalize_unicode_to_ansii": normalize_unicode_to_ansii,
        }