1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-23 00:14:19 +00:00
bda-6-steffo/unimore_bda_6/tokenizer/lower.py

18 lines
441 B
Python
Raw Normal View History

2023-02-04 00:36:42 +00:00
import tensorflow
from .base import BaseTokenizer
class LowercaseTokenizer(BaseTokenizer):
    """
    Tokenizer which converts the text to lowercase.

    Both methods only change the case of their input and return it otherwise unmodified.

    NOTE(review): no whitespace splitting is performed in this class; presumably the consumer of the returned
    value splits it on spaces - confirm against `BaseTokenizer` and its callers.
    """

    def tokenize_plain(self, text: str) -> str:
        """
        Return ``text`` converted to lowercase via `str.lower`.
        """
        return text.lower()

    def tokenize_tensorflow(self, text: tensorflow.Tensor) -> tensorflow.Tensor:
        """
        Return the string tensor ``text`` with its characters converted to lowercase.
        """
        # With the default ``encoding=""``, `tensorflow.strings.lower` treats the input as ASCII and leaves
        # non-ASCII uppercase letters (e.g. "É") untouched, while `str.lower` in `tokenize_plain` lowercases them.
        # Requesting UTF-8 keeps the two code paths aligned.
        # NOTE(review): assumes the tensor holds UTF-8 encoded text - confirm against the dataset pipeline.
        return tensorflow.strings.lower(text, encoding="utf-8")