mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-25 09:14:19 +00:00
17 lines
441 B
Python
17 lines
441 B
Python
import tensorflow
|
|
|
|
from .base import BaseTokenizer
|
|
|
|
|
|
class LowercaseTokenizer(BaseTokenizer):
    """
    Tokenizer which converts the text to lowercase.

    Both methods return the whole lowercased text; nothing in this class splits it into words.
    NOTE(review): splitting on spaces is presumably done by the base class or the caller - confirm.
    """

    def tokenize_plain(self, text: str) -> str:
        """
        Return ``text`` lowercased via `str.lower`, which is Unicode-aware.
        """
        return text.lower()

    def tokenize_tensorflow(self, text: tensorflow.Tensor) -> tensorflow.Tensor:
        """
        Return the string tensor ``text`` lowercased via `tensorflow.strings.lower`.
        """
        # The default encoding ("") is treated as ASCII, which would leave non-ASCII uppercase
        # letters untouched; "utf-8" keeps the result consistent with `tokenize_plain`.
        return tensorflow.strings.lower(text, encoding="utf-8")
|