mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-24 08:44:19 +00:00
14 lines
338 B
Python
14 lines
338 B
Python
import typing as t
|
|
|
|
from .base import BaseTokenizer
|
|
|
|
|
|
class LowercaseTokenizer(BaseTokenizer):
    """
    Tokenizer which lowercases the whole text and then breaks it into tokens with `str.split`.
    """

    def tokenize(self, text: str) -> t.Iterator[str]:
        """
        Lowercase ``text`` and split it into tokens.

        :param text: The string to tokenize.
        :return: The lowercased tokens, in order of appearance, as produced by `str.split`.
        """
        # `str.split` without arguments splits on runs of whitespace and drops empty strings.
        return text.lower().split()
|