mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-22 16:04:18 +00:00
18 lines
383 B
Python
18 lines
383 B
Python
|
import abc
|
||
|
|
||
|
|
||
|
class BaseTokenizer(metaclass=abc.ABCMeta):
    """
    The base for all tokenizers in this project.

    This is an abstract class: it cannot be instantiated directly, and
    concrete subclasses must override :meth:`tokenize`.
    """

    def __repr__(self):
        """
        Return the qualified name of the concrete class followed by ``()``.

        No constructor arguments are included in the representation.
        """
        return f"{self.__class__.__qualname__}()"

    @abc.abstractmethod
    def tokenize(self, text: str) -> list[str]:
        """
        Convert a text string into a list of tokens.

        :param text: The string to split into tokens.
        :return: The tokens extracted from ``text``.
        :raises NotImplementedError: If a subclass delegates to this base
            implementation instead of providing its own.
        """
        raise NotImplementedError()
|