1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 16:04:18 +00:00
bda-6-steffo/unimore_bda_6/tokenizer/base.py

18 lines
383 B
Python
Raw Normal View History

2023-02-03 22:27:44 +00:00
import abc
class BaseTokenizer(metaclass=abc.ABCMeta):
    """
    The base for all tokenizers in this project.

    This is an abstract class: subclasses have to override :meth:`tokenize`
    before they can be instantiated.
    """

    def __repr__(self) -> str:
        """
        Return the qualified name of the instance's class followed by ``()``.
        """
        return f"{self.__class__.__qualname__}()"

    @abc.abstractmethod
    def tokenize(self, text: str) -> list[str]:
        """
        Convert a text string into a list of tokens.

        :param text: The string to split into tokens.
        :return: The tokens obtained from ``text``.
        :raises NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError()