bda-6-steffo/unimore_bda_6/tokenizer/plain.py

import tensorflow

from .base import BaseTokenizer


class PlainTokenizer(BaseTokenizer):
    """
    Tokenizer which just splits the text into tokens by separating them at whitespaces.
    """

    def tokenize_plain(self, text: str) -> str:
        # Return the plain text unchanged.
        return text

    def tokenize_tensorflow(self, text: tensorflow.Tensor) -> tensorflow.Tensor:
        # Return the tensor unchanged.
        return text
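

# A minimal usage sketch (not part of the original file), assuming BaseTokenizer
# can be instantiated without arguments:
#
#     tokenizer = PlainTokenizer()
#     tokenizer.tokenize_plain("the quick brown fox")
#     # -> "the quick brown fox" (unchanged)
#     tokenizer.tokenize_tensorflow(tensorflow.constant("the quick brown fox"))
#     # -> the same string tensor (unchanged)
#
# Both methods pass their input through untouched; the whitespace splitting
# mentioned in the docstring is presumably applied by downstream components.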