mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-25 17:24:20 +00:00
17 lines
442 B
Python
17 lines
442 B
Python
|
import tensorflow
|
||
|
|
||
|
from .base import BaseTokenizer
|
||
|
|
||
|
|
||
|
class PlainTokenizer(BaseTokenizer):
    """
    Tokenizer which breaks text into tokens at whitespace boundaries.

    Two entry points are provided: one for native Python strings and one for TensorFlow tensors.
    """

    def tokenize_plain(self, text: str) -> list[str]:
        """
        Split a Python string on runs of whitespace.

        :param text: The string to tokenize.
        :return: The whitespace-separated pieces of ``text``; empty pieces are never produced.
        """
        tokens = text.split()
        return tokens

    def tokenize_tensorflow(self, text: tensorflow.Tensor) -> tensorflow.Tensor:
        """
        Append a trailing axis to the given tensor.

        :param text: The tensor to process.
        :return: ``text`` with one extra dimension inserted at the last position, created by an op named ``"tokens"``.
        """
        # NOTE(review): no splitting is performed here; presumably a later stage separates the
        # text at whitespace - confirm against the caller.
        return tensorflow.expand_dims(text, axis=-1, name="tokens")
|