1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-26 17:54:20 +00:00
bda-6-steffo/unimore_bda_6/tokenizer/lower.py
Stefano Pigozzi 3abba24ca2
Made good progress
How does text vectorization in tensorflow work?
2023-02-05 17:40:22 +01:00

13 lines
398 B
Python

import tensorflow
from .base import BaseTokenizer
class LowercaseTokenizer(BaseTokenizer):
    """
    Tokenizer that normalizes its input to lowercase.

    Offers one implementation working on plain Python strings and one
    working on TensorFlow string tensors.
    """

    def tokenize_builtins(self, text: str) -> list[str]:
        """
        Lowercase ``text`` and break it into tokens on runs of whitespace.

        :param text: The string to tokenize.
        :return: The lowercased, whitespace-separated pieces of ``text``.
        """
        lowered = text.lower()
        # str.split() without arguments splits on any whitespace run and
        # never yields empty strings.
        return lowered.split()

    def tokenize_tensorflow(self, text: tensorflow.Tensor) -> tensorflow.Tensor:
        """
        Lowercase the string tensor ``text`` and append a trailing axis to it.

        :param text: The tensor to process.
        :return: The lowercased tensor, with one extra axis added at the end.

        NOTE(review): unlike ``tokenize_builtins``, no whitespace splitting
        happens here; presumably that is handled elsewhere, to be confirmed
        against the callers.
        """
        lowered = tensorflow.strings.lower(text)
        # The resulting operation is explicitly named "tokens".
        return tensorflow.expand_dims(lowered, -1, name="tokens")