1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-25 09:14:19 +00:00
bda-6-steffo/unimore_bda_6/tokenization/nltk_based.py

16 lines
294 B
Python

import nltk
import nltk.sentiment.util
def tokenizer(text: str) -> list[str]:
"""
Convert a text string into a list of tokens.
"""
tokens = nltk.word_tokenize(text)
nltk.sentiment.util.mark_negation(tokens, shallow=True)
return tokens
__all__ = (
"tokenizer",
)