# Mirror of https://github.com/Steffo99/unimore-bda-6.git
# Synced 2024-11-25 09:14:19 +00:00 (17 lines, 294 B, Python)
|
import nltk
|
||
|
import nltk.sentiment.util
|
||
|
|
||
|
|
||
|
def tokenizer(text: str) -> list[str]:
    """
    Split a text string into word tokens and apply NLTK negation marking.

    The text is tokenized with ``nltk.word_tokenize`` and the resulting list
    is handed to ``nltk.sentiment.util.mark_negation`` with ``shallow=True``.

    :param text: The string to tokenize.
    :return: The list of tokens after negation marking.
    """
    # NOTE(review): with ``shallow=True`` NLTK is documented to mark the given
    # list in place (no deep copy) and return that same list, so returning the
    # call result is equivalent to returning the tokenized list itself.
    return nltk.sentiment.util.mark_negation(
        nltk.word_tokenize(text),
        shallow=True,
    )
|
||
|
|
||
|
|
||
|
# Names exported by ``from <module> import *``.
__all__ = ("tokenizer",)
|