1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-12-01 12:14:19 +00:00
bda-6-steffo/unimore_bda_6/tokenization/nltk_based.py

17 lines
304 B
Python
Raw Normal View History

2023-02-02 16:24:11 +00:00
import nltk
import nltk.sentiment.util
2023-02-03 01:10:00 +00:00
def nltk_tokenizer(text: str) -> list[str]:
    """
    Split a text string into word tokens and apply NLTK negation marking.

    :param text: The string to tokenize.
    :return: The tokens produced by ``nltk.word_tokenize``, after being
        processed by ``nltk.sentiment.util.mark_negation``.
    """
    words = nltk.word_tokenize(text)
    # With shallow=True, mark_negation works on the list it is given instead of
    # a deep copy (per the NLTK documentation), so the value it returns is the
    # very same list object as ``words``.
    return nltk.sentiment.util.mark_negation(words, shallow=True)
# Names exported by ``from <this module> import *``.
__all__ = ("nltk_tokenizer",)