1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-29 03:04:18 +00:00
bda-6-steffo/unimore_bda_6/tokenization/nltk_based.py

16 lines
304 B
Python

import nltk
import nltk.sentiment.util
def nltk_tokenizer(text: str) -> list[str]:
    """
    Tokenize ``text`` with NLTK and apply NLTK's negation marking to the result.

    The text is first split with :func:`nltk.word_tokenize`; the resulting
    token list is then passed through
    :func:`nltk.sentiment.util.mark_negation`, which (per the NLTK
    documentation) appends a ``_NEG`` suffix to tokens that fall between a
    negation word and the next clause-level punctuation mark.

    :param text: The string to tokenize.
    :return: The list of tokens, with negation markers applied.
    """
    words = nltk.word_tokenize(text)

    # shallow=True asks NLTK to annotate ``words`` in place instead of working
    # on a deep copy, so the call's return value is intentionally ignored.
    nltk.sentiment.util.mark_negation(words, shallow=True)

    return words
# Public API of this module: only the tokenizer function is exported.
__all__ = ("nltk_tokenizer",)