1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

Move the VanillaSA language parameter to _tokenize_text

This commit is contained in:
Steffo 2023-02-02 04:26:20 +01:00
parent aa980012d7
commit c212be37c3
Signed by: steffo
GPG key ID: 2A24051445686895

View file

@ -18,16 +18,15 @@ class VanillaSA(BaseSA, metaclass=abc.ABCMeta):
A sentiment analyzer resembling in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK.
"""
def __init__(self, language="english") -> None:
def __init__(self) -> None:
super().__init__()
self.language: str = language
self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer()
def _tokenize_text(self, text: str) -> list[str]:
def _tokenize_text(self, text: str, language: str = "english") -> list[str]:
"""
Convert a text string into a list of tokens, using the given language.
"""
tokens = nltk.word_tokenize(text, language=self.language)
tokens = nltk.word_tokenize(text, language=language)
nltk.sentiment.util.mark_negation(tokens, shallow=True)
return tokens