From c212be37c3a4968140c9df670e80af9e8b911f0e Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Thu, 2 Feb 2023 04:26:20 +0100 Subject: [PATCH] Move `language` VanillaSA parameter to `_tokenize_text` --- unimore_bda_6/analysis/vanilla.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/unimore_bda_6/analysis/vanilla.py b/unimore_bda_6/analysis/vanilla.py index 9b3406b..403b0b0 100644 --- a/unimore_bda_6/analysis/vanilla.py +++ b/unimore_bda_6/analysis/vanilla.py @@ -18,16 +18,15 @@ class VanillaSA(BaseSA, metaclass=abc.ABCMeta): A sentiment analyzer resembling the one implemented in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK. """ - def __init__(self, language="english") -> None: + def __init__(self) -> None: super().__init__() - self.language: str = language self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer() - def _tokenize_text(self, text: str) -> list[str]: + def _tokenize_text(self, text: str, language: str = "english") -> list[str]: """ Convert a text string into a list of tokens, using the language of the model. """ - tokens = nltk.word_tokenize(text, language=self.language) + tokens = nltk.word_tokenize(text, language=language) nltk.sentiment.util.mark_negation(tokens, shallow=True) return tokens