Move VanillaSA language parameter to _tokenize_text

2024-11-22 07:54:19 +00:00 · 2023-02-02 04:26:20 +01:00 · 2023-02-02 04:26:20 +01:00 · c212be37c3
commit c212be37c3
parent aa980012d7
1 changed files with 3 additions and 4 deletions
--- a/unimore_bda_6/analysis/vanilla.py
+++ b/unimore_bda_6/analysis/vanilla.py
@ -18,16 +18,15 @@ class VanillaSA(BaseSA, metaclass=abc.ABCMeta):
    A sentiment analyzer resembling in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK.
    """

-    def __init__(self, language="english") -> None:
+    def __init__(self) -> None:
        super().__init__()
-        self.language: str = language
        self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer()

-    def _tokenize_text(self, text: str) -> list[str]:
+    def _tokenize_text(self, text: str, language: str = "english") -> list[str]:
        """
        Convert a text string into a list of tokens, using the given language.
        """
-        tokens = nltk.word_tokenize(text, language=self.language)
+        tokens = nltk.word_tokenize(text, language=language)
        nltk.sentiment.util.mark_negation(tokens, shallow=True)
        return tokens