1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00

Move VanillaSA language parameter to _tokenize_text

This commit is contained in:
Steffo 2023-02-02 04:26:20 +01:00
parent aa980012d7
commit c212be37c3
Signed by: steffo
GPG key ID: 2A24051445686895

View file

@@ -18,16 +18,15 @@ class VanillaSA(BaseSA, metaclass=abc.ABCMeta):
A sentiment analyzer resembling in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK. A sentiment analyzer resembling in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK.
""" """
def __init__(self, language="english") -> None: def __init__(self) -> None:
super().__init__() super().__init__()
self.language: str = language
self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer() self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer()
def _tokenize_text(self, text: str) -> list[str]: def _tokenize_text(self, text: str, language: str = "english") -> list[str]:
""" """
Convert a text string into a list of tokens, using the language of the model. Convert a text string into a list of tokens, using the given language.
""" """
tokens = nltk.word_tokenize(text, language=self.language) tokens = nltk.word_tokenize(text, language=language)
nltk.sentiment.util.mark_negation(tokens, shallow=True) nltk.sentiment.util.mark_negation(tokens, shallow=True)
return tokens return tokens