1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

Implement basic Potts sentiment analyzer

This commit is contained in:
Steffo 2023-02-02 04:34:05 +01:00
parent e2b9133bd5
commit ab5f12f8fc
Signed by: steffo
GPG key ID: 2A24051445686895
2 changed files with 42 additions and 9 deletions

View file

@@ -1,23 +1,26 @@
import logging
from .config import config
from .database import mongo_reviews_collection_from_config, get_training_reviews, get_test_reviews
from .analysis.vanilla import VanillaReviewSA
from .analysis.potts import PottsReviewSA
from .log import install_log_handler
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
def main():
    """
    Train and evaluate the vanilla and the Potts sentiment analyzers.

    The training and test reviews are fetched once from the collection opened
    by ``mongo_reviews_collection_from_config``; each analyzer is then trained
    on the training reviews and evaluated on the test reviews, and the result
    of each evaluation is written to the module logger at INFO level.
    """
    with mongo_reviews_collection_from_config() as reviews:
        training_reviews = get_training_reviews(collection=reviews)
        test_reviews = get_test_reviews(collection=reviews)

        # NOTE(review): both analyzers consume the same two review objects;
        # this presumes they can be iterated more than once — confirm against
        # get_training_reviews / get_test_reviews.

        # Everything below stays inside the ``with`` block so the collection is
        # still open while the reviews are being read.
        vanilla = VanillaReviewSA()
        vanilla.train(training_reviews)
        log.info("Vanilla evaluation results: %s", vanilla.evaluate(test_reviews))

        potts = PottsReviewSA()
        potts.train(training_reviews)
        log.info("Potts evaluation results: %s", potts.evaluate(test_reviews))
if __name__ == "__main__":

View file

@@ -0,0 +1,30 @@
from ..vendor.potts import Tokenizer
from .vanilla import VanillaSA, VanillaReviewSA
class PottsSA(VanillaSA):
    """
    Variant of `VanillaSA` whose tokenization step is delegated to Potts' `Tokenizer`.
    """

    def __init__(self) -> None:
        # Nothing to set up here; initialization is left entirely to the parent chain.
        super().__init__()

    def _tokenize_text(self, text: str) -> list[str]:
        """
        Split the given text into a list of tokens with Potts' tokenizer.

        A new `Tokenizer` is built on every call with ``preserve_case=False``,
        and whatever its ``tokenize`` method produces is collected into a list.
        """
        potts_tokenizer = Tokenizer(preserve_case=False)
        tokens = potts_tokenizer.tokenize(text)
        return [*tokens]
class PottsReviewSA(VanillaReviewSA, PottsSA):
    """
    The `Review`-oriented counterpart of `PottsSA`.

    Defines nothing of its own: it only combines `VanillaReviewSA` and `PottsSA`,
    with `VanillaReviewSA` listed first and therefore taking precedence in the
    method resolution order.
    """
# Names exported by ``from <this module> import *``.
__all__ = (
    "PottsSA",
    "PottsReviewSA",
)