1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-23 00:14:19 +00:00
bda-6-steffo/unimore_bda_6/analysis/vanilla.py

58 lines
1.7 KiB
Python

import nltk
import nltk.classify
import nltk.sentiment
import nltk.sentiment.util
import logging
from ..database import mongo_reviews_collection_from_config, get_reviews_training_set, get_reviews_test_set
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
def create_model_vanilla() -> nltk.sentiment.SentimentAnalyzer:
    """
    Build a new NLTK sentiment analyzer using its default constructor arguments.

    :return: The freshly created :class:`nltk.sentiment.SentimentAnalyzer`.
    """
    log.debug("Creating model...")
    analyzer = nltk.sentiment.SentimentAnalyzer()
    log.debug("Created model %s!", analyzer)
    return analyzer
def train_model_vanilla(model: nltk.sentiment.SentimentAnalyzer) -> None:
    """
    Train ``model`` in place on the reviews training set using a Naive Bayes classifier.

    The steps are:

    1. fetch the training documents from the reviews collection;
    2. mark negations in every document and collect all resulting words;
    3. select unigrams from those words (``min_freq=4``) and register a unigram feature extractor on the model;
    4. extract features from the documents and train :class:`nltk.classify.NaiveBayesClassifier` on them.

    :param model: The analyzer to configure and train; it is mutated by this call.
    """
    # TODO: This doesn't work yet
    with mongo_reviews_collection_from_config() as reviews:
        # NOTE(review): everything below stays inside the ``with`` block so the data is consumed while the
        # collection is still open; whether the helper returns a lazy cursor is not visible here - confirm.
        documents = get_reviews_training_set(reviews)

        log.debug("Marking negations...")
        negated_documents = [nltk.sentiment.util.mark_negation(document) for document in documents]

        log.debug("Extracting tokens...")
        tokens = model.all_words(negated_documents, labeled=False)

        log.debug("Counting unigrams...")
        frequent_unigrams = model.unigram_word_feats(words=tokens, min_freq=4)

        log.debug("Configuring model features...")
        model.add_feat_extractor(nltk.sentiment.util.extract_unigram_feats, unigrams=frequent_unigrams)
        # Features are extracted from the documents as fetched, not from the negation-marked copies;
        # the negated copies are only used to build the unigram vocabulary above.
        feature_set = model.apply_features(documents=documents)

        log.info("Training model...")
        model.train(trainer=nltk.classify.NaiveBayesClassifier.train, training_set=feature_set)
def evaluate_model_vanilla(model: nltk.sentiment.SentimentAnalyzer) -> dict:
    """
    Evaluate a trained ``model`` against the reviews test set.

    The test documents are run through the model's registered feature extractors before evaluation,
    in the same way the training documents are prepared in ``train_model_vanilla``.

    :param model: An analyzer that has already been configured and trained.
    :return: The metrics mapping produced by :meth:`nltk.sentiment.SentimentAnalyzer.evaluate`.
    """
    with mongo_reviews_collection_from_config() as reviews:
        # NOTE(review): kept inside the ``with`` block so the data is consumed while the collection is
        # still open; whether the helper returns a lazy cursor is not visible here - confirm.
        test_documents = get_reviews_test_set(reviews)

        # The classifier is trained on extracted feature sets, so the test documents must go through
        # the same extraction step before being handed to ``evaluate``.
        # NOTE(review): assumes get_reviews_test_set returns documents shaped like the training set - confirm.
        test_set = model.apply_features(documents=test_documents)

        log.info("Evaluating model...")
        results = model.evaluate(test_set)

    # Report and return the metrics instead of dropping into the debugger to inspect them.
    log.info("Evaluation results: %s", results)
    return results
# Public names exported by a star-import of this module.
__all__ = (
    "create_model_vanilla",
    "train_model_vanilla",
    "evaluate_model_vanilla",
)