mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-22 16:04:18 +00:00
59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
|
import nltk
|
||
|
import nltk.classify
|
||
|
import nltk.sentiment
|
||
|
import nltk.sentiment.util
|
||
|
import logging
|
||
|
|
||
|
from ..database import mongo_reviews_collection_from_config, get_reviews_training_set, get_reviews_test_set
|
||
|
|
||
|
|
||
|
log = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
def create_model_vanilla() -> nltk.sentiment.SentimentAnalyzer:
|
||
|
log.debug("Creating model...")
|
||
|
model = nltk.sentiment.SentimentAnalyzer()
|
||
|
log.debug("Created model %s!", model)
|
||
|
return model
|
||
|
|
||
|
|
||
|
def train_model_vanilla(model: nltk.sentiment.SentimentAnalyzer) -> None:
|
||
|
# TODO: This doesn't work yet
|
||
|
|
||
|
with mongo_reviews_collection_from_config() as reviews:
|
||
|
training_set = get_reviews_training_set(reviews)
|
||
|
|
||
|
log.debug("Marking negations...")
|
||
|
training_negated_set = list(map(nltk.sentiment.util.mark_negation, training_set))
|
||
|
|
||
|
log.debug("Extracting tokens...")
|
||
|
training_tokens = model.all_words(training_negated_set, labeled=False)
|
||
|
|
||
|
log.debug("Counting unigrams...")
|
||
|
training_unigrams = model.unigram_word_feats(words=training_tokens, min_freq=4)
|
||
|
|
||
|
log.debug("Configuring model features...")
|
||
|
model.add_feat_extractor(nltk.sentiment.util.extract_unigram_feats, unigrams=training_unigrams)
|
||
|
training_set = model.apply_features(documents=training_set)
|
||
|
|
||
|
log.info("Training model...")
|
||
|
model.train(trainer=nltk.classify.NaiveBayesClassifier.train, training_set=training_set)
|
||
|
|
||
|
|
||
|
def evaluate_model_vanilla(model: nltk.sentiment.SentimentAnalyzer):
|
||
|
with mongo_reviews_collection_from_config() as reviews:
|
||
|
test_set = get_reviews_test_set(reviews)
|
||
|
|
||
|
log.info("Evaluating model...")
|
||
|
model.evaluate(test_set)
|
||
|
|
||
|
# TODO
|
||
|
breakpoint()
|
||
|
|
||
|
|
||
|
__all__ = (
|
||
|
"create_model_vanilla",
|
||
|
"train_model_vanilla",
|
||
|
"evaluate_model_vanilla",
|
||
|
)
|