1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00
bda-6-steffo/unimore_bda_6/__main__.py
2023-02-08 19:46:05 +01:00

81 lines
3.1 KiB
Python

import logging
import pymongo.errors
from .log import install_log_handler
install_log_handler()
from .config import config
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
from .analysis.nltk_sentiment import NLTKSentimentAnalyzer
from .analysis.tf_text import TensorflowSentimentAnalyzer
from .analysis.base import TrainingFailedError
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation
from .gathering import Caches
log = logging.getLogger(__name__)
def main():
log.info("Started unimore-bda-6 in %s mode!", "DEBUG" if __debug__ else "PRODUCTION")
log.debug("Validating configuration...")
config.proxies.resolve()
log.debug("Ensuring there are no leftover caches...")
Caches.ensure_clean()
with mongo_client_from_config() as db:
try:
db.admin.command("ping")
except pymongo.errors.ServerSelectionTimeoutError:
log.fatal("MongoDB database is not available, exiting...")
exit(1)
reviews = reviews_collection(db)
for sample_func in [sample_reviews_varied, sample_reviews_polar]:
for SentimentAnalyzer in [
TensorflowSentimentAnalyzer,
NLTKSentimentAnalyzer
]:
for Tokenizer in [
PlainTokenizer,
LowercaseTokenizer,
NLTKWordTokenizer,
PottsTokenizer,
PottsTokenizerWithNegation,
]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
while True:
try:
slog.debug("Creating sentiment analyzer...")
sa = SentimentAnalyzer(tokenizer=Tokenizer())
except TypeError:
slog.warning("%s does not support %s, skipping...", Tokenizer.__name__, SentimentAnalyzer.__name__)
break
with Caches.from_database_samples(collection=reviews, sample_func=sample_func) as datasets:
try:
slog.info("Training sentiment analyzer: %s", sa)
sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
except TrainingFailedError:
slog.error("Training failed, trying again with a different dataset...")
continue
else:
slog.info("Training succeeded!")
slog.info("Evaluating sentiment analyzer: %s", sa)
evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
slog.info("Evaluation results: %s", evaluation_results)
break
if __name__ == "__main__":
main()