
Write results to a ./data/logs/results.tsv file as well

Steffo 2023-05-08 04:16:32 +02:00
parent b4cc6f8707
commit 8d831afbe3
Signed by: steffo
GPG key ID: 2A24051445686895
2 changed files with 93 additions and 78 deletions


@@ -24,101 +24,112 @@ def main():
     log.debug("Ensuring there are no leftover caches...")
     Caches.ensure_clean()
 
-    with mongo_client_from_config() as db:
-        try:
-            db.admin.command("ping")
-        except pymongo.errors.ServerSelectionTimeoutError:
-            log.fatal("MongoDB database is not available, exiting...")
-            exit(1)
-
-    for sample_func in [
-        sample_reviews_polar,
-        sample_reviews_varied,
-    ]:
-
-        slog = logging.getLogger(f"{__name__}.{sample_func.__name__}")
-        slog.debug("Selected sample_func: %s", sample_func.__name__)
-
-        for SentimentAnalyzer in [
-            ThreeCheat,
-            NLTKSentimentAnalyzer,
-            TensorflowPolarSentimentAnalyzer,
-            TensorflowCategorySentimentAnalyzer,
-        ]:
-
-            slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
-            slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
-
-            for Tokenizer in [
-                PlainTokenizer,
-                LowercaseTokenizer,
-                NLTKWordTokenizer,
-                PottsTokenizer,
-                PottsTokenizerWithNegation,
-                HuggingBertTokenizer,
-            ]:
-
-                log.debug("Running garbage collection...")
-                garbage_count = gc.collect()
-                log.debug("Collected %d pieces of garbage!", garbage_count)
-
-                slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
-                slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
-
-                runs = 0
-                successful_runs = 0
-                cumulative_evaluation_results = EvaluationResults()
-
-                while True:
-                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
-
-                    if successful_runs >= TARGET_RUNS.__wrapped__:
-                        slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
-                        break
-
-                    if runs >= MAXIMUM_RUNS.__wrapped__:
-                        slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
-                        break
-
-                    runs += 1
-                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
-                    slog.debug("Run #%d", runs)
-
-                    try:
-                        slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
-                        sa = SentimentAnalyzer(tokenizer=Tokenizer())
-                    except TypeError:
-                        slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__)
-                        break
-
-                    with mongo_client_from_config() as db:
-                        reviews = reviews_collection(db)
-                        datasets_cm = Caches.from_database_samples(collection=reviews, sample_func=sample_func)
-                        datasets = datasets_cm.__enter__()
-
-                    try:
-                        try:
-                            slog.info("Training sentiment analyzer: %s", sa)
-                            sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
-                        except TrainingFailedError:
-                            slog.error("Training failed, trying again with a different dataset...")
-                            continue
-                        else:
-                            slog.info("Training succeeded!")
-
-                            slog.info("Evaluating sentiment analyzer: %s", sa)
-                            evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
-                            slog.info("Evaluation results: %s", evaluation_results)
-
-                            successful_runs += 1
-                            cumulative_evaluation_results += evaluation_results
-                            break
-                    finally:
-                        datasets_cm.__exit__(None, None, None)
-
-                slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
+    with open("./data/logs/results.tsv", "w") as file:
+        file.write("function\tanalyzer\ttokenizer\trun no\tmean absolute error\tmean squared error\tperfects\trecall 1\trecall 2\trecall 3\trecall 4\trecall 5\tprecision 1\tprecision 2\tprecision 3\tprecision 4\tprecision 5\n")
+
+        with mongo_client_from_config() as db:
+            try:
+                db.admin.command("ping")
+            except pymongo.errors.ServerSelectionTimeoutError:
+                log.fatal("MongoDB database is not available, exiting...")
+                exit(1)
+
+        for sample_func in [
+            sample_reviews_polar,
+            sample_reviews_varied,
+        ]:
+
+            slog = logging.getLogger(f"{__name__}.{sample_func.__name__}")
+            slog.debug("Selected sample_func: %s", sample_func.__name__)
+
+            for SentimentAnalyzer in [
+                ThreeCheat,
+                NLTKSentimentAnalyzer,
+                TensorflowPolarSentimentAnalyzer,
+                TensorflowCategorySentimentAnalyzer,
+            ]:
+
+                slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
+                slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
+
+                for Tokenizer in [
+                    PlainTokenizer,
+                    LowercaseTokenizer,
+                    NLTKWordTokenizer,
+                    PottsTokenizer,
+                    PottsTokenizerWithNegation,
+                    HuggingBertTokenizer,
+                ]:
+
+                    log.debug("Running garbage collection...")
+                    garbage_count = gc.collect()
+                    log.debug("Collected %d pieces of garbage!", garbage_count)
+
+                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
+                    slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
+
+                    runs = 0
+                    successful_runs = 0
+                    cumulative_evaluation_results = EvaluationResults()
+
+                    while True:
+                        slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
+
+                        if successful_runs >= TARGET_RUNS.__wrapped__:
+                            slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
+                            break
+
+                        if runs >= MAXIMUM_RUNS.__wrapped__:
+                            slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
+                            break
+
+                        runs += 1
+                        slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
+                        slog.debug("Run #%d", runs)
+
+                        try:
+                            slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
+                            sa = SentimentAnalyzer(tokenizer=Tokenizer())
+                        except TypeError:
+                            slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__)
+                            break
+
+                        with mongo_client_from_config() as db:
+                            reviews = reviews_collection(db)
+                            datasets_cm = Caches.from_database_samples(collection=reviews, sample_func=sample_func)
+                            datasets = datasets_cm.__enter__()
+
+                        try:
+                            try:
+                                slog.info("Training sentiment analyzer: %s", sa)
+                                sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
+                            except TrainingFailedError:
+                                slog.error("Training failed, trying again with a different dataset...")
+                                file.write(f"{sample_func.__name__}\t{SentimentAnalyzer.__name__}\t{Tokenizer.__name__}\t{runs}\t\t\t\t\t\t\t\t\t\t\t\t\t\n")
+                                file.flush()
+                                continue
+                            else:
+                                slog.info("Training succeeded!")
+
+                                slog.info("Evaluating sentiment analyzer: %s", sa)
+                                evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
+                                slog.info("Evaluation results: %s", evaluation_results)
+
+                                file.write(f"{sample_func.__name__}\t{SentimentAnalyzer.__name__}\t{Tokenizer.__name__}\t{runs}\t{evaluation_results.mean_absolute_error()}\t{evaluation_results.mean_squared_error()}\t{evaluation_results.perfect_count()}\t{evaluation_results.recall(1.0)}\t{evaluation_results.recall(2.0)}\t{evaluation_results.recall(3.0)}\t{evaluation_results.recall(4.0)}\t{evaluation_results.recall(5.0)}\t{evaluation_results.precision(1.0)}\t{evaluation_results.precision(2.0)}\t{evaluation_results.precision(3.0)}\t{evaluation_results.precision(4.0)}\t{evaluation_results.precision(5.0)}\n")
+                                file.flush()
+
+                                successful_runs += 1
+                                cumulative_evaluation_results += evaluation_results
+                                break
+                        finally:
+                            datasets_cm.__exit__(None, None, None)
+
+                    slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
 
 
 if __name__ == "__main__":
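With this change, every run appends one tab-separated row to ./data/logs/results.tsv, writing the header once up front and leaving every metric column empty when training fails. As a hedged sketch (not part of this commit), the file can be read back with the standard library, using the column names from that header row:

import csv

# Load the results table produced by main(). The column names come from the
# header row written by this commit; this reader itself is hypothetical.
with open("./data/logs/results.tsv", newline="") as f:
    for row in csv.DictReader(f, delimiter="\t"):
        if not row["mean absolute error"]:
            # Failed runs are recorded as rows with every metric field blank.
            print(f"failed: {row['function']} {row['analyzer']} {row['tokenizer']} run {row['run no']}")
            continue
        # Metric cells are str(float), so float() round-trips them, including
        # the "inf" and "nan" values that EvaluationResults can produce.
        mae = float(row["mean absolute error"])
        mse = float(row["mean squared error"])
        print(f"{row['analyzer']} + {row['tokenizer']}: MAE={mae:.3f} MSE={mse:.3f}")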


@@ -147,6 +147,8 @@ class EvaluationResults:
         """
         try:
             return self.confusion_matrix[rating][rating] / self.recall_count(rating)
+        except KeyError:
+            return float("NaN")
         except ZeroDivisionError:
             return float("inf")

@@ -156,6 +158,8 @@ class EvaluationResults:
         """
         try:
             return self.confusion_matrix[rating][rating] / self.precision_count(rating)
+        except KeyError:
+            return float("NaN")
         except ZeroDivisionError:
             return float("inf")
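The two new except KeyError clauses make recall() and precision() total over all ratings: a rating that never appears in the confusion matrix now yields NaN instead of raising, while a rating whose count is zero keeps yielding infinity. A minimal sketch of that behaviour, assuming a confusion matrix shaped like a nested dict keyed by true and then predicted rating; this miniature recall() is hypothetical, not the class's actual code:

import math

# Hypothetical miniature of EvaluationResults.recall(): the confusion matrix
# is assumed to be keyed as confusion_matrix[true_rating][predicted_rating].
confusion_matrix = {1.0: {1.0: 3, 2.0: 1}, 2.0: {2.0: 0}}

def recall(rating: float) -> float:
    try:
        return confusion_matrix[rating][rating] / sum(confusion_matrix[rating].values())
    except KeyError:           # rating absent from the matrix -> NaN (new in this commit)
        return float("NaN")
    except ZeroDivisionError:  # rating present, but with zero occurrences -> inf
        return float("inf")

print(recall(1.0))              # 0.75: 3 of the 4 reviews truly rated 1.0 were matched
print(recall(2.0))              # inf: 2.0 occurs in the matrix, but with count zero
print(math.isnan(recall(5.0)))  # True: 5.0 was never observed at all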