mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 23:44:19 +00:00
Various upgrades
This commit is contained in:
parent
e71baeb41c
commit
b777236735
4 changed files with 189 additions and 50 deletions
|
@ -5,10 +5,10 @@ from .log import install_general_log_handlers
|
||||||
|
|
||||||
install_general_log_handlers()
|
install_general_log_handlers()
|
||||||
|
|
||||||
from .config import config
|
from .config import config, TARGET_RUNS, MAXIMUM_RUNS
|
||||||
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
|
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
|
||||||
from .analysis import NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, TensorflowPolarSentimentAnalyzer, ThreeCheat
|
from .analysis import NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, TensorflowPolarSentimentAnalyzer, ThreeCheat
|
||||||
from .analysis.base import TrainingFailedError
|
from .analysis.base import TrainingFailedError, EvaluationResults
|
||||||
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation, HuggingBertTokenizer
|
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation, HuggingBertTokenizer
|
||||||
from .gathering import Caches
|
from .gathering import Caches
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ def main():
|
||||||
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
||||||
|
|
||||||
for SentimentAnalyzer in [
|
for SentimentAnalyzer in [
|
||||||
ThreeCheat,
|
# ThreeCheat,
|
||||||
NLTKSentimentAnalyzer,
|
NLTKSentimentAnalyzer,
|
||||||
TensorflowPolarSentimentAnalyzer,
|
TensorflowPolarSentimentAnalyzer,
|
||||||
TensorflowCategorySentimentAnalyzer,
|
TensorflowCategorySentimentAnalyzer,
|
||||||
|
@ -67,17 +67,25 @@ def main():
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
||||||
slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
|
slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
|
||||||
|
|
||||||
run_counter = 0
|
runs = 0
|
||||||
|
successful_runs = 0
|
||||||
|
cumulative_evaluation_results = EvaluationResults()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{run_counter}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
||||||
run_counter += 1
|
|
||||||
slog.debug("Run #%d", run_counter)
|
|
||||||
|
|
||||||
if run_counter >= 100:
|
if successful_runs >= TARGET_RUNS.__wrapped__:
|
||||||
slog.fatal("Exceeded 100 runs, giving up and exiting...")
|
slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
|
||||||
exit(2)
|
break
|
||||||
|
|
||||||
|
if runs >= MAXIMUM_RUNS.__wrapped__:
|
||||||
|
slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
|
||||||
|
break
|
||||||
|
|
||||||
|
runs += 1
|
||||||
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
|
||||||
|
slog.debug("Run #%d", runs)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
|
slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
|
||||||
|
@ -97,12 +105,15 @@ def main():
|
||||||
|
|
||||||
else:
|
else:
|
||||||
slog.info("Training succeeded!")
|
slog.info("Training succeeded!")
|
||||||
|
|
||||||
slog.info("Evaluating sentiment analyzer: %s", sa)
|
slog.info("Evaluating sentiment analyzer: %s", sa)
|
||||||
evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
|
evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
|
||||||
slog.info("Evaluation results: %s", evaluation_results)
|
slog.info("Evaluation results: %s", evaluation_results)
|
||||||
|
successful_runs += 1
|
||||||
|
cumulative_evaluation_results += evaluation_results
|
||||||
break
|
break
|
||||||
|
|
||||||
|
slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -2,7 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import logging
|
import logging
|
||||||
import dataclasses
|
import collections
|
||||||
|
|
||||||
from ..database import CachedDatasetFunc
|
from ..database import CachedDatasetFunc
|
||||||
from ..tokenizer import BaseTokenizer
|
from ..tokenizer import BaseTokenizer
|
||||||
|
@ -39,54 +39,148 @@ class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
|
||||||
"""
|
"""
|
||||||
Perform a model evaluation by calling repeatedly `.use` on every text of the test dataset and by comparing its resulting category with the expected category.
|
Perform a model evaluation by calling repeatedly `.use` on every text of the test dataset and by comparing its resulting category with the expected category.
|
||||||
"""
|
"""
|
||||||
|
er = EvaluationResults()
|
||||||
# TODO: Add precision and recall measures
|
|
||||||
|
|
||||||
evaluated: int = 0
|
|
||||||
|
|
||||||
perfect: int = 0
|
|
||||||
|
|
||||||
squared_error: float = 0.0
|
|
||||||
|
|
||||||
for review in evaluation_dataset_func():
|
for review in evaluation_dataset_func():
|
||||||
resulting_category = self.use(review.text)
|
er.add(expected=review.rating, predicted=self.use(review.text))
|
||||||
log.debug("Evaluation step: %.1d* for %s", resulting_category, review)
|
return er
|
||||||
evaluated += 1
|
|
||||||
try:
|
|
||||||
perfect += 1 if resulting_category == review.rating else 0
|
|
||||||
squared_error += (resulting_category - review.rating) ** 2
|
|
||||||
except ValueError:
|
|
||||||
log.warning("Model execution on %s resulted in a NaN value: %s", review, resulting_category)
|
|
||||||
|
|
||||||
return EvaluationResults(perfect=perfect, evaluated=evaluated, mse=squared_error / evaluated)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class EvaluationResults:
|
class EvaluationResults:
|
||||||
"""
|
"""
|
||||||
Container for the results of a dataset evaluation.
|
Container for the results of a dataset evaluation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
evaluated: int
|
def __init__(self):
|
||||||
|
self.confusion_matrix: dict[float, dict[float, int]] = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
|
||||||
"""
|
"""
|
||||||
The number of reviews that were evaluated.
|
Confusion matrix of the evaluation.
|
||||||
|
|
||||||
|
First key is the expected rating, second key is the output label.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
perfect: int
|
self.absolute_error_total: float = 0.0
|
||||||
"""
|
"""
|
||||||
The number of reviews for which the model returned the correct rating.
|
Sum of the absolute errors committed in the evaluation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
mse: float
|
self.squared_error_total: float = 0.0
|
||||||
"""
|
"""
|
||||||
Mean squared error
|
Sum of the squared errors committed in the evaluation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self) -> str:
|
||||||
return f"<EvaluationResults: {self!s}>"
|
return f"<EvaluationResults with {self.evaluated_count()} evaluated and {len(self.keys())} categories>"
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self) -> str:
|
||||||
return f"Evaluation results:\t{self.evaluated}\tevaluated\t{self.perfect}\tperfect\t{self.perfect / self.evaluated:.2%}\taccuracy\t{self.mse / self.evaluated:.2}\tmean squared error"
|
text = [f"Evaluation results: {self.evaluated_count()} evaluated, {self.mean_absolute_error()} mean absolute error, {self.mean_squared_error()} mean squared error, "]
|
||||||
|
for key in self.keys():
|
||||||
|
text.append(f"{self.recall(key)} recall of {key}, ")
|
||||||
|
text.append(f"{self.precision(key)} precision of {key}, ")
|
||||||
|
text.append(f"{self.perfect_count()} perfect matches.")
|
||||||
|
return "".join(text)
|
||||||
|
|
||||||
|
def __add__(self, other: EvaluationResults) -> EvaluationResults:
    """
    Return a new result set combining this evaluation with ``other``.

    The cells of both confusion matrices are summed, and so are the accumulated absolute and squared error totals.
    Neither operand is modified.
    """
    new = self.__class__()
    for source in (self, other):
        for expected, row in source.confusion_matrix.items():
            for predicted, amount in row.items():
                new.confusion_matrix[expected][predicted] += amount
        # The error totals must be merged too, otherwise the sum would report zero mean errors.
        new.absolute_error_total += source.absolute_error_total
        new.squared_error_total += source.squared_error_total
    return new
|
||||||
|
|
||||||
|
def keys(self) -> set[float]:
    """
    Return all processed categories.

    A category is included if it appears in the confusion matrix either as an expected rating (outer key) or as a predicted label (inner key).
    """
    # Start from the expected ratings, then merge in the labels of every row.
    keys: set[float] = set(self.confusion_matrix)
    for row in self.confusion_matrix.values():
        keys.update(row)
    return keys
|
||||||
|
|
||||||
|
def evaluated_count(self) -> int:
    """
    Return the total number of evaluated reviews.

    This is the sum of every cell of the confusion matrix, and is ``0`` when nothing has been counted yet.
    """
    return sum(sum(row.values()) for row in self.confusion_matrix.values())
|
||||||
|
|
||||||
|
def perfect_count(self) -> int:
    """
    Return the total number of perfect reviews.

    A review is perfect when the predicted label equals the expected rating, so this is the sum of the diagonal of the confusion matrix.
    """
    # Use `.get` instead of indexing: the matrix is made of defaultdicts, and indexing a missing cell would insert it while merely reading.
    return sum(row.get(expected, 0) for expected, row in self.confusion_matrix.items())
|
||||||
|
|
||||||
|
def recall_count(self, rating: float) -> int:
    """
    Return the number of reviews processed with the given rating.

    This is the sum of the confusion matrix row whose expected rating is ``rating``, or ``0`` if no such row exists.
    """
    # Look the row up with `.get`: indexing the defaultdict would create an empty row for an unseen rating, which would then be reported as a category.
    row = self.confusion_matrix.get(rating)
    if not row:
        return 0
    return sum(row.values())
|
||||||
|
|
||||||
|
def precision_count(self, rating: float) -> int:
    """
    Return the number of reviews for which the model returned the given rating.

    This is the sum, across all expected ratings, of the cells whose predicted label is ``rating``.
    """
    # Use `.get` instead of indexing: indexing would insert a zero cell for `rating` into every defaultdict row.
    return sum(row.get(rating, 0) for row in self.confusion_matrix.values())
|
||||||
|
|
||||||
|
def recall(self, rating: float) -> float:
    """
    Return the recall for a given rating.

    The recall is the number of reviews with expected rating ``rating`` that were predicted as ``rating``, divided by ``self.recall_count(rating)``.
    If that count is zero, ``float("inf")`` is returned as a sentinel instead of raising.
    """
    # Read the diagonal cell with `.get`, so that querying an unseen rating does not insert it into the defaultdict matrix.
    hits = self.confusion_matrix.get(rating, {}).get(rating, 0)
    try:
        return hits / self.recall_count(rating)
    except ZeroDivisionError:
        return float("inf")
|
||||||
|
|
||||||
|
def precision(self, rating: float) -> float:
    """
    Return the precision for a given rating.

    The precision is the number of reviews with expected rating ``rating`` that were predicted as ``rating``, divided by ``self.precision_count(rating)``.
    If that count is zero, ``float("inf")`` is returned as a sentinel instead of raising.
    """
    # Read the diagonal cell with `.get`, so that querying an unseen rating does not insert it into the defaultdict matrix.
    hits = self.confusion_matrix.get(rating, {}).get(rating, 0)
    try:
        return hits / self.precision_count(rating)
    except ZeroDivisionError:
        return float("inf")
|
||||||
|
|
||||||
|
def mean_absolute_error(self) -> float:
    """
    Return the mean absolute error.

    This is ``absolute_error_total`` divided by ``self.evaluated_count()``.
    If nothing has been evaluated yet, ``float("inf")`` is returned instead of raising, the same sentinel `recall` and `precision` use for an empty denominator.
    """
    try:
        return self.absolute_error_total / self.evaluated_count()
    except ZeroDivisionError:
        # An empty result set must still be printable, e.g. when no run succeeded.
        return float("inf")
|
||||||
|
|
||||||
|
def mean_squared_error(self) -> float:
    """
    Return the mean squared error.

    This is ``squared_error_total`` divided by ``self.evaluated_count()``.
    If nothing has been evaluated yet, ``float("inf")`` is returned instead of raising, the same sentinel `recall` and `precision` use for an empty denominator.
    """
    try:
        return self.squared_error_total / self.evaluated_count()
    except ZeroDivisionError:
        # An empty result set must still be printable, e.g. when no run succeeded.
        return float("inf")
|
||||||
|
|
||||||
|
def add(self, expected: float, predicted: float) -> None:
    """
    Count a new prediction.

    The prediction is logged, the matching confusion matrix cell is incremented, and the absolute and squared differences between ``expected`` and ``predicted`` are added to the running error totals.
    """
    # Numeric log levels: 11 is used for a success (exact match), 12 for a failure.
    level = 11 if expected == predicted else 12
    log.log(level, "Expected %.1d*, predicted %.1d*", expected, predicted)

    self.confusion_matrix[expected][predicted] += 1

    error = expected - predicted
    self.absolute_error_total += abs(error)
    self.squared_error_total += error ** 2
|
||||||
|
|
||||||
|
|
||||||
class AlreadyTrainedError(Exception):
|
class AlreadyTrainedError(Exception):
|
||||||
|
|
|
@ -125,6 +125,35 @@ def TENSORFLOW_EPOCHS(val: str | None) -> int:
|
||||||
raise cfig.InvalidValueError("Not an int.")
|
raise cfig.InvalidValueError("Not an int.")
|
||||||
|
|
||||||
|
|
||||||
|
@config.optional()
def TARGET_RUNS(val: str | None) -> int:
    """
    The amount of successful runs to perform on a sample-model-tokenizer combination.
    Defaults to `1`.
    """
    # NOTE(review): docstring text left verbatim in case the configuration framework surfaces it to users - confirm.
    if val is not None:
        try:
            parsed = int(val)
        except ValueError:
            raise cfig.InvalidValueError("Not an int.")
        return parsed
    # No value was provided: fall back to the documented default.
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
@config.optional()
def MAXIMUM_RUNS(val: str | None) -> int:
    """
    The maximum amount of runs to perform on a sample-model-tokenizer combination before skipping it.
    Defaults to `25`.
    """
    # NOTE(review): docstring text left verbatim in case the configuration framework surfaces it to users - confirm.
    if val is not None:
        try:
            parsed = int(val)
        except ValueError:
            raise cfig.InvalidValueError("Not an int.")
        return parsed
    # No value was provided: fall back to the documented default.
    return 25
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
"config",
|
"config",
|
||||||
"MONGO_HOST",
|
"MONGO_HOST",
|
||||||
|
|
|
@ -3,6 +3,9 @@ import logging
|
||||||
import coloredlogs
|
import coloredlogs
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
logging.addLevelName(11, "SUCCESS")
|
||||||
|
logging.addLevelName(12, "FAILURE")
|
||||||
|
|
||||||
this_log = logging.getLogger(__name__)
|
this_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,9 +37,11 @@ def install_general_log_handlers():
|
||||||
level_styles=dict(
|
level_styles=dict(
|
||||||
debug=dict(color="white"),
|
debug=dict(color="white"),
|
||||||
info=dict(color="cyan"),
|
info=dict(color="cyan"),
|
||||||
warning=dict(color="yellow"),
|
warning=dict(color="yellow", bold=True),
|
||||||
error=dict(color="red"),
|
error=dict(color="red", bold=True),
|
||||||
critical=dict(color="black", background="red", bold=True),
|
critical=dict(color="black", background="red", bold=True),
|
||||||
|
success=dict(color="green"),
|
||||||
|
failure=dict(color="yellow"),
|
||||||
),
|
),
|
||||||
field_styles=dict(
|
field_styles=dict(
|
||||||
asctime=dict(color='magenta'),
|
asctime=dict(color='magenta'),
|
||||||
|
|
Loading…
Reference in a new issue