2023-02-02 01:56:37 +00:00
|
|
|
import abc
|
2023-02-03 22:27:44 +00:00
|
|
|
import logging
|
2023-02-04 05:14:24 +00:00
|
|
|
import dataclasses
|
2023-02-02 16:24:11 +00:00
|
|
|
|
2023-02-07 09:22:09 +00:00
|
|
|
from ..database import Text, Category, DatasetFunc
|
2023-02-02 16:24:11 +00:00
|
|
|
|
2023-02-03 22:27:44 +00:00
|
|
|
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
|
2023-02-02 01:56:37 +00:00
|
|
|
|
|
|
|
|
2023-02-04 05:14:24 +00:00
|
|
|
@dataclasses.dataclass
class EvaluationResults:
    """
    Container for the counters produced by a model evaluation.

    The derived percentages shown by `__str__` are computed from these three fields on demand.
    """

    # Number of evaluated items counted as correct.
    correct: int
    # Total number of items that were evaluated.
    evaluated: int
    # Accumulated score over all evaluated items.
    score: float

    def __repr__(self):
        return f"<EvaluationResults: score of {self.score} out of {self.evaluated} evaluated tuples>"

    def __str__(self):
        """
        Return a human-readable, one-line summary of the results.

        Percentages are reported as ``0.00`` when nothing has been evaluated, instead of raising `ZeroDivisionError`.
        """
        if self.evaluated:
            accuracy = self.correct / self.evaluated * 100
            score_accuracy = self.score / self.evaluated * 100
        else:
            # Nothing evaluated yet: avoid dividing by zero so the object stays printable.
            accuracy = score_accuracy = 0.0
        # Fixed-point `.2f` is required here: a bare `.2` precision means "two significant digits"
        # and would render 75.0 as "7.5e+01".
        return f"{self.evaluated} evaluated, {self.correct} correct, {accuracy:.2f} % accuracy, {self.score:.2f} score, {score_accuracy:.2f} scoreaccuracy"
|
2023-02-04 05:14:24 +00:00
|
|
|
|
|
|
|
|
2023-02-03 22:27:44 +00:00
|
|
|
class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
    """
    Abstract base class for sentiment analyzers implemented in this project.

    Subclasses must implement `.train` and `.use`; `.evaluate` is implemented here on top of `.use`.
    """

    @abc.abstractmethod
    def train(self, dataset_func: DatasetFunc) -> None:
        """
        Train the analyzer with the given training dataset.

        :param dataset_func: Source of the training data (presumably a zero-argument callable yielding reviews, as consumed by `.evaluate` — confirm against `DatasetFunc`).
        """
        raise NotImplementedError()

    def evaluate(self, dataset_func: DatasetFunc) -> EvaluationResults:
        """
        Perform a model evaluation by calling repeatedly `.use` on every text of the test dataset and by comparing its resulting category with the expected category.

        :param dataset_func: Callable invoked with no arguments; every item it yields must expose `.text` and `.category`.
        :return: An `EvaluationResults` holding the number of correct results, the number of evaluated results and the accumulated score.
        """
        evaluated: int = 0
        correct: int = 0
        score: float = 0.0

        for review in dataset_func():
            resulting_category = self.use(review.text)
            evaluated += 1
            if resulting_category == review.category:
                correct += 1
            # Partial credit: the further the prediction is from the expected category, the lower the score.
            # NOTE(review): the divisor 4 is presumably the largest possible distance between two categories — confirm.
            score += 1 - (abs(resulting_category - review.category) / 4)

            # Emit a progress report every 100 evaluated items.
            if not evaluated % 100:
                temp_results = EvaluationResults(correct=correct, evaluated=evaluated, score=score)
                # Lazy %-style argument: the summary string is only built if DEBUG logging is enabled.
                log.debug("%s", temp_results)

        return EvaluationResults(correct=correct, evaluated=evaluated, score=score)

    @abc.abstractmethod
    def use(self, text: Text) -> Category:
        """
        Run the model on the given input.

        :param text: The text to run the model on.
        :return: The category resulting from the model.
        """
        raise NotImplementedError()
|
2023-02-02 01:56:37 +00:00
|
|
|
|
|
|
|
|
2023-02-04 00:36:42 +00:00
|
|
|
class AlreadyTrainedError(Exception):
    """
    This model has already been trained and cannot be trained again.
    """
|
|
|
|
|
|
|
|
|
|
|
|
class NotTrainedError(Exception):
    """
    This model has not been trained yet.
    """
|
|
|
|
|
|
|
|
|
2023-02-08 09:54:14 +00:00
|
|
|
class TrainingFailedError(Exception):
    """
    The model wasn't able to complete the training and should not be used anymore.
    """
|
|
|
|
|
|
|
|
|
2023-02-02 01:56:37 +00:00
|
|
|
# Public API of this module.
# `EvaluationResults` is exported as well, since it is the return type of `BaseSentimentAnalyzer.evaluate`.
__all__ = (
    "EvaluationResults",
    "BaseSentimentAnalyzer",
    "AlreadyTrainedError",
    "NotTrainedError",
    "TrainingFailedError",
)
|