1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 16:04:18 +00:00
bda-6-steffo/unimore_bda_6/analysis/base.py

50 lines
1.4 KiB
Python
Raw Normal View History

2023-02-02 01:56:37 +00:00
import abc
2023-02-03 22:27:44 +00:00
import logging
2023-02-02 16:24:11 +00:00
2023-02-03 22:27:44 +00:00
from ..database import DataSet, Text, Category
2023-02-02 16:24:11 +00:00
2023-02-03 22:27:44 +00:00
log = logging.getLogger(__name__)


class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
    """
    Abstract base class for sentiment analyzers implemented in this project.

    Subclasses must implement `.train` and `.use`; `.evaluate` is provided here and is built on top of `.use`.
    """

    @abc.abstractmethod
    def train(self, training_set: DataSet) -> None:
        """
        Train the analyzer with the given training dataset.

        :param training_set: The dataset to train the analyzer with.
        """
        raise NotImplementedError()

    def evaluate(self, test_set: DataSet) -> tuple[int, int]:
        """
        Perform a model evaluation by calling repeatedly `.use` on every text of the test dataset and by comparing its resulting category with the expected category.

        Progress is reported on the module logger at debug level once every 100 evaluated texts.

        :param test_set: An iterable yielding `(text, expected_category)` pairs.
        :returns: A tuple with the number of correct results and the number of evaluated results, in this order.
        """
        evaluated: int = 0
        correct: int = 0

        for text, expected_category in test_set:
            resulting_category = self.use(text)
            evaluated += 1
            correct += 1 if resulting_category == expected_category else 0
            if not evaluated % 100:
                # `evaluated` is a positive multiple of 100 here, so the division is always safe.
                # The accuracy is a float: it needs the `f` conversion, since `d` would drop its decimals.
                log.debug("%d evaluated, %d correct, %0.2f %% accuracy", evaluated, correct, correct / evaluated * 100)

        return correct, evaluated

    @abc.abstractmethod
    def use(self, text: Text) -> Category:
        """
        Run the model on the given input.

        :param text: The text to run the model on.
        :returns: The category resulting from the model.
        """
        raise NotImplementedError()
2023-02-02 01:56:37 +00:00
# Public names exported by a star-import of this module.
__all__ = ("BaseSentimentAnalyzer",)