Use composition instead of inheritance

2024-11-24 08:44:19 +00:00 · 2023-02-02 16:03:07 +01:00 · 2023-02-02 16:03:07 +01:00 · 4c3f892038
commit 4c3f892038
parent 3ae43b2714
3 changed files with 55 additions and 42 deletions
--- a/unimore_bda_6/main.py
+++ b/unimore_bda_6/main.py
@ -2,7 +2,7 @@ import logging
 from .config import config, DATA_SET_SIZE
 from .database import mongo_reviews_collection_from_config, get_reviews_dataset_polar, get_reviews_dataset_uniform
-from .analysis.vanilla import VanillaReviewSA, VanillaUniformReviewSA
+from .analysis.vanilla import VanillaReviewSA, polar_categorizer, stars_categorizer
 from .analysis.potts import PottsReviewSA
 from .log import install_log_handler
@ -16,7 +16,7 @@ def main():
        reviews_uniform_training = get_reviews_dataset_uniform(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
        reviews_uniform_evaluation = get_reviews_dataset_uniform(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
-    vanilla_polar = VanillaReviewSA()
+    vanilla_polar = VanillaReviewSA(categorizer=polar_categorizer)
    vanilla_polar.train(reviews_polar_training)
    log.info("Vanilla polar evaluation results: %s", vanilla_polar.evaluate(reviews_polar_evaluation))
@ -24,7 +24,7 @@ def main():
    potts_polar.train(reviews_polar_training)
    log.info("Potts polar evaluation results: %s", potts_polar.evaluate(reviews_polar_evaluation))
-    vanilla_uniform = VanillaUniformReviewSA()
+    vanilla_uniform = VanillaReviewSA(categorizer=stars_categorizer)
    vanilla_uniform.train(reviews_uniform_training)
    # Evaluate the uniform model on the uniform evaluation set; evaluating
    # `vanilla_polar` here would just repeat the polar results under the wrong label.
    log.info("Vanilla uniform evaluation results: %s", vanilla_uniform.evaluate(reviews_uniform_evaluation))
--- a/unimore_bda_6/analysis/potts.py
+++ b/unimore_bda_6/analysis/potts.py
@ -1,5 +1,5 @@
 from ..vendor.potts import Tokenizer
-from .vanilla import VanillaSA, VanillaReviewSA, VanillaUniformReviewSA
+from .vanilla import VanillaSA, VanillaReviewSA
 class PottsSA(VanillaSA):
@ -24,12 +24,6 @@ class PottsReviewSA(VanillaReviewSA, PottsSA):
    """
 class PottsUniformReviewSA(VanillaUniformReviewSA, PottsSA):
    """
    A `PottsSA` with 5 buckets instead of 2.
    """
 __all__ = (
    "PottsSA",
    "PottsReviewSA",
--- a/unimore_bda_6/analysis/vanilla.py
+++ b/unimore_bda_6/analysis/vanilla.py
@ -76,25 +76,15 @@ class VanillaReviewSA(VanillaSA):
    A `VanillaSA` to be used with `Review`s.
    """
-    @staticmethod
+    def __init__(self, categorizer: t.Callable[[Review], str]) -> None:
-    def _rating_to_label(rating: float) -> str:
+        super().__init__()
-        """
+        self.categorizer: t.Callable[[Review], str] = categorizer
        Return the label corresponding to the given rating.
        Possible categories are:
        * negative (0.0 <= rating < 3.0)
        * positive (3.0 < rating <= 5.0)
        """
        if rating < 3.0:
            return "negative"
        else:
            return "positive"
    def _review_to_data_set(self, review: Review) -> tuple[list[str], str]:
        """
        Convert a review to a NLTK-compatible dataset.
        """
-        return self._tokenize_text(text=review["reviewText"]), self._rating_to_label(rating=review["overall"])
+        return self._tokenize_text(text=review["reviewText"]), self.categorizer(rating=review["overall"])
    def train(self, reviews: t.Iterable[Review]) -> None:
        data_set = list(map(self._review_to_data_set, reviews))
@ -108,27 +98,56 @@ class VanillaReviewSA(VanillaSA):
        return self._use_with_tokens(self._tokenize_text(text))
-class VanillaUniformReviewSA(VanillaReviewSA):
+def polar_categorizer(rating: float) -> str:
-    @staticmethod
+    """
-    def _rating_to_label(rating: float) -> str:
+    Return the polar label corresponding to the given rating.
-        match rating:
+
-            case 0.0:
+    Possible categories are:
-                return "abysmal"
+    
-            case 1.0:
+    * negative (1.0, 2.0)
-                return "terrible"
+    * positive (3.0, 4.0, 5.0)
-            case 2.0:
+    * unknown (everything else)
-                return "negative"
+    """
-            case 3.0:
+    match rating:
-                return "mixed"
+        case 1.0 | 2.0:
-            case 4.0:
+            return "negative"
-                return "positive"
+        case 3.0 | 4.0 | 5.0:
-            case 5.0:
+            return "positive"
-                return "great"
+        case _:
-            case _:
+            return "unknown"
-                return "unknown"
+
 def stars_categorizer(rating: float) -> str:
    """
    Map a star rating to its "stars" category label.

    Each whole-star rating has its own label:

    * 1.0 -> terrible
    * 2.0 -> negative
    * 3.0 -> mixed
    * 4.0 -> positive
    * 5.0 -> great

    Any other value is labelled "unknown".
    """
    labels_by_stars = (
        (1.0, "terrible"),
        (2.0, "negative"),
        (3.0, "mixed"),
        (4.0, "positive"),
        (5.0, "great"),
    )
    # Compare by equality, lowest star first; the first hit decides the label.
    for stars, label in labels_by_stars:
        if rating == stars:
            return label
    return "unknown"
 # Names exported by this module on `from ... import *`.
 __all__ = (
    "VanillaSA",
    "VanillaReviewSA",
    "polar_categorizer",
    "stars_categorizer",
 )