diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index bc46082..78a7b4c 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -5,6 +5,8 @@
diff --git a/.idea/misc.xml b/.idea/misc.xml
index c31251c..6b0faad 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,6 +4,9 @@
+
+
+
diff --git a/.idea/runConfigurations/unimore_bda_6.xml b/.idea/runConfigurations/unimore_bda_6.xml
index 22a226f..88d6f6a 100644
--- a/.idea/runConfigurations/unimore_bda_6.xml
+++ b/.idea/runConfigurations/unimore_bda_6.xml
@@ -4,8 +4,10 @@
+
+
diff --git a/.vscode/launch.json b/.vscode/launch.json
index afdced9..f21c516 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -9,7 +9,7 @@
"type": "python",
"request": "launch",
"module": "unimore_bda_6",
- "justMyCode": true,
+ "justMyCode": false,
"env": {
"NLTK_DATA": "./data/nltk",
"DATA_SET_SIZE": "250",
@@ -17,4 +17,4 @@
"cwd": "${workspaceFolder}",
}
]
-}
\ No newline at end of file
+}
diff --git a/unimore_bda_6/__main__.py b/unimore_bda_6/__main__.py
index eab91a5..009b505 100644
--- a/unimore_bda_6/__main__.py
+++ b/unimore_bda_6/__main__.py
@@ -64,35 +64,28 @@ def varied_categorizer(rating: float) -> str:
def main():
- with mongo_reviews_collection_from_config() as reviews:
- reviews_polar_training = dataset_polar(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
- reviews_polar_evaluation = dataset_polar(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
+ for dataset_func, categorizer in [
+ (dataset_polar, polar_categorizer),
+ (dataset_varied, varied_categorizer),
+ ]:
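+        # Train and evaluate one model for every tokenizer, first on the polar dataset, then on the varied one.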
+ for tokenizer in all_tokenizers:
+ with mongo_reviews_collection_from_config() as reviews:
+ reviews_training = dataset_func(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
+ reviews_evaluation = dataset_func(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
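+                # The training and evaluation sets are sampled independently, so they may overlap.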
- for tokenizer in all_tokenizers:
- log.info("Training polar model with %s tokenizer", tokenizer)
- model = VanillaSA(extractor=review_vanilla_extractor, tokenizer=tokenizer, categorizer=polar_categorizer)
- model.train(reviews_polar_training)
- log.info("Evaluating polar model with %s tokenizer", tokenizer)
- evaluation = model.evaluate(reviews_polar_evaluation)
- log.info("Polar model with %s results: %s", tokenizer, evaluation)
+ model = VanillaSA(extractor=review_vanilla_extractor, tokenizer=tokenizer, categorizer=categorizer)
+ log.info("Training model %s", model)
+ model.train(reviews_training)
+ log.info("Evaluating model %s", model)
+ evaluation = model.evaluate(reviews_evaluation)
+ log.info("Results of model %s: %s", tokenizer, evaluation)
- del reviews_polar_training
- del reviews_polar_evaluation
-
- with mongo_reviews_collection_from_config() as reviews:
- reviews_varied_training = dataset_varied(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
- reviews_varied_evaluation = dataset_varied(collection=reviews, amount=DATA_SET_SIZE.__wrapped__)
-
- for tokenizer in all_tokenizers:
- log.info("Training varied model with %s tokenizer", tokenizer)
- model = VanillaSA(extractor=review_vanilla_extractor, tokenizer=tokenizer, categorizer=varied_categorizer)
- model.train(reviews_varied_training)
- log.info("Evaluating varied model with %s tokenizer", tokenizer)
- evaluation = model.evaluate(reviews_varied_evaluation)
- log.info("Varied model with %s results: %s", tokenizer, evaluation)
-
- del reviews_varied_training
- del reviews_varied_evaluation
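+            # Offer an interactive prompt: classify text typed by the user until Ctrl+C is pressed.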
+ try:
+ print("Model %s" % model)
+ while True:
+ print(model.use(input()))
+ except KeyboardInterrupt:
+ pass
if __name__ == "__main__":
diff --git a/unimore_bda_6/analysis/base.py b/unimore_bda_6/analysis/base.py
index 7c92018..c5579ec 100644
--- a/unimore_bda_6/analysis/base.py
+++ b/unimore_bda_6/analysis/base.py
@@ -12,7 +12,7 @@ class BaseSA(metaclass=abc.ABCMeta):
"""
@abc.abstractmethod
- def train(self, training_set: list[tuple[Input, Category]]) -> None:
+    def train(self, training_set: t.Iterable[Input]) -> None:
"""
Train the analyzer with the given training set.
"""
diff --git a/unimore_bda_6/analysis/vanilla.py b/unimore_bda_6/analysis/vanilla.py
index fde5d55..6f233d2 100644
--- a/unimore_bda_6/analysis/vanilla.py
+++ b/unimore_bda_6/analysis/vanilla.py
@@ -4,11 +4,14 @@ import nltk.sentiment
import nltk.sentiment.util
import logging
import typing as t
+import itertools
from .base import Input, Category, BaseSA, AlreadyTrainedError, NotTrainedError
+from ..log import count_passage
TokenBag = list[str]
IntermediateValue = t.TypeVar("IntermediateValue")
+Features = dict[str, int]
log = logging.getLogger(__name__)
@@ -19,51 +22,72 @@ class VanillaSA(BaseSA):
A sentiment analyzer resembling in structure the one implemented in the classroom, using the basic sentiment analyzer of NLTK.
"""
- def __init__(self, *, extractor: t.Callable[[Input], tuple[str, Category]], tokenizer: t.Callable[[str], TokenBag], categorizer: t.Callable[[Input], Category]) -> None:
+ def __init__(self, *, extractor: t.Callable[[Input], tuple[str, IntermediateValue]], tokenizer: t.Callable[[str], TokenBag], categorizer: t.Callable[[IntermediateValue], Category]) -> None:
super().__init__()
self.model: nltk.sentiment.SentimentAnalyzer = nltk.sentiment.SentimentAnalyzer()
self.trained: bool = False
-
self.extractor: t.Callable[[Input], tuple[str, IntermediateValue]] = extractor
self.tokenizer: t.Callable[[str], TokenBag] = tokenizer
self.categorizer: t.Callable[[IntermediateValue], Category] = categorizer
- def __add_feature_unigrams(self, training_set: list[tuple[TokenBag, Category]]) -> None:
+ def __repr__(self):
+ return f"<{self.__class__.__qualname__} {'trained' if self.trained else 'untrained'} tokenizer={self.extractor!r} categorizer={self.categorizer!r}>"
+
+ @staticmethod
+ def __data_to_tokenbag(data: tuple[TokenBag, Category]) -> TokenBag:
"""
- Add the `nltk.sentiment.util.extract_unigram_feats` feature to the model.
+ Access the tokenbag of a data tuple.
"""
- all_words = self.model.all_words(training_set, labeled=True)
+ return data[0]
+
+ def __add_feature_unigrams(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
+ """
+        Register the `nltk.sentiment.util.extract_unigram_feats` feature extractor on the model.
+ """
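+        # Strip the categories first, so that all_words can be called with labeled=False.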
+ tokenbags = map(self.__data_to_tokenbag, dataset)
+ all_words = self.model.all_words(tokenbags, labeled=False)
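+        # Words occurring fewer than 4 times are not used as unigram features.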
unigrams = self.model.unigram_word_feats(words=all_words, min_freq=4)
self.model.add_feat_extractor(nltk.sentiment.util.extract_unigram_feats, unigrams=unigrams)
- def _add_features(self, training_set: list[tuple[TokenBag, Category]]):
+ def _add_features(self, dataset: t.Iterator[tuple[TokenBag, Category]]):
"""
- Add new features to the sentiment analyzer.
+ Register new feature extractors on the `.model`.
"""
- self.__add_feature_unigrams(training_set)
+ self.__add_feature_unigrams(dataset)
- def _train_from_dataset(self, dataset: list[tuple[TokenBag, Category]]) -> None:
+ def __extract_features(self, data: tuple[TokenBag, Category]) -> tuple[Features, Category]:
+ """
+ Convert a (TokenBag, Category) tuple to a (Features, Category) tuple.
+
+ Does not use `SentimentAnalyzer.apply_features` due to unexpected behaviour when using iterators.
+ """
+ return self.model.extract_features(data[0]), data[1]
+
+ def _train_from_dataset(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
"""
Train the model with the given training set.
"""
if self.trained:
raise AlreadyTrainedError()
- self.__add_feature_unigrams(dataset)
- training_set_with_features = self.model.apply_features(dataset, labeled=True)
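+        # The dataset is a one-shot iterator: tee it, so one copy can build the feature extractors and the other can train the classifier.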
+ dataset_1, dataset_2 = itertools.tee(dataset, 2)
- self.model.train(trainer=nltk.classify.NaiveBayesClassifier.train, training_set=training_set_with_features)
+ self._add_features(dataset_1)
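+        # dataset_1 is now exhausted; drop the reference so it can be garbage collected.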
+ del dataset_1
+
+ dataset_2 = map(self.__extract_features, dataset_2)
+ self.model.classifier = nltk.classify.NaiveBayesClassifier.train(dataset_2)
self.trained = True
- def _evaluate_from_dataset(self, dataset: list[tuple[TokenBag, Category]]) -> dict:
+ def _evaluate_from_dataset(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> dict:
"""
Perform a model evaluation with the given test set.
"""
if not self.trained:
raise NotTrainedError()
- test_set_with_features = self.model.apply_features(dataset, labeled=True)
- return self.model.evaluate(test_set_with_features)
+ dataset_1 = map(self.__extract_features, dataset)
+ return self.model.evaluate(dataset_1)
def _use_from_tokenbag(self, tokens: TokenBag) -> Category:
"""
@@ -75,17 +99,18 @@ class VanillaSA(BaseSA):
return self.model.classify(instance=tokens)
def _extract_data(self, inp: Input) -> tuple[TokenBag, Category]:
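+        # Emit a debug log line every 100 processed items.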
+ count_passage("processed_data", 100)
text, value = self.extractor(inp)
return self.tokenizer(text), self.categorizer(value)
- def _extract_dataset(self, inp: list[Input]) -> list[tuple[TokenBag, Category]]:
- return list(map(self._extract_data, inp))
+    def _extract_dataset(self, inp: t.Iterator[Input]) -> t.Iterator[tuple[TokenBag, Category]]:
+ return map(self._extract_data, inp)
- def train(self, training_set: list[Input]) -> None:
+ def train(self, training_set: t.Iterator[Input]) -> None:
dataset = self._extract_dataset(training_set)
self._train_from_dataset(dataset)
- def evaluate(self, test_set: list[tuple[Input, Category]]) -> None:
+ def evaluate(self, test_set: t.Iterator[Input]) -> dict:
dataset = self._extract_dataset(test_set)
return self._evaluate_from_dataset(dataset)
diff --git a/unimore_bda_6/database.py b/unimore_bda_6/database.py
index fd152f4..d699028 100644
--- a/unimore_bda_6/database.py
+++ b/unimore_bda_6/database.py
@@ -4,7 +4,7 @@ import pymongo.collection
import contextlib
import bson
import logging
-import random
+import itertools
from .config import MONGO_HOST, MONGO_PORT, WORKING_SET_SIZE, DATA_SET_SIZE
@@ -55,7 +55,7 @@ def mongo_reviews_collection_from_config() -> pymongo.collection.Collection[Revi
yield collection
-def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Iterable[Review]:
+def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
"""
Get ``amount`` random reviews from the ``reviews`` collection.
"""
@@ -67,7 +67,7 @@ def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Ite
])
-def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: float, amount: int) -> t.Iterable[Review]:
+def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: float, amount: int) -> t.Iterator[Review]:
"""
Get ``amount`` random reviews with ``rating`` stars from the ``reviews`` collection.
"""
@@ -80,7 +80,7 @@ def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: flo
])
-def dataset_polar(collection: pymongo.collection.Collection, amount: int) -> list[Review]:
+def dataset_polar(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
"""
Get the same amount of 1-star and 5-star reviews, as a single iterator.
"""
@@ -91,12 +91,12 @@ def dataset_polar(collection: pymongo.collection.Collection, amount: int) -> lis
negative = sample_reviews_by_rating(collection, rating=1.0, amount=amount)
# Randomness here does not matter, so just merge the lists
- both = [*positive, *negative]
+ both = itertools.chain(positive, negative)
return both
-def dataset_varied(collection: pymongo.collection.Collection, amount: int) -> list[Review]:
+def dataset_varied(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
"""
Get the same amount of reviews for each rating, as a single iterator.
"""
@@ -109,8 +109,7 @@ def dataset_varied(collection: pymongo.collection.Collection, amount: int) -> li
positive = sample_reviews_by_rating(collection, rating=4.0, amount=amount)
great = sample_reviews_by_rating(collection, rating=5.0, amount=amount)
- # Randomness here does not matter, so just merge the lists
- full = [*terrible, *negative, *mixed, *positive, *great]
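+    # Randomness here does not matter, so just chain the iterators lazily.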
+ full = itertools.chain(terrible, negative, mixed, positive, great)
return full
@@ -122,4 +121,5 @@ __all__ = (
"sample_reviews",
"sample_reviews_by_rating",
"dataset_polar",
+ "dataset_varied",
)
diff --git a/unimore_bda_6/log.py b/unimore_bda_6/log.py
index 1aa46a8..934eab4 100644
--- a/unimore_bda_6/log.py
+++ b/unimore_bda_6/log.py
@@ -1,3 +1,4 @@
+import collections
import logging
import coloredlogs
@@ -34,6 +35,16 @@ def install_log_handler(loggers: list[logging.Logger] = None):
log.debug("Installed custom log handler on: %s", logger)
+_passage_counts = collections.defaultdict(int)
+
+
+def count_passage(key: str, mod: int):
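+    """
+    Increment the counter named ``key``, and log its value every ``mod`` increments.
+    """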
+ _passage_counts[key] += 1
+ if not _passage_counts[key] % mod:
+ log.debug("%s - %d calls", key, _passage_counts[key])
+
+
__all__ = (
"install_log_handler",
+ "count_passage",
)
diff --git a/unimore_bda_6/tokenization/__init__.py b/unimore_bda_6/tokenization/__init__.py
index 57f3b66..24850c4 100644
--- a/unimore_bda_6/tokenization/__init__.py
+++ b/unimore_bda_6/tokenization/__init__.py
@@ -3,8 +3,8 @@ from . import potts_based
all_tokenizers = [
- nltk_based.tokenizer,
- potts_based.tokenizer,
+ nltk_based.nltk_tokenizer,
+ potts_based.potts_tokenizer,
]
diff --git a/unimore_bda_6/tokenization/nltk_based.py b/unimore_bda_6/tokenization/nltk_based.py
index cd4bd86..1c6eda1 100644
--- a/unimore_bda_6/tokenization/nltk_based.py
+++ b/unimore_bda_6/tokenization/nltk_based.py
@@ -2,7 +2,7 @@ import nltk
import nltk.sentiment.util
-def tokenizer(text: str) -> list[str]:
+def nltk_tokenizer(text: str) -> list[str]:
"""
Convert a text string into a list of tokens.
"""
@@ -12,5 +12,5 @@ def tokenizer(text: str) -> list[str]:
__all__ = (
- "tokenizer",
+ "nltk_tokenizer",
)
diff --git a/unimore_bda_6/tokenization/potts_based.py b/unimore_bda_6/tokenization/potts_based.py
index a0ab5eb..c56c23c 100644
--- a/unimore_bda_6/tokenization/potts_based.py
+++ b/unimore_bda_6/tokenization/potts_based.py
@@ -143,7 +143,7 @@ amp = "&"
######################################################################
-def tokenizer(text: str) -> t.Iterable[str]:
+def potts_tokenizer(text: str) -> t.Iterable[str]:
"""
Argument: text -- any string object
Value: a tokenized list of strings; concatenating this list returns the original string if preserve_case=False
@@ -187,5 +187,5 @@ def __html2string(html: str) -> str:
__all__ = (
- "tokenizer",
+ "potts_tokenizer",
)