mirror of https://github.com/Steffo99/unimore-bda-6.git

Fix some leftover bugs

Steffo 2023-02-10 05:18:24 +01:00
parent 3d9eeecb2a
commit 4f40aa44b4
Signed by: steffo
GPG key ID: 2A24051445686895
3 changed files with 7 additions and 7 deletions


@@ -39,19 +39,19 @@ def main():
         slog.debug("Selected sample_func: %s", sample_func.__name__)
         for SentimentAnalyzer in [
-            NLTKSentimentAnalyzer,
             TensorflowCategorySentimentAnalyzer,
+            NLTKSentimentAnalyzer,
         ]:
             slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
             slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
             for Tokenizer in [
-                PottsTokenizer,
-                PottsTokenizerWithNegation,
                 PlainTokenizer,
                 LowercaseTokenizer,
                 NLTKWordTokenizer,
+                PottsTokenizer,
+                PottsTokenizerWithNegation,
             ]:
                 slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
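Note: this hunk only reorders the analyzer and tokenizer lists; every combination is still benchmarked, each with its own dotted logger name so its output can be filtered independently. A minimal sketch of that pattern, using hypothetical stand-in classes rather than the project's real ones:

import itertools
import logging

# Hypothetical stand-ins for the project's analyzer and tokenizer classes.
class TensorflowCategorySentimentAnalyzer: pass
class NLTKSentimentAnalyzer: pass
class PlainTokenizer: pass
class PottsTokenizer: pass

logging.basicConfig(level=logging.DEBUG)

for SentimentAnalyzer, Tokenizer in itertools.product(
    [TensorflowCategorySentimentAnalyzer, NLTKSentimentAnalyzer],
    [PlainTokenizer, PottsTokenizer],
):
    # One logger per combination: dotted names inherit handlers and levels
    # from their parents, so a single root configuration covers all of them.
    slog = logging.getLogger(f"{__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
    slog.debug("Benchmarking this combination")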


@@ -40,7 +40,7 @@ class NLTKSentimentAnalyzer(BaseSentimentAnalyzer):
         Convert the `Text` of a `DataTuple` to a `TokenBag`.
         """
         count_passage(log, "tokenize_datatuple", 100)
-        return self.tokenizer.tokenize_plain(datatuple.text), datatuple.category
+        return self.tokenizer.tokenize_and_split_plain(datatuple.text), datatuple.category

     def _add_feature_unigrams(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
         """


@@ -71,7 +71,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta):
         """
         log.debug("Creating TextVectorization layer...")
         layer = tensorflow.keras.layers.TextVectorization(
-            standardize=self.tokenizer.tokenize_tensorflow,
+            standardize=self.tokenizer.tokenize_tensorflow_and_expand_dims,
             max_tokens=TENSORFLOW_MAX_FEATURES.__wrapped__
         )
         log.debug("Created TextVectorization layer: %s", layer)
@@ -177,8 +177,8 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
             dataset_func=dataset_func,
             conversion_func=Review.to_tensor_tuple,
             output_signature=(
-                tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.string, name="text"),
-                tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="review_one_hot"),
+                tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
+                tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"),
             ),
         )
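The `output_signature` here describes each element yielded by a generator-backed `tf.data.Dataset`: after the fix, the review text is a scalar string and the rating is a `(1, 5)` one-hot tensor. A minimal sketch, with a hypothetical generator standing in for `dataset_func` combined with `Review.to_tensor_tuple`:

import tensorflow as tf

# Hypothetical generator: yields (scalar string, one-hot rating of shape (1, 5)) pairs.
def review_generator():
    yield "great film, would watch again", tf.constant([[0.0, 0.0, 0.0, 0.0, 1.0]])
    yield "terrible, fell asleep", tf.constant([[1.0, 0.0, 0.0, 0.0, 0.0]])

dataset = tf.data.Dataset.from_generator(
    review_generator,
    output_signature=(
        tf.TensorSpec(shape=(), dtype=tf.string, name="text"),
        tf.TensorSpec(shape=(1, 5), dtype=tf.float32, name="review_one_hot"),
    ),
)

for text, one_hot in dataset:
    print(text.numpy().decode(), one_hot.shape)  # "...review text..." (1, 5)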