From 4f40aa44b4af17d138dc82961d850e1516e90502 Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Fri, 10 Feb 2023 05:18:24 +0100 Subject: [PATCH] Fix some leftover bugs --- unimore_bda_6/__main__.py | 6 +++--- unimore_bda_6/analysis/nltk_sentiment.py | 2 +- unimore_bda_6/analysis/tf_text.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/unimore_bda_6/__main__.py b/unimore_bda_6/__main__.py index 7d3b7f0..5111065 100644 --- a/unimore_bda_6/__main__.py +++ b/unimore_bda_6/__main__.py @@ -39,19 +39,19 @@ def main(): slog.debug("Selected sample_func: %s", sample_func.__name__) for SentimentAnalyzer in [ - NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, + NLTKSentimentAnalyzer, ]: slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}") slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__) for Tokenizer in [ - PottsTokenizer, - PottsTokenizerWithNegation, PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, + PottsTokenizer, + PottsTokenizerWithNegation, ]: slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}") diff --git a/unimore_bda_6/analysis/nltk_sentiment.py b/unimore_bda_6/analysis/nltk_sentiment.py index f3d4b86..4c063b7 100644 --- a/unimore_bda_6/analysis/nltk_sentiment.py +++ b/unimore_bda_6/analysis/nltk_sentiment.py @@ -40,7 +40,7 @@ class NLTKSentimentAnalyzer(BaseSentimentAnalyzer): Convert the `Text` of a `DataTuple` to a `TokenBag`. """ count_passage(log, "tokenize_datatuple", 100) - return self.tokenizer.tokenize_plain(datatuple.text), datatuple.category + return self.tokenizer.tokenize_and_split_plain(datatuple.text), datatuple.category def _add_feature_unigrams(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None: """ diff --git a/unimore_bda_6/analysis/tf_text.py b/unimore_bda_6/analysis/tf_text.py index 840bfac..8d3442f 100644 --- a/unimore_bda_6/analysis/tf_text.py +++ b/unimore_bda_6/analysis/tf_text.py @@ -71,7 +71,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta): """ log.debug("Creating TextVectorization layer...") layer = tensorflow.keras.layers.TextVectorization( - standardize=self.tokenizer.tokenize_tensorflow, + standardize=self.tokenizer.tokenize_tensorflow_and_expand_dims, max_tokens=TENSORFLOW_MAX_FEATURES.__wrapped__ ) log.debug("Created TextVectorization layer: %s", layer) @@ -177,8 +177,8 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer): dataset_func=dataset_func, conversion_func=Review.to_tensor_tuple, output_signature=( - tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.string, name="text"), - tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="review_one_hot"), + tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"), + tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"), ), )