mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-25 17:24:20 +00:00
Fix some leftover bugs
This commit is contained in:
parent
3d9eeecb2a
commit
4f40aa44b4
3 changed files with 7 additions and 7 deletions
|
@@ -39,19 +39,19 @@ def main():
|
||||||
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
||||||
|
|
||||||
for SentimentAnalyzer in [
|
for SentimentAnalyzer in [
|
||||||
NLTKSentimentAnalyzer,
|
|
||||||
TensorflowCategorySentimentAnalyzer,
|
TensorflowCategorySentimentAnalyzer,
|
||||||
|
NLTKSentimentAnalyzer,
|
||||||
]:
|
]:
|
||||||
|
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
|
||||||
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
|
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
|
||||||
|
|
||||||
for Tokenizer in [
|
for Tokenizer in [
|
||||||
PottsTokenizer,
|
|
||||||
PottsTokenizerWithNegation,
|
|
||||||
PlainTokenizer,
|
PlainTokenizer,
|
||||||
LowercaseTokenizer,
|
LowercaseTokenizer,
|
||||||
NLTKWordTokenizer,
|
NLTKWordTokenizer,
|
||||||
|
PottsTokenizer,
|
||||||
|
PottsTokenizerWithNegation,
|
||||||
]:
|
]:
|
||||||
|
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
||||||
|
|
|
@@ -40,7 +40,7 @@ class NLTKSentimentAnalyzer(BaseSentimentAnalyzer):
|
||||||
Convert the `Text` of a `DataTuple` to a `TokenBag`.
|
Convert the `Text` of a `DataTuple` to a `TokenBag`.
|
||||||
"""
|
"""
|
||||||
count_passage(log, "tokenize_datatuple", 100)
|
count_passage(log, "tokenize_datatuple", 100)
|
||||||
return self.tokenizer.tokenize_plain(datatuple.text), datatuple.category
|
return self.tokenizer.tokenize_and_split_plain(datatuple.text), datatuple.category
|
||||||
|
|
||||||
def _add_feature_unigrams(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
|
def _add_feature_unigrams(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
|
@@ -71,7 +71,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta):
|
||||||
"""
|
"""
|
||||||
log.debug("Creating TextVectorization layer...")
|
log.debug("Creating TextVectorization layer...")
|
||||||
layer = tensorflow.keras.layers.TextVectorization(
|
layer = tensorflow.keras.layers.TextVectorization(
|
||||||
standardize=self.tokenizer.tokenize_tensorflow,
|
standardize=self.tokenizer.tokenize_tensorflow_and_expand_dims,
|
||||||
max_tokens=TENSORFLOW_MAX_FEATURES.__wrapped__
|
max_tokens=TENSORFLOW_MAX_FEATURES.__wrapped__
|
||||||
)
|
)
|
||||||
log.debug("Created TextVectorization layer: %s", layer)
|
log.debug("Created TextVectorization layer: %s", layer)
|
||||||
|
@@ -177,8 +177,8 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
|
||||||
dataset_func=dataset_func,
|
dataset_func=dataset_func,
|
||||||
conversion_func=Review.to_tensor_tuple,
|
conversion_func=Review.to_tensor_tuple,
|
||||||
output_signature=(
|
output_signature=(
|
||||||
tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.string, name="text"),
|
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
|
||||||
tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="review_one_hot"),
|
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue