diff --git a/.idea/runConfigurations/unimore_bda_6.xml b/.idea/runConfigurations/unimore_bda_6.xml
index 77fdf31..fa5fcae 100644
--- a/.idea/runConfigurations/unimore_bda_6.xml
+++ b/.idea/runConfigurations/unimore_bda_6.xml
@@ -5,10 +5,11 @@
-
+
+
diff --git a/unimore_bda_6/analysis/base.py b/unimore_bda_6/analysis/base.py
index b9c3900..eea07b4 100644
--- a/unimore_bda_6/analysis/base.py
+++ b/unimore_bda_6/analysis/base.py
@@ -1,9 +1,8 @@
import abc
import logging
-import typing as t
import dataclasses
-from ..database import Text, Category, Review, DatasetFunc
+from ..database import Text, Category, DatasetFunc
log = logging.getLogger(__name__)
diff --git a/unimore_bda_6/analysis/tf_text.py b/unimore_bda_6/analysis/tf_text.py
index 7ac3825..8beee6d 100644
--- a/unimore_bda_6/analysis/tf_text.py
+++ b/unimore_bda_6/analysis/tf_text.py
@@ -1,6 +1,7 @@
import tensorflow
from ..database import Text, Category, DatasetFunc
+from ..config import DATA_SET_SIZE
from .base import BaseSentimentAnalyzer, AlreadyTrainedError, NotTrainedError
@@ -21,17 +22,20 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
dataset_func_with_tensor_tuple,
output_signature=(
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
- tensorflow.TensorSpec(shape=(), dtype=tensorflow.float32, name="category"),
+ tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="category"),
)
)
def _build_model(self) -> tensorflow.keras.Sequential:
return tensorflow.keras.Sequential([
- tensorflow.keras.layers.Embedding(input_dim=self.MAX_FEATURES + 1, output_dim=self.EMBEDDING_DIM),
- tensorflow.keras.layers.Dropout(0.2),
+ tensorflow.keras.layers.Embedding(
+ input_dim=self.MAX_FEATURES + 1,
+ output_dim=self.EMBEDDING_DIM,
+ ),
+ # tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.GlobalAveragePooling1D(),
- tensorflow.keras.layers.Dropout(0.2),
- tensorflow.keras.layers.Dense(1),
+ # tensorflow.keras.layers.Dropout(0.2),
+ tensorflow.keras.layers.Dense(5, activation="softmax"),
])
def _build_vectorizer(self) -> tensorflow.keras.layers.TextVectorization:
@@ -41,9 +45,13 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
text = tensorflow.expand_dims(text, -1) # TODO: ??????
return self.text_vectorization_layer(text), category
- MAX_FEATURES = 1000
- EMBEDDING_DIM = 16
- EPOCHS = 10
+ MAX_FEATURES = 2500
+ EMBEDDING_DIM = 24
+ """
+ Size of the embedding vector each token is mapped to (the `output_dim` of the Embedding layer); each dimension can loosely be read as one possible "semantic meaning" of a word.
+ """
+
+ EPOCHS = 3
def train(self, dataset_func: DatasetFunc) -> None:
if self.trained:
@@ -55,11 +63,10 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
self.text_vectorization_layer.adapt(only_text_set)
training_set = training_set.map(self.__vectorize_data)
- self.model.compile(loss=tensorflow.keras.losses.CosineSimilarity(axis=0), metrics=["accuracy"])
+ # self.model.compile(loss=tensorflow.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=["accuracy"])
+ self.model.compile(loss=tensorflow.keras.losses.MeanAbsoluteError(), optimizer="adam", metrics=["accuracy"])
- history = self.model.fit(training_set, epochs=self.EPOCHS)
-
- ...
+ self.model.fit(training_set, epochs=self.EPOCHS)
self.trained = True
@@ -67,5 +74,15 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
if not self.trained:
raise NotTrainedError()
- prediction = self.model.predict(text)
- breakpoint()
+ vector = self.text_vectorization_layer(tensorflow.expand_dims(text, -1))
+
+ prediction = self.model.predict(vector)
+
+ max_i = None
+ max_p = None
+ for i, p in enumerate(iter(prediction[0])):
+ if max_p is None or p > max_p:
+ max_i = i
+ max_p = p
+
+ return float(max_i) + 1.0
diff --git a/unimore_bda_6/database/cache.py b/unimore_bda_6/database/cache.py
index 9db9829..ae00f34 100644
--- a/unimore_bda_6/database/cache.py
+++ b/unimore_bda_6/database/cache.py
@@ -41,7 +41,6 @@ def load_cache(path: str | pathlib.Path) -> DatasetFunc:
path = pathlib.Path(path)
if not path.exists():
- log.error("Specified cache directory does not exist: %s", path)
raise FileNotFoundError("The specified path does not exist.")
def data_cache_loader():
diff --git a/unimore_bda_6/database/datatypes.py b/unimore_bda_6/database/datatypes.py
index 32c65f1..5f1df35 100644
--- a/unimore_bda_6/database/datatypes.py
+++ b/unimore_bda_6/database/datatypes.py
@@ -33,7 +33,13 @@ class Review:
return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
def to_tensor_category(self) -> tensorflow.Tensor:
- return tensorflow.convert_to_tensor(self.category / 5.0, dtype=tensorflow.float32)
+ return tensorflow.convert_to_tensor([
+ 1.0 if self.category == 1.0 else 0.0,
+ 1.0 if self.category == 2.0 else 0.0,
+ 1.0 if self.category == 3.0 else 0.0,
+ 1.0 if self.category == 4.0 else 0.0,
+ 1.0 if self.category == 5.0 else 0.0,
+ ], dtype=tensorflow.float32)
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
return (
diff --git a/unimore_bda_6/log.py b/unimore_bda_6/log.py
index 4789be8..c7272fe 100644
--- a/unimore_bda_6/log.py
+++ b/unimore_bda_6/log.py
@@ -15,7 +15,7 @@ def install_log_handler(loggers: list[logging.Logger] = None):
for logger in loggers:
coloredlogs.install(
logger=logger,
- level="DEBUG",
+ level="INFO",
fmt="{asctime} | {name:<32} | {levelname:>8} | {message}",
style="{",
level_styles=dict(