1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

back to i have no idea of what's happening, but at least it works

This commit is contained in:
Steffo 2023-02-07 10:22:09 +01:00
parent e9a4421acd
commit c31743f066
Signed by: steffo
GPG key ID: 2A24051445686895
6 changed files with 42 additions and 20 deletions

View file

@ -5,10 +5,11 @@
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="CONFIRM_OVERWRITE" value="False" />
<env name="DATA_SET_SIZE" value="100" />
<env name="DATA_SET_SIZE" value="2500" />
<env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" />
<env name="WORKING_SET_SIZE" value="1000000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs>
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
<option name="SDK_NAME" value="Poetry (unimore-bda-6)" />

View file

@ -1,9 +1,8 @@
import abc
import logging
import typing as t
import dataclasses
from ..database import Text, Category, Review, DatasetFunc
from ..database import Text, Category, DatasetFunc
log = logging.getLogger(__name__)

View file

@ -1,6 +1,7 @@
import tensorflow
from ..database import Text, Category, DatasetFunc
from ..config import DATA_SET_SIZE
from .base import BaseSentimentAnalyzer, AlreadyTrainedError, NotTrainedError
@ -21,17 +22,20 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
dataset_func_with_tensor_tuple,
output_signature=(
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
tensorflow.TensorSpec(shape=(), dtype=tensorflow.float32, name="category"),
tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="category"),
)
)
def _build_model(self) -> tensorflow.keras.Sequential:
return tensorflow.keras.Sequential([
tensorflow.keras.layers.Embedding(input_dim=self.MAX_FEATURES + 1, output_dim=self.EMBEDDING_DIM),
tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.Embedding(
input_dim=self.MAX_FEATURES + 1,
output_dim=self.EMBEDDING_DIM,
),
# tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.GlobalAveragePooling1D(),
tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.Dense(1),
# tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.Dense(5, activation="softmax"),
])
def _build_vectorizer(self) -> tensorflow.keras.layers.TextVectorization:
@ -41,9 +45,13 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
text = tensorflow.expand_dims(text, -1) # TODO: ??????
return self.text_vectorization_layer(text), category
MAX_FEATURES = 1000
EMBEDDING_DIM = 16
EPOCHS = 10
MAX_FEATURES = 2500
EMBEDDING_DIM = 24
"""
Count of possible "semantic meanings" of words, represented as dimensions of a tensor.
"""
EPOCHS = 3
def train(self, dataset_func: DatasetFunc) -> None:
if self.trained:
@ -55,11 +63,10 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
self.text_vectorization_layer.adapt(only_text_set)
training_set = training_set.map(self.__vectorize_data)
self.model.compile(loss=tensorflow.keras.losses.CosineSimilarity(axis=0), metrics=["accuracy"])
# self.model.compile(loss=tensorflow.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=["accuracy"])
self.model.compile(loss=tensorflow.keras.losses.MeanAbsoluteError(), optimizer="adam", metrics=["accuracy"])
history = self.model.fit(training_set, epochs=self.EPOCHS)
...
self.model.fit(training_set, epochs=self.EPOCHS)
self.trained = True
@ -67,5 +74,15 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
if not self.trained:
raise NotTrainedError()
prediction = self.model.predict(text)
breakpoint()
vector = self.text_vectorization_layer(tensorflow.expand_dims(text, -1))
prediction = self.model.predict(vector)
max_i = None
max_p = None
for i, p in enumerate(iter(prediction[0])):
if max_p is None or p > max_p:
max_i = i
max_p = p
return float(max_i) + 1.0

View file

@ -41,7 +41,6 @@ def load_cache(path: str | pathlib.Path) -> DatasetFunc:
path = pathlib.Path(path)
if not path.exists():
log.error("Specified cache directory does not exist: %s", path)
raise FileNotFoundError("The specified path does not exist.")
def data_cache_loader():

View file

@ -33,7 +33,13 @@ class Review:
return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
def to_tensor_category(self) -> tensorflow.Tensor:
return tensorflow.convert_to_tensor(self.category / 5.0, dtype=tensorflow.float32)
return tensorflow.convert_to_tensor([
1.0 if self.category == 1.0 else 0.0,
1.0 if self.category == 2.0 else 0.0,
1.0 if self.category == 3.0 else 0.0,
1.0 if self.category == 4.0 else 0.0,
1.0 if self.category == 5.0 else 0.0,
], dtype=tensorflow.float32)
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
return (

View file

@ -15,7 +15,7 @@ def install_log_handler(loggers: list[logging.Logger] = None):
for logger in loggers:
coloredlogs.install(
logger=logger,
level="DEBUG",
level="INFO",
fmt="{asctime} | {name:<32} | {levelname:>8} | {message}",
style="{",
level_styles=dict(