1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

back to i have no idea of what's happening, but at least it works

This commit is contained in:
Steffo 2023-02-07 10:22:09 +01:00
parent e9a4421acd
commit c31743f066
Signed by: steffo
GPG key ID: 2A24051445686895
6 changed files with 42 additions and 20 deletions

View file

@@ -5,10 +5,11 @@
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs> <envs>
<env name="CONFIRM_OVERWRITE" value="False" /> <env name="CONFIRM_OVERWRITE" value="False" />
<env name="DATA_SET_SIZE" value="100" /> <env name="DATA_SET_SIZE" value="2500" />
<env name="NLTK_DATA" value="./data/nltk" /> <env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" /> <env name="PYTHONUNBUFFERED" value="1" />
<env name="WORKING_SET_SIZE" value="1000000" /> <env name="WORKING_SET_SIZE" value="1000000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs> </envs>
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" /> <option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
<option name="SDK_NAME" value="Poetry (unimore-bda-6)" /> <option name="SDK_NAME" value="Poetry (unimore-bda-6)" />

View file

@@ -1,9 +1,8 @@
import abc import abc
import logging import logging
import typing as t
import dataclasses import dataclasses
from ..database import Text, Category, Review, DatasetFunc from ..database import Text, Category, DatasetFunc
log = logging.getLogger(__name__) log = logging.getLogger(__name__)

View file

@@ -1,6 +1,7 @@
import tensorflow import tensorflow
from ..database import Text, Category, DatasetFunc from ..database import Text, Category, DatasetFunc
from ..config import DATA_SET_SIZE
from .base import BaseSentimentAnalyzer, AlreadyTrainedError, NotTrainedError from .base import BaseSentimentAnalyzer, AlreadyTrainedError, NotTrainedError
@@ -21,17 +22,20 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
dataset_func_with_tensor_tuple, dataset_func_with_tensor_tuple,
output_signature=( output_signature=(
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"), tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
tensorflow.TensorSpec(shape=(), dtype=tensorflow.float32, name="category"), tensorflow.TensorSpec(shape=(5,), dtype=tensorflow.float32, name="category"),
) )
) )
def _build_model(self) -> tensorflow.keras.Sequential: def _build_model(self) -> tensorflow.keras.Sequential:
return tensorflow.keras.Sequential([ return tensorflow.keras.Sequential([
tensorflow.keras.layers.Embedding(input_dim=self.MAX_FEATURES + 1, output_dim=self.EMBEDDING_DIM), tensorflow.keras.layers.Embedding(
tensorflow.keras.layers.Dropout(0.2), input_dim=self.MAX_FEATURES + 1,
output_dim=self.EMBEDDING_DIM,
),
# tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.GlobalAveragePooling1D(), tensorflow.keras.layers.GlobalAveragePooling1D(),
tensorflow.keras.layers.Dropout(0.2), # tensorflow.keras.layers.Dropout(0.2),
tensorflow.keras.layers.Dense(1), tensorflow.keras.layers.Dense(5, activation="softmax"),
]) ])
def _build_vectorizer(self) -> tensorflow.keras.layers.TextVectorization: def _build_vectorizer(self) -> tensorflow.keras.layers.TextVectorization:
@@ -41,9 +45,13 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
text = tensorflow.expand_dims(text, -1) # TODO: ?????? text = tensorflow.expand_dims(text, -1) # TODO: ??????
return self.text_vectorization_layer(text), category return self.text_vectorization_layer(text), category
MAX_FEATURES = 1000 MAX_FEATURES = 2500
EMBEDDING_DIM = 16 EMBEDDING_DIM = 24
EPOCHS = 10 """
Count of possible "semantic meanings" of words, represented as dimensions of a tensor.
"""
EPOCHS = 3
def train(self, dataset_func: DatasetFunc) -> None: def train(self, dataset_func: DatasetFunc) -> None:
if self.trained: if self.trained:
@@ -55,11 +63,10 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
self.text_vectorization_layer.adapt(only_text_set) self.text_vectorization_layer.adapt(only_text_set)
training_set = training_set.map(self.__vectorize_data) training_set = training_set.map(self.__vectorize_data)
self.model.compile(loss=tensorflow.keras.losses.CosineSimilarity(axis=0), metrics=["accuracy"]) # self.model.compile(loss=tensorflow.keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer="adam", metrics=["accuracy"])
self.model.compile(loss=tensorflow.keras.losses.MeanAbsoluteError(), optimizer="adam", metrics=["accuracy"])
history = self.model.fit(training_set, epochs=self.EPOCHS) self.model.fit(training_set, epochs=self.EPOCHS)
...
self.trained = True self.trained = True
@@ -67,5 +74,15 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer):
if not self.trained: if not self.trained:
raise NotTrainedError() raise NotTrainedError()
prediction = self.model.predict(text) vector = self.text_vectorization_layer(tensorflow.expand_dims(text, -1))
breakpoint()
prediction = self.model.predict(vector)
max_i = None
max_p = None
for i, p in enumerate(iter(prediction[0])):
if max_p is None or p > max_p:
max_i = i
max_p = p
return float(max_i) + 1.0

View file

@@ -41,7 +41,6 @@ def load_cache(path: str | pathlib.Path) -> DatasetFunc:
path = pathlib.Path(path) path = pathlib.Path(path)
if not path.exists(): if not path.exists():
log.error("Specified cache directory does not exist: %s", path)
raise FileNotFoundError("The specified path does not exist.") raise FileNotFoundError("The specified path does not exist.")
def data_cache_loader(): def data_cache_loader():

View file

@@ -33,7 +33,13 @@ class Review:
return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string) return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
def to_tensor_category(self) -> tensorflow.Tensor: def to_tensor_category(self) -> tensorflow.Tensor:
return tensorflow.convert_to_tensor(self.category / 5.0, dtype=tensorflow.float32) return tensorflow.convert_to_tensor([
1.0 if self.category == 1.0 else 0.0,
1.0 if self.category == 2.0 else 0.0,
1.0 if self.category == 3.0 else 0.0,
1.0 if self.category == 4.0 else 0.0,
1.0 if self.category == 5.0 else 0.0,
], dtype=tensorflow.float32)
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]: def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
return ( return (

View file

@@ -15,7 +15,7 @@ def install_log_handler(loggers: list[logging.Logger] = None):
for logger in loggers: for logger in loggers:
coloredlogs.install( coloredlogs.install(
logger=logger, logger=logger,
level="DEBUG", level="INFO",
fmt="{asctime} | {name:<32} | {levelname:>8} | {message}", fmt="{asctime} | {name:<32} | {levelname:>8} | {message}",
style="{", style="{",
level_styles=dict( level_styles=dict(