1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-10-16 14:27:32 +00:00

Add polar model

This commit is contained in:
Steffo 2023-02-10 05:52:13 +01:00
parent 4f40aa44b4
commit c979699ff1
Signed by: steffo
GPG key ID: 2A24051445686895
5 changed files with 68 additions and 11 deletions

View file

@ -4,10 +4,14 @@
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
<env name="CONFIRM_OVERWRITE" value="False" />
<env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" />
<env name="TENSORFLOW_EPOCHS" value="4" />
<env name="EVALUATION_SET_SIZE" value="100" />
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
<env name="TRAINING_SET_SIZE" value="1000" />
<env name="VALIDATION_SET_SIZE" value="100" />
<env name="WORKING_SET_SIZE" value="1000000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs>

View file

@ -6,8 +6,7 @@ install_general_log_handlers()
from .config import config
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
from .analysis.nltk_sentiment import NLTKSentimentAnalyzer
from .analysis.tf_text import TensorflowCategorySentimentAnalyzer
from .analysis import NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, TensorflowPolarSentimentAnalyzer
from .analysis.base import TrainingFailedError
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation
from .gathering import Caches
@ -39,6 +38,7 @@ def main():
slog.debug("Selected sample_func: %s", sample_func.__name__)
for SentimentAnalyzer in [
TensorflowPolarSentimentAnalyzer,
TensorflowCategorySentimentAnalyzer,
NLTKSentimentAnalyzer,
]:

View file

@ -50,7 +50,7 @@ class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
for review in evaluation_dataset_func():
resulting_category = self.use(review.text)
evaluated += 1
correct += 1 if resulting_category == review.category else 0
correct += 1 if round(resulting_category) == round(review.category) else 0
score += 1 - (abs(resulting_category - review.category) / 4)
return EvaluationResults(correct=correct, evaluated=evaluated, score=score)

View file

@ -164,7 +164,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta):
vector = self.text_vectorization_layer(text)
prediction = self.model.predict(vector, verbose=False)
return prediction
return self._translate_prediction(prediction)
class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
@ -175,10 +175,10 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
return build_dataset(
dataset_func=dataset_func,
conversion_func=Review.to_tensor_tuple,
conversion_func=Review.to_tensor_tuple_category,
output_signature=(
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"),
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="category_one_hot"),
),
)
@ -218,7 +218,53 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
return result
class TensorflowPolarSentimentAnalyzer(TensorflowSentimentAnalyzer):
    """
    A `tensorflow`-based sentiment analyzer that uses the floating point value rating to get as close as possible to the correct category.

    The model is a regressor with a single output unit, trained on the targets produced by `Review.to_tensor_tuple_normvalue`, i.e. the review category divided by 5.
    """

    def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
        """
        Build the dataset of ``(text, normalized category)`` pairs used to train the model.
        """
        return build_dataset(
            dataset_func=dataset_func,
            conversion_func=Review.to_tensor_tuple_normvalue,
            output_signature=(
                tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
                tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.float32, name="category"),
            ),
        )

    def _build_model(self) -> tensorflow.keras.Sequential:
        """
        Create and compile the regression model: embedding, average pooling, and a single linear output unit.
        """
        log.debug("Creating sequential categorizer model...")
        model = tensorflow.keras.Sequential([
            tensorflow.keras.layers.Embedding(
                # NOTE(review): the extra slot is presumably reserved for padding / out-of-vocabulary tokens - confirm.
                input_dim=TENSORFLOW_MAX_FEATURES.__wrapped__ + 1,
                output_dim=TENSORFLOW_EMBEDDING_SIZE.__wrapped__,
            ),
            tensorflow.keras.layers.Dropout(0.25),
            tensorflow.keras.layers.GlobalAveragePooling1D(),
            tensorflow.keras.layers.Dropout(0.25),
            # No activation: the unit outputs an unbounded real number.
            tensorflow.keras.layers.Dense(1),
        ])
        log.debug("Compiling model: %s", model)
        model.compile(
            optimizer=tensorflow.keras.optimizers.Adam(global_clipnorm=1.0),
            loss=tensorflow.keras.losses.MeanSquaredError(),
            metrics=[
                tensorflow.keras.metrics.MeanAbsoluteError(),
                # NOTE(review): with a single output unit, cosine similarity can only reflect sign agreement - consider dropping it.
                tensorflow.keras.metrics.CosineSimilarity(),
            ]
        )
        log.debug("Compiled model: %s", model)
        return model

    def _translate_prediction(self, a: numpy.ndarray) -> Category:
        """
        Convert the raw model output into a rating on the same scale as `Review.category`.

        The model is trained on ``category / 5`` (see `Review.to_tensor_normvalue`), so the prediction has to be multiplied by 5 before it can be compared with the real category during evaluation.
        """
        # Keep this factor in sync with the divisor used by Review.to_tensor_normvalue.
        return a[0, 0] * 5
# Names exported by `from <this module> import *`.
__all__ = (
"TensorflowSentimentAnalyzer",
"TensorflowCategorySentimentAnalyzer",
"TensorflowPolarSentimentAnalyzer",
)

View file

@ -40,6 +40,15 @@ class Review:
def to_tensor_text(self) -> tensorflow.Tensor:
    """
    Convert the text of the review into a string tensor.
    """
    text_tensor = tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
    return text_tensor
def to_tensor_normvalue(self) -> tensorflow.Tensor:
    """
    Convert the category of the review into a one-element float tensor holding the category divided by 5.
    """
    normalized_category = self.category / 5
    return tensorflow.convert_to_tensor([normalized_category], dtype=tensorflow.float32)
def to_tensor_tuple_normvalue(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
    """
    Convert the review into a ``(text, normalized category)`` pair of tensors.
    """
    # Text first, then value: same evaluation order as building the tuple inline.
    text_tensor = self.to_tensor_text()
    normvalue_tensor = self.to_tensor_normvalue()
    return text_tensor, normvalue_tensor
def to_tensor_category(self) -> tensorflow.Tensor:
return tensorflow.convert_to_tensor([[
1.0 if self.category == 1.0 else 0.0,
@ -49,13 +58,11 @@ class Review:
1.0 if self.category == 5.0 else 0.0,
]], dtype=tensorflow.float32)
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
t = (
def to_tensor_tuple_category(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
return (
self.to_tensor_text(),
self.to_tensor_category(),
)
log.debug("Converted %s", t)
return t
__all__ = (