1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00

Add polar model

This commit is contained in:
Steffo 2023-02-10 05:52:13 +01:00
parent 4f40aa44b4
commit c979699ff1
Signed by: steffo
GPG key ID: 2A24051445686895
5 changed files with 68 additions and 11 deletions

View file

@ -4,10 +4,14 @@
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs> <envs>
<env name="PYTHONUNBUFFERED" value="1" />
<env name="CONFIRM_OVERWRITE" value="False" /> <env name="CONFIRM_OVERWRITE" value="False" />
<env name="NLTK_DATA" value="./data/nltk" /> <env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" />
<env name="TENSORFLOW_EPOCHS" value="4" />
<env name="EVALUATION_SET_SIZE" value="100" />
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" /> <env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
<env name="TRAINING_SET_SIZE" value="1000" />
<env name="VALIDATION_SET_SIZE" value="100" />
<env name="WORKING_SET_SIZE" value="1000000" /> <env name="WORKING_SET_SIZE" value="1000000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" /> <env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs> </envs>

View file

@ -6,8 +6,7 @@ install_general_log_handlers()
from .config import config from .config import config
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
from .analysis.nltk_sentiment import NLTKSentimentAnalyzer from .analysis import NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, TensorflowPolarSentimentAnalyzer
from .analysis.tf_text import TensorflowCategorySentimentAnalyzer
from .analysis.base import TrainingFailedError from .analysis.base import TrainingFailedError
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation
from .gathering import Caches from .gathering import Caches
@ -39,6 +38,7 @@ def main():
slog.debug("Selected sample_func: %s", sample_func.__name__) slog.debug("Selected sample_func: %s", sample_func.__name__)
for SentimentAnalyzer in [ for SentimentAnalyzer in [
TensorflowPolarSentimentAnalyzer,
TensorflowCategorySentimentAnalyzer, TensorflowCategorySentimentAnalyzer,
NLTKSentimentAnalyzer, NLTKSentimentAnalyzer,
]: ]:

View file

@ -50,7 +50,7 @@ class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
for review in evaluation_dataset_func(): for review in evaluation_dataset_func():
resulting_category = self.use(review.text) resulting_category = self.use(review.text)
evaluated += 1 evaluated += 1
correct += 1 if resulting_category == review.category else 0 correct += 1 if round(resulting_category) == round(review.category) else 0
score += 1 - (abs(resulting_category - review.category) / 4) score += 1 - (abs(resulting_category - review.category) / 4)
return EvaluationResults(correct=correct, evaluated=evaluated, score=score) return EvaluationResults(correct=correct, evaluated=evaluated, score=score)

View file

@ -164,7 +164,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta):
vector = self.text_vectorization_layer(text) vector = self.text_vectorization_layer(text)
prediction = self.model.predict(vector, verbose=False) prediction = self.model.predict(vector, verbose=False)
return prediction return self._translate_prediction(prediction)
class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer): class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
@ -175,10 +175,10 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset: def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
return build_dataset( return build_dataset(
dataset_func=dataset_func, dataset_func=dataset_func,
conversion_func=Review.to_tensor_tuple, conversion_func=Review.to_tensor_tuple_category,
output_signature=( output_signature=(
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"), tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"), tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="category_one_hot"),
), ),
) )
@ -218,7 +218,53 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
return result return result
class TensorflowPolarSentimentAnalyzer(TensorflowSentimentAnalyzer):
    """
    A `tensorflow`-based sentiment analyzer that treats the rating as a single floating point value and tries to predict it as closely as possible, instead of picking one discrete category.

    The model is trained on the rating divided by 5 (see `Review.to_tensor_normvalue`), so raw predictions are multiplied by 5 before being returned to callers.
    """

    def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
        """
        Build the dataset used by this analyzer from the given dataset function.

        Each element is a pair made of the review text (scalar string tensor) and the normalized rating (one-element float32 tensor), as produced by `Review.to_tensor_tuple_normvalue`.
        """
        return build_dataset(
            dataset_func=dataset_func,
            conversion_func=Review.to_tensor_tuple_normvalue,
            output_signature=(
                tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
                # Despite its name, this tensor carries the normalized rating (category / 5), not the raw category.
                tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.float32, name="category"),
            ),
        )

    def _build_model(self) -> tensorflow.keras.Sequential:
        """
        Create and compile the regression model: embedding, dropout, average pooling, dropout, and a single linear output unit.
        """
        log.debug("Creating sequential categorizer model...")
        model = tensorflow.keras.Sequential([
            tensorflow.keras.layers.Embedding(
                input_dim=TENSORFLOW_MAX_FEATURES.__wrapped__ + 1,
                output_dim=TENSORFLOW_EMBEDDING_SIZE.__wrapped__,
            ),
            tensorflow.keras.layers.Dropout(0.25),
            tensorflow.keras.layers.GlobalAveragePooling1D(),
            tensorflow.keras.layers.Dropout(0.25),
            # Single unit with no activation: the output is an unbounded real number.
            tensorflow.keras.layers.Dense(1),
        ])
        log.debug("Compiling model: %s", model)
        model.compile(
            optimizer=tensorflow.keras.optimizers.Adam(global_clipnorm=1.0),
            loss=tensorflow.keras.losses.MeanSquaredError(),
            metrics=[
                tensorflow.keras.metrics.MeanAbsoluteError(),
                # NOTE(review): with a single output value the cosine similarity presumably only reflects sign agreement; consider dropping this metric.
                tensorflow.keras.metrics.CosineSimilarity(),
            ]
        )
        log.debug("Compiled model: %s", model)
        return model

    def _translate_prediction(self, a: numpy.ndarray) -> Category:
        """
        Convert the raw model output into a rating on the same scale as `Review.category`.

        :param a: The prediction array; only the element at `[0, 0]` is read (presumably the array is shaped `(1, 1)` - TODO confirm).
        :return: The predicted rating, scaled back from the normalized training target.
        """
        # The training target is `category / 5`, so the division has to be undone here;
        # otherwise evaluation would compare a normalized value against a raw category.
        return a[0, 0] * 5
__all__ = ( __all__ = (
"TensorflowSentimentAnalyzer", "TensorflowSentimentAnalyzer",
"TensorflowCategorySentimentAnalyzer", "TensorflowCategorySentimentAnalyzer",
"TensorflowPolarSentimentAnalyzer",
) )

View file

@ -40,6 +40,15 @@ class Review:
def to_tensor_text(self) -> tensorflow.Tensor: def to_tensor_text(self) -> tensorflow.Tensor:
return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string) return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
def to_tensor_normvalue(self) -> tensorflow.Tensor:
    """
    Return the review's category divided by 5 as a one-element float32 tensor.
    """
    normalized = self.category / 5
    return tensorflow.convert_to_tensor([normalized], dtype=tensorflow.float32)
def to_tensor_tuple_normvalue(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
    """
    Return the pair made of the text tensor and the normalized value tensor of this review, in that order.
    """
    text_tensor = self.to_tensor_text()
    normvalue_tensor = self.to_tensor_normvalue()
    return text_tensor, normvalue_tensor
def to_tensor_category(self) -> tensorflow.Tensor: def to_tensor_category(self) -> tensorflow.Tensor:
return tensorflow.convert_to_tensor([[ return tensorflow.convert_to_tensor([[
1.0 if self.category == 1.0 else 0.0, 1.0 if self.category == 1.0 else 0.0,
@ -49,13 +58,11 @@ class Review:
1.0 if self.category == 5.0 else 0.0, 1.0 if self.category == 5.0 else 0.0,
]], dtype=tensorflow.float32) ]], dtype=tensorflow.float32)
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]: def to_tensor_tuple_category(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
t = ( return (
self.to_tensor_text(), self.to_tensor_text(),
self.to_tensor_category(), self.to_tensor_category(),
) )
log.debug("Converted %s", t)
return t
__all__ = ( __all__ = (