mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-22 07:54:19 +00:00
Add polar model
This commit is contained in:
parent
4f40aa44b4
commit
c979699ff1
5 changed files with 68 additions and 11 deletions
|
@ -4,10 +4,14 @@
|
|||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
<env name="CONFIRM_OVERWRITE" value="False" />
|
||||
<env name="NLTK_DATA" value="./data/nltk" />
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
<env name="TENSORFLOW_EPOCHS" value="4" />
|
||||
<env name="EVALUATION_SET_SIZE" value="100" />
|
||||
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
|
||||
<env name="TRAINING_SET_SIZE" value="1000" />
|
||||
<env name="VALIDATION_SET_SIZE" value="100" />
|
||||
<env name="WORKING_SET_SIZE" value="1000000" />
|
||||
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
|
||||
</envs>
|
||||
|
|
|
@ -6,8 +6,7 @@ install_general_log_handlers()
|
|||
|
||||
from .config import config
|
||||
from .database import mongo_client_from_config, reviews_collection, sample_reviews_polar, sample_reviews_varied
|
||||
from .analysis.nltk_sentiment import NLTKSentimentAnalyzer
|
||||
from .analysis.tf_text import TensorflowCategorySentimentAnalyzer
|
||||
from .analysis import NLTKSentimentAnalyzer, TensorflowCategorySentimentAnalyzer, TensorflowPolarSentimentAnalyzer
|
||||
from .analysis.base import TrainingFailedError
|
||||
from .tokenizer import PlainTokenizer, LowercaseTokenizer, NLTKWordTokenizer, PottsTokenizer, PottsTokenizerWithNegation
|
||||
from .gathering import Caches
|
||||
|
@ -39,6 +38,7 @@ def main():
|
|||
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
||||
|
||||
for SentimentAnalyzer in [
|
||||
TensorflowPolarSentimentAnalyzer,
|
||||
TensorflowCategorySentimentAnalyzer,
|
||||
NLTKSentimentAnalyzer,
|
||||
]:
|
||||
|
|
|
@ -50,7 +50,7 @@ class BaseSentimentAnalyzer(metaclass=abc.ABCMeta):
|
|||
for review in evaluation_dataset_func():
|
||||
resulting_category = self.use(review.text)
|
||||
evaluated += 1
|
||||
correct += 1 if resulting_category == review.category else 0
|
||||
correct += 1 if round(resulting_category) == round(review.category) else 0
|
||||
score += 1 - (abs(resulting_category - review.category) / 4)
|
||||
|
||||
return EvaluationResults(correct=correct, evaluated=evaluated, score=score)
|
||||
|
|
|
@ -164,7 +164,7 @@ class TensorflowSentimentAnalyzer(BaseSentimentAnalyzer, metaclass=abc.ABCMeta):
|
|||
vector = self.text_vectorization_layer(text)
|
||||
prediction = self.model.predict(vector, verbose=False)
|
||||
|
||||
return prediction
|
||||
return self._translate_prediction(prediction)
|
||||
|
||||
|
||||
class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
|
||||
|
@ -175,10 +175,10 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
|
|||
def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
|
||||
return build_dataset(
|
||||
dataset_func=dataset_func,
|
||||
conversion_func=Review.to_tensor_tuple,
|
||||
conversion_func=Review.to_tensor_tuple_category,
|
||||
output_signature=(
|
||||
tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
|
||||
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="review_one_hot"),
|
||||
tensorflow.TensorSpec(shape=(1, 5,), dtype=tensorflow.float32, name="category_one_hot"),
|
||||
),
|
||||
)
|
||||
|
||||
|
@ -218,7 +218,53 @@ class TensorflowCategorySentimentAnalyzer(TensorflowSentimentAnalyzer):
|
|||
return result
|
||||
|
||||
|
||||
class TensorflowPolarSentimentAnalyzer(TensorflowSentimentAnalyzer):
    """
    A `tensorflow`-based sentiment analyzer that treats the rating as a single floating point value and tries to get as close as possible to the correct category.

    The model is trained on the pairs produced by `Review.to_tensor_tuple_normvalue`, where the target is the category divided by 5, and outputs a single number which `_translate_prediction` maps back to the category scale.
    """

    def _build_dataset(self, dataset_func: CachedDatasetFunc) -> tensorflow.data.Dataset:
        """
        Build the dataset of ``(text, normalized value)`` pairs used by this analyzer.

        :param dataset_func: The dataset function, passed through to `build_dataset`.
        :return: Whatever `build_dataset` returns for the given conversion function and signature.
        """
        return build_dataset(
            dataset_func=dataset_func,
            conversion_func=Review.to_tensor_tuple_normvalue,
            output_signature=(
                tensorflow.TensorSpec(shape=(), dtype=tensorflow.string, name="text"),
                # One float per review, matching the one-element tensor built by `Review.to_tensor_normvalue`.
                tensorflow.TensorSpec(shape=(1,), dtype=tensorflow.float32, name="category"),
            ),
        )

    def _build_model(self) -> tensorflow.keras.Sequential:
        """
        Create and compile the sequential regression model.

        The network is an embedding followed by dropout, global average pooling, dropout again, and a single-unit dense output; it is compiled with Adam (global gradient norm clipped to 1.0) and a mean squared error loss.

        :return: The compiled model.
        """
        log.debug("Creating sequential categorizer model...")
        model = tensorflow.keras.Sequential([
            tensorflow.keras.layers.Embedding(
                input_dim=TENSORFLOW_MAX_FEATURES.__wrapped__ + 1,
                output_dim=TENSORFLOW_EMBEDDING_SIZE.__wrapped__,
            ),
            tensorflow.keras.layers.Dropout(0.25),
            tensorflow.keras.layers.GlobalAveragePooling1D(),
            tensorflow.keras.layers.Dropout(0.25),
            # Single output unit: the predicted normalized value.
            tensorflow.keras.layers.Dense(1),
        ])

        log.debug("Compiling model: %s", model)
        model.compile(
            optimizer=tensorflow.keras.optimizers.Adam(global_clipnorm=1.0),
            loss=tensorflow.keras.losses.MeanSquaredError(),
            metrics=[
                tensorflow.keras.metrics.MeanAbsoluteError(),
                # NOTE(review): with a single output value per sample, cosine similarity presumably only reflects sign agreement - confirm this metric is useful here.
                tensorflow.keras.metrics.CosineSimilarity(),
            ]
        )

        log.debug("Compiled model: %s", model)
        return model

    def _translate_prediction(self, a: numpy.ndarray) -> Category:
        """
        Convert the raw model output into a value on the same scale as `Review.category`.

        :param a: The array returned by the model's ``predict``; only its ``[0, 0]`` element is used.
        :return: The predicted category as a floating point value.
        """
        # The training target is `category / 5`, so the scaling has to be undone here;
        # otherwise callers would compare a normalized prediction against an un-normalized category.
        return a[0, 0] * 5
|
||||
|
||||
|
||||
__all__ = (
|
||||
"TensorflowSentimentAnalyzer",
|
||||
"TensorflowCategorySentimentAnalyzer",
|
||||
"TensorflowPolarSentimentAnalyzer",
|
||||
)
|
||||
|
|
|
@ -40,6 +40,15 @@ class Review:
|
|||
def to_tensor_text(self) -> tensorflow.Tensor:
|
||||
return tensorflow.convert_to_tensor(self.text, dtype=tensorflow.string)
|
||||
|
||||
def to_tensor_normvalue(self) -> tensorflow.Tensor:
    """
    Return the category of this review divided by 5, wrapped in a one-element `tensorflow.float32` tensor.
    """
    normalized = self.category / 5
    return tensorflow.convert_to_tensor([normalized], dtype=tensorflow.float32)
|
||||
|
||||
def to_tensor_tuple_normvalue(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
    """
    Return the pair made of the text tensor and the normalized value tensor of this review, in that order.
    """
    text_tensor = self.to_tensor_text()
    normvalue_tensor = self.to_tensor_normvalue()
    return text_tensor, normvalue_tensor
|
||||
|
||||
def to_tensor_category(self) -> tensorflow.Tensor:
|
||||
return tensorflow.convert_to_tensor([[
|
||||
1.0 if self.category == 1.0 else 0.0,
|
||||
|
@ -49,13 +58,11 @@ class Review:
|
|||
1.0 if self.category == 5.0 else 0.0,
|
||||
]], dtype=tensorflow.float32)
|
||||
|
||||
def to_tensor_tuple(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
|
||||
t = (
|
||||
def to_tensor_tuple_category(self) -> tuple[tensorflow.Tensor, tensorflow.Tensor]:
|
||||
return (
|
||||
self.to_tensor_text(),
|
||||
self.to_tensor_category(),
|
||||
)
|
||||
log.debug("Converted %s", t)
|
||||
return t
|
||||
|
||||
|
||||
__all__ = (
|
||||
|
|
Loading…
Reference in a new issue