1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00

Final improvements

This commit is contained in:
Steffo 2023-02-13 15:40:22 +01:00
parent 4e8aa68db3
commit 62e8c6fa9f
Signed by: steffo
GPG key ID: 2A24051445686895
3 changed files with 12 additions and 19 deletions

View file

@@ -4,16 +4,9 @@
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" /> <option name="PARENT_ENVS" value="true" />
<envs> <envs>
<env name="CONFIRM_OVERWRITE" value="False" />
<env name="EVALUATION_SET_SIZE" value="4000" />
<env name="NLTK_DATA" value="./data/nltk" /> <env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" /> <env name="PYTHONUNBUFFERED" value="1" />
<env name="TENSORFLOW_EMBEDDING_SIZE" value="64" />
<env name="TENSORFLOW_MAX_FEATURES" value="1000000" />
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" /> <env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
<env name="TRAINING_SET_SIZE" value="4000" />
<env name="VALIDATION_SET_SIZE" value="100" />
<env name="WORKING_SET_SIZE" value="100000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" /> <env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs> </envs>
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" /> <option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
@@ -26,7 +19,7 @@
<option name="SCRIPT_NAME" value="unimore_bda_6" /> <option name="SCRIPT_NAME" value="unimore_bda_6" />
<option name="PARAMETERS" value="" /> <option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" /> <option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" /> <option name="EMULATE_TERMINAL" value="true" />
<option name="MODULE_MODE" value="true" /> <option name="MODULE_MODE" value="true" />
<option name="REDIRECT_INPUT" value="false" /> <option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" /> <option name="INPUT_FILE" value="" />

View file

@@ -41,22 +41,22 @@ def main():
slog.debug("Selected sample_func: %s", sample_func.__name__) slog.debug("Selected sample_func: %s", sample_func.__name__)
for SentimentAnalyzer in [ for SentimentAnalyzer in [
# ThreeCheat, ThreeCheat,
NLTKSentimentAnalyzer,
TensorflowPolarSentimentAnalyzer, TensorflowPolarSentimentAnalyzer,
TensorflowCategorySentimentAnalyzer, TensorflowCategorySentimentAnalyzer,
NLTKSentimentAnalyzer,
]: ]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}") slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__) slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
for Tokenizer in [ for Tokenizer in [
PottsTokenizer,
PlainTokenizer, PlainTokenizer,
HuggingBertTokenizer,
PottsTokenizerWithNegation,
LowercaseTokenizer, LowercaseTokenizer,
NLTKWordTokenizer, NLTKWordTokenizer,
PottsTokenizer,
PottsTokenizerWithNegation,
HuggingBertTokenizer,
]: ]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}") slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")

View file

@@ -34,10 +34,10 @@ def WORKING_SET_SIZE(val: str | None) -> int:
The number of reviews to consider from the database. The number of reviews to consider from the database.
Set this to a low number to prevent slowness due to the dataset's huge size. Set this to a low number to prevent slowness due to the dataset's huge size.
Defaults to `10000`. Defaults to `1000000`.
""" """
if val is None: if val is None:
return 10000 return 1000000
try: try:
return int(val) return int(val)
except ValueError: except ValueError:
@@ -49,10 +49,10 @@ def TRAINING_SET_SIZE(val: str | None) -> int:
""" """
The number of reviews from each category to fetch for the training dataset. The number of reviews from each category to fetch for the training dataset.
Defaults to `5000`. Defaults to `4000`.
""" """
if val is None: if val is None:
return 5000 return 4000
try: try:
return int(val) return int(val)
except ValueError: except ValueError:
@@ -94,10 +94,10 @@ def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
""" """
The maximum number of features to use in Tensorflow models. The maximum number of features to use in Tensorflow models.
Defaults to `30000`. Defaults to `300000`.
""" """
if val is None: if val is None:
return 30000 return 300000
try: try:
return int(val) return int(val)
except ValueError: except ValueError: