1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

Final improvements

This commit is contained in:
Steffo 2023-02-13 15:40:22 +01:00
parent 4e8aa68db3
commit 62e8c6fa9f
Signed by: steffo
GPG key ID: 2A24051445686895
3 changed files with 12 additions and 19 deletions

View file

@@ -4,16 +4,9 @@
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="CONFIRM_OVERWRITE" value="False" />
<env name="EVALUATION_SET_SIZE" value="4000" />
<env name="NLTK_DATA" value="./data/nltk" />
<env name="PYTHONUNBUFFERED" value="1" />
<env name="TENSORFLOW_EMBEDDING_SIZE" value="64" />
<env name="TENSORFLOW_MAX_FEATURES" value="1000000" />
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
<env name="TRAINING_SET_SIZE" value="4000" />
<env name="VALIDATION_SET_SIZE" value="100" />
<env name="WORKING_SET_SIZE" value="100000" />
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
</envs>
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
@@ -26,7 +19,7 @@
<option name="SCRIPT_NAME" value="unimore_bda_6" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="EMULATE_TERMINAL" value="true" />
<option name="MODULE_MODE" value="true" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />

View file

@@ -41,22 +41,22 @@ def main():
slog.debug("Selected sample_func: %s", sample_func.__name__)
for SentimentAnalyzer in [
# ThreeCheat,
ThreeCheat,
NLTKSentimentAnalyzer,
TensorflowPolarSentimentAnalyzer,
TensorflowCategorySentimentAnalyzer,
NLTKSentimentAnalyzer,
]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
for Tokenizer in [
PottsTokenizer,
PlainTokenizer,
HuggingBertTokenizer,
PottsTokenizerWithNegation,
LowercaseTokenizer,
NLTKWordTokenizer,
PottsTokenizer,
PottsTokenizerWithNegation,
HuggingBertTokenizer,
]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")

View file

@@ -34,10 +34,10 @@ def WORKING_SET_SIZE(val: str | None) -> int:
The number of reviews to consider from the database.
Set this to a low number to prevent slowness due to the dataset's huge size.
Defaults to `10000`.
Defaults to `1000000`.
"""
if val is None:
return 10000
return 1000000
try:
return int(val)
except ValueError:
@@ -49,10 +49,10 @@ def TRAINING_SET_SIZE(val: str | None) -> int:
"""
The number of reviews from each category to fetch for the training dataset.
Defaults to `5000`.
Defaults to `4000`.
"""
if val is None:
return 5000
return 4000
try:
return int(val)
except ValueError:
@@ -94,10 +94,10 @@ def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
"""
The maximum number of features to use in Tensorflow models.
Defaults to `30000`.
Defaults to `300000`.
"""
if val is None:
return 30000
return 300000
try:
return int(val)
except ValueError: