mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-22 07:54:19 +00:00
Final improvements
This commit is contained in:
parent
4e8aa68db3
commit
62e8c6fa9f
3 changed files with 12 additions and 19 deletions
|
@ -4,16 +4,9 @@
|
||||||
<option name="INTERPRETER_OPTIONS" value="" />
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
<option name="PARENT_ENVS" value="true" />
|
<option name="PARENT_ENVS" value="true" />
|
||||||
<envs>
|
<envs>
|
||||||
<env name="CONFIRM_OVERWRITE" value="False" />
|
|
||||||
<env name="EVALUATION_SET_SIZE" value="4000" />
|
|
||||||
<env name="NLTK_DATA" value="./data/nltk" />
|
<env name="NLTK_DATA" value="./data/nltk" />
|
||||||
<env name="PYTHONUNBUFFERED" value="1" />
|
<env name="PYTHONUNBUFFERED" value="1" />
|
||||||
<env name="TENSORFLOW_EMBEDDING_SIZE" value="64" />
|
|
||||||
<env name="TENSORFLOW_MAX_FEATURES" value="1000000" />
|
|
||||||
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
|
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
|
||||||
<env name="TRAINING_SET_SIZE" value="4000" />
|
|
||||||
<env name="VALIDATION_SET_SIZE" value="100" />
|
|
||||||
<env name="WORKING_SET_SIZE" value="100000" />
|
|
||||||
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
|
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
|
||||||
</envs>
|
</envs>
|
||||||
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
|
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
|
||||||
|
@ -26,7 +19,7 @@
|
||||||
<option name="SCRIPT_NAME" value="unimore_bda_6" />
|
<option name="SCRIPT_NAME" value="unimore_bda_6" />
|
||||||
<option name="PARAMETERS" value="" />
|
<option name="PARAMETERS" value="" />
|
||||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||||
<option name="EMULATE_TERMINAL" value="false" />
|
<option name="EMULATE_TERMINAL" value="true" />
|
||||||
<option name="MODULE_MODE" value="true" />
|
<option name="MODULE_MODE" value="true" />
|
||||||
<option name="REDIRECT_INPUT" value="false" />
|
<option name="REDIRECT_INPUT" value="false" />
|
||||||
<option name="INPUT_FILE" value="" />
|
<option name="INPUT_FILE" value="" />
|
||||||
|
|
|
@ -41,22 +41,22 @@ def main():
|
||||||
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
||||||
|
|
||||||
for SentimentAnalyzer in [
|
for SentimentAnalyzer in [
|
||||||
# ThreeCheat,
|
ThreeCheat,
|
||||||
|
NLTKSentimentAnalyzer,
|
||||||
TensorflowPolarSentimentAnalyzer,
|
TensorflowPolarSentimentAnalyzer,
|
||||||
TensorflowCategorySentimentAnalyzer,
|
TensorflowCategorySentimentAnalyzer,
|
||||||
NLTKSentimentAnalyzer,
|
|
||||||
]:
|
]:
|
||||||
|
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
|
||||||
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
|
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
|
||||||
|
|
||||||
for Tokenizer in [
|
for Tokenizer in [
|
||||||
PottsTokenizer,
|
|
||||||
PlainTokenizer,
|
PlainTokenizer,
|
||||||
HuggingBertTokenizer,
|
|
||||||
PottsTokenizerWithNegation,
|
|
||||||
LowercaseTokenizer,
|
LowercaseTokenizer,
|
||||||
NLTKWordTokenizer,
|
NLTKWordTokenizer,
|
||||||
|
PottsTokenizer,
|
||||||
|
PottsTokenizerWithNegation,
|
||||||
|
HuggingBertTokenizer,
|
||||||
]:
|
]:
|
||||||
|
|
||||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
||||||
|
|
|
@ -34,10 +34,10 @@ def WORKING_SET_SIZE(val: str | None) -> int:
|
||||||
The number of reviews to consider from the database.
|
The number of reviews to consider from the database.
|
||||||
Set this to a low number to prevent slowness due to the dataset's huge size.
|
Set this to a low number to prevent slowness due to the dataset's huge size.
|
||||||
|
|
||||||
Defaults to `10000`.
|
Defaults to `1000000`.
|
||||||
"""
|
"""
|
||||||
if val is None:
|
if val is None:
|
||||||
return 10000
|
return 1000000
|
||||||
try:
|
try:
|
||||||
return int(val)
|
return int(val)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -49,10 +49,10 @@ def TRAINING_SET_SIZE(val: str | None) -> int:
|
||||||
"""
|
"""
|
||||||
The number of reviews from each category to fetch for the training dataset.
|
The number of reviews from each category to fetch for the training dataset.
|
||||||
|
|
||||||
Defaults to `5000`.
|
Defaults to `4000`.
|
||||||
"""
|
"""
|
||||||
if val is None:
|
if val is None:
|
||||||
return 5000
|
return 4000
|
||||||
try:
|
try:
|
||||||
return int(val)
|
return int(val)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -94,10 +94,10 @@ def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
|
||||||
"""
|
"""
|
||||||
The maximum number of features to use in Tensorflow models.
|
The maximum number of features to use in Tensorflow models.
|
||||||
|
|
||||||
Defaults to `30000`.
|
Defaults to `300000`.
|
||||||
"""
|
"""
|
||||||
if val is None:
|
if val is None:
|
||||||
return 30000
|
return 300000
|
||||||
try:
|
try:
|
||||||
return int(val)
|
return int(val)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
|
Loading…
Reference in a new issue