mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 23:44:19 +00:00
Final improvements
This commit is contained in:
parent
4e8aa68db3
commit
62e8c6fa9f
3 changed files with 12 additions and 19 deletions
|
@ -4,16 +4,9 @@
|
|||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="CONFIRM_OVERWRITE" value="False" />
|
||||
<env name="EVALUATION_SET_SIZE" value="4000" />
|
||||
<env name="NLTK_DATA" value="./data/nltk" />
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
<env name="TENSORFLOW_EMBEDDING_SIZE" value="64" />
|
||||
<env name="TENSORFLOW_MAX_FEATURES" value="1000000" />
|
||||
<env name="TF_CPP_MIN_LOG_LEVEL" value="2" />
|
||||
<env name="TRAINING_SET_SIZE" value="4000" />
|
||||
<env name="VALIDATION_SET_SIZE" value="100" />
|
||||
<env name="WORKING_SET_SIZE" value="100000" />
|
||||
<env name="XLA_FLAGS" value="--xla_gpu_cuda_data_dir=/opt/cuda" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
|
||||
|
@ -26,7 +19,7 @@
|
|||
<option name="SCRIPT_NAME" value="unimore_bda_6" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="true" />
|
||||
<option name="MODULE_MODE" value="true" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
|
|
|
@ -41,22 +41,22 @@ def main():
|
|||
slog.debug("Selected sample_func: %s", sample_func.__name__)
|
||||
|
||||
for SentimentAnalyzer in [
|
||||
# ThreeCheat,
|
||||
ThreeCheat,
|
||||
NLTKSentimentAnalyzer,
|
||||
TensorflowPolarSentimentAnalyzer,
|
||||
TensorflowCategorySentimentAnalyzer,
|
||||
NLTKSentimentAnalyzer,
|
||||
]:
|
||||
|
||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
|
||||
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
|
||||
|
||||
for Tokenizer in [
|
||||
PottsTokenizer,
|
||||
PlainTokenizer,
|
||||
HuggingBertTokenizer,
|
||||
PottsTokenizerWithNegation,
|
||||
LowercaseTokenizer,
|
||||
NLTKWordTokenizer,
|
||||
PottsTokenizer,
|
||||
PottsTokenizerWithNegation,
|
||||
HuggingBertTokenizer,
|
||||
]:
|
||||
|
||||
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
|
||||
|
|
|
@ -34,10 +34,10 @@ def WORKING_SET_SIZE(val: str | None) -> int:
|
|||
The number of reviews to consider from the database.
|
||||
Set this to a low number to prevent slowness due to the dataset's huge size.
|
||||
|
||||
Defaults to `10000`.
|
||||
Defaults to `1000000`.
|
||||
"""
|
||||
if val is None:
|
||||
return 10000
|
||||
return 1000000
|
||||
try:
|
||||
return int(val)
|
||||
except ValueError:
|
||||
|
@ -49,10 +49,10 @@ def TRAINING_SET_SIZE(val: str | None) -> int:
|
|||
"""
|
||||
The number of reviews from each category to fetch for the training dataset.
|
||||
|
||||
Defaults to `5000`.
|
||||
Defaults to `4000`.
|
||||
"""
|
||||
if val is None:
|
||||
return 5000
|
||||
return 4000
|
||||
try:
|
||||
return int(val)
|
||||
except ValueError:
|
||||
|
@ -94,10 +94,10 @@ def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
|
|||
"""
|
||||
The maximum number of features to use in Tensorflow models.
|
||||
|
||||
Defaults to `30000`.
|
||||
Defaults to `300000`.
|
||||
"""
|
||||
if val is None:
|
||||
return 30000
|
||||
return 300000
|
||||
try:
|
||||
return int(val)
|
||||
except ValueError:
|
||||
|
|
Loading…
Reference in a new issue