1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-25 17:24:20 +00:00
bda-6-steffo/unimore_bda_6/config.py

173 lines
3.8 KiB
Python
Raw Normal View History

2023-02-01 03:20:09 +00:00
import cfig
# Module-wide cfig configuration object; the `@config.optional()` decorators and
# the `config.cli()` call further down in this file all go through it.
config = cfig.Configuration()
2023-02-01 15:03:10 +00:00
@config.optional()
def MONGO_HOST(val: str | None) -> str:
    """
    The hostname of the MongoDB database to connect to.

    Defaults to `"127.0.0.1"`.
    """
    # Any falsy value — None as well as an empty string — selects the default host.
    if not val:
        return "127.0.0.1"
    return val
2023-02-01 03:20:09 +00:00
2023-02-01 15:03:10 +00:00
@config.optional()
def MONGO_PORT(val: str | None) -> int:
    """
    The port of the MongoDB database to connect to.

    Defaults to `27017`.
    """
    # `val` is the raw string supplied for this option, or None when it was left unset.
    if val is None:
        return 27017
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the original parsing failure is reported as the direct cause.
        raise cfig.InvalidValueError("Not an int.") from err
2023-02-01 03:20:09 +00:00
2023-02-02 03:07:17 +00:00
@config.optional()
def WORKING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to consider from the database.
    Set this to a low number to prevent slowness due to the dataset's huge size.

    Defaults to `1000000`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 1_000_000
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
2023-02-01 15:03:10 +00:00
@config.optional()
def TRAINING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the training dataset.

    Defaults to `4000`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 4000
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def VALIDATION_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the validation dataset.

    Defaults to `400`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 400
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def EVALUATION_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the evaluation dataset.

    Defaults to `1000`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 1000
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
    """
    The maximum number of features to use in Tensorflow models.

    Defaults to `300000`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 300_000
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def TENSORFLOW_EMBEDDING_SIZE(val: str | None) -> int:
    """
    The size of the embeddings tensor to use in Tensorflow models.

    Defaults to `12`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 12
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def TENSORFLOW_EPOCHS(val: str | None) -> int:
    """
    The number of epochs to train Tensorflow models for.

    Defaults to `3`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 3
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
2023-02-14 01:25:38 +00:00
@config.optional()
def TARGET_RUNS(val: str | None) -> int:
    """
    The amount of successful runs to perform on a sample-model-tokenizer combination.

    Defaults to `1`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 1
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
@config.optional()
def MAXIMUM_RUNS(val: str | None) -> int:
    """
    The maximum amount of runs to perform on a sample-model-tokenizer combination before skipping it.

    Defaults to `25`.
    """
    # An unset option (None) falls back to the documented default.
    if val is None:
        return 25
    try:
        return int(val)
    except ValueError as err:
        # Keep the underlying ValueError attached as the cause of the configuration error.
        raise cfig.InvalidValueError("Not an int.") from err
2023-02-01 03:20:09 +00:00
# Names exported by `from <this module> import *`.
# TARGET_RUNS and MAXIMUM_RUNS are defined in this module and are therefore listed
# alongside the other configuration items.
__all__ = (
    "config",
    "MONGO_HOST",
    "MONGO_PORT",
    "WORKING_SET_SIZE",
    "TRAINING_SET_SIZE",
    "VALIDATION_SET_SIZE",
    "EVALUATION_SET_SIZE",
    "TENSORFLOW_MAX_FEATURES",
    "TENSORFLOW_EMBEDDING_SIZE",
    "TENSORFLOW_EPOCHS",
    "TARGET_RUNS",
    "MAXIMUM_RUNS",
)
2023-02-02 01:56:37 +00:00
# When this module is executed directly (not imported), hand control to the
# configuration object's `cli()` entry point.
if __name__ == "__main__":
    config.cli()