2023-02-01 03:20:09 +00:00
|
|
|
import cfig
|
|
|
|
|
|
|
|
# Shared cfig configuration object. The resolver functions in this file attach
# themselves to it through the `@config.optional()` decorator, and `config.cli()`
# is invoked on it when the file is executed as a script.
config = cfig.Configuration()
|
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def MONGO_HOST(val: str | None) -> str:
    """
    The hostname of the MongoDB database to connect to.
    Defaults to `"127.0.0.1"`.
    """
    # NOTE(review): the docstring above is presumably surfaced by cfig as the
    # description of this key, so it is kept short and user-oriented.
    #
    # Any falsy input (None, but also an empty string) selects the default.
    if not val:
        return "127.0.0.1"
    return val
|
2023-02-01 03:20:09 +00:00
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def MONGO_PORT(val: str | None) -> int:
    """
    The port of the MongoDB database to connect to.
    Defaults to `27017`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the port as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 27017
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause
        # instead of being reported as an error raised while handling another.
        raise cfig.InvalidValueError("Not an int.") from err
|
2023-02-01 03:20:09 +00:00
|
|
|
|
|
|
|
|
2023-02-02 03:07:17 +00:00
|
|
|
@config.optional()
def WORKING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to consider from the database.
    Set this to a low number to prevent slowness due to the dataset's huge size.
    Defaults to `1000000`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the working set size as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 1000000
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def TRAINING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the training dataset.
    Defaults to `4000`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the per-category training set size as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 4000
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def VALIDATION_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the validation dataset.
    Defaults to `400`.
    """
    # The description previously said "training dataset", a copy-paste slip
    # from TRAINING_SET_SIZE; this key sizes the validation dataset.
    #
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the per-category validation set size as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 400
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def EVALUATION_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the evaluation dataset.
    Defaults to `1000`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the per-category evaluation set size as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 1000
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def TENSORFLOW_MAX_FEATURES(val: str | None) -> int:
    """
    The maximum number of features to use in Tensorflow models.
    Defaults to `300000`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the maximum feature count as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 300000
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def TENSORFLOW_EMBEDDING_SIZE(val: str | None) -> int:
    """
    The size of the embeddings tensor to use in Tensorflow models.
    Defaults to `12`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the embedding size as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 12
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def TENSORFLOW_EPOCHS(val: str | None) -> int:
    """
    The number of epochs to train Tensorflow models for.
    Defaults to `3`.
    """
    # val: the raw text to parse, or None, in which case the default is used.
    # Returns the epoch count as an int.
    # Raises cfig.InvalidValueError when the text cannot be parsed by int().
    if val is None:
        return 3
    try:
        return int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parse failure is kept as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
|
|
|
|
|
|
|
|
|
|
|
|
# Public names of this module, in definition order.
__all__ = (
    # The configuration object itself.
    "config",
    # MongoDB connection settings.
    "MONGO_HOST", "MONGO_PORT",
    # Dataset sizing.
    "WORKING_SET_SIZE", "TRAINING_SET_SIZE", "VALIDATION_SET_SIZE", "EVALUATION_SET_SIZE",
    # Tensorflow model settings.
    "TENSORFLOW_MAX_FEATURES", "TENSORFLOW_EMBEDDING_SIZE", "TENSORFLOW_EPOCHS",
)
|
2023-02-02 01:56:37 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: only when this file is executed directly (not on import),
# invoke `config.cli()` on the configuration object defined above.
if __name__ == "__main__":
    config.cli()
|