2023-02-01 03:20:09 +00:00
|
|
|
import cfig
|
|
|
|
|
|
|
|
# Shared cfig configuration object; the settings defined in this module are
# registered on it through the `@config.optional()` decorator.
config = cfig.Configuration()
|
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def MONGO_HOST(val: str | None) -> str:
    """
    The hostname of the MongoDB database to connect to.

    Falls back to ``127.0.0.1`` when the value is unset or empty.
    """
    if val:
        return val
    return "127.0.0.1"
|
2023-02-01 03:20:09 +00:00
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def MONGO_PORT(val: str | None) -> int:
    """
    The port of the MongoDB database to connect to.

    Defaults to ``27017`` when the value is unset.

    :param val: The raw configured value, or ``None`` if it was not set.
    :returns: The port number as an integer.
    :raises cfig.InvalidValueError: If the value cannot be parsed as an
        integer, or if it lies outside the ``0``-``65535`` port range.
    """
    if val is None:
        return 27017
    try:
        port = int(val)
    except ValueError as err:
        # Chain the parsing failure explicitly so it stays visible as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
    # A TCP port is a 16-bit unsigned number; anything else can never be connected to.
    if not 0 <= port <= 65535:
        raise cfig.InvalidValueError("Not a valid port number.")
    return port
|
2023-02-01 03:20:09 +00:00
|
|
|
|
|
|
|
|
2023-02-01 16:46:25 +00:00
|
|
|
@config.optional()
def SAMPLE_MODE(val: str | None) -> str:
    """
    Whether `$sample` or `$limit` should be used to aggregate the training and test sets.

    `$limit` is much faster, but not truly random, while `$sample` is completely random.

    Defaults to `$sample` when the value is unset; any value other than these two is rejected.
    """
    if val is None:
        return "$sample"
    if val == "$sample" or val == "$limit":
        return val
    raise cfig.InvalidValueError("Neither $sample or $limit.")
|
|
|
|
|
|
|
|
|
2023-02-01 15:03:10 +00:00
|
|
|
@config.optional()
def TRAINING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews from each category to fetch for the training set.

    Defaults to ``1000`` when the value is unset.

    :param val: The raw configured value, or ``None`` if it was not set.
    :returns: The training set size as an integer.
    :raises cfig.InvalidValueError: If the value cannot be parsed as an
        integer, or if it is negative.
    """
    if val is None:
        return 1000
    try:
        size = int(val)
    except ValueError as err:
        # Chain the parsing failure explicitly so it stays visible as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
    # A count of reviews to fetch cannot be negative.
    if size < 0:
        raise cfig.InvalidValueError("Must not be negative.")
    return size
|
|
|
|
|
|
|
|
|
|
|
|
@config.optional()
def TEST_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to fetch for the test set.

    Defaults to ``1000`` when the value is unset.

    :param val: The raw configured value, or ``None`` if it was not set.
    :returns: The test set size as an integer.
    :raises cfig.InvalidValueError: If the value cannot be parsed as an
        integer, or if it is negative.
    """
    if val is None:
        return 1000
    try:
        size = int(val)
    except ValueError as err:
        # Chain the parsing failure explicitly so it stays visible as the cause.
        raise cfig.InvalidValueError("Not an int.") from err
    # A count of reviews to fetch cannot be negative.
    if size < 0:
        raise cfig.InvalidValueError("Must not be negative.")
    return size
|
|
|
|
|
|
|
|
|
|
|
|
# Public names of this module: the configuration object and every setting
# registered on it.
__all__ = (
    "config",
    # MongoDB connection settings
    "MONGO_HOST",
    "MONGO_PORT",
    # Dataset selection settings
    "SAMPLE_MODE",
    "TRAINING_SET_SIZE",
    "TEST_SET_SIZE",
)
|