mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-24 16:54:20 +00:00
Configure working set
This commit is contained in:
parent
14d1e1a22f
commit
ded20c33e1
2 changed files with 21 additions and 4 deletions
|
@ -28,6 +28,22 @@ def MONGO_PORT(val: str | None) -> int:
|
|||
raise cfig.InvalidValueError("Not an int.")
|
||||
|
||||
|
||||
@config.optional()
def WORKING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to consider from the database.
    Set this to a low number to prevent slowness due to the dataset's huge size.

    Must be a positive integer.

    Defaults to `10000`.
    """
    # No value configured: fall back to the documented default.
    if val is None:
        return 10000
    try:
        size = int(val)
    except ValueError as err:
        # Chain the original error so the offending value stays visible in tracebacks.
        raise cfig.InvalidValueError("Not an int.") from err
    # This value is passed to a MongoDB `$limit` stage, which rejects zero and
    # negative numbers; fail at configuration time rather than at query time.
    if size <= 0:
        raise cfig.InvalidValueError("Not a positive int.")
    return size
|
||||
|
||||
|
||||
@config.optional()
|
||||
def TRAINING_SET_SIZE(val: str | None) -> int:
|
||||
"""
|
||||
|
@ -62,6 +78,7 @@ __all__ = (
|
|||
"config",
|
||||
"MONGO_HOST",
|
||||
"MONGO_PORT",
|
||||
"WORKING_SET_SIZE",
|
||||
"TRAINING_SET_SIZE",
|
||||
"TEST_SET_SIZE",
|
||||
"NLTK_DOUBLE_NEG_SWITCH",
|
||||
|
|
|
@ -5,7 +5,7 @@ import contextlib
|
|||
import bson
|
||||
import logging
|
||||
|
||||
from .config import MONGO_HOST, MONGO_PORT, TRAINING_SET_SIZE, TEST_SET_SIZE
|
||||
from .config import MONGO_HOST, MONGO_PORT, WORKING_SET_SIZE, TRAINING_SET_SIZE, TEST_SET_SIZE
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -61,7 +61,7 @@ def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Ite
|
|||
log.debug("Getting a sample of %d reviews...", amount)
|
||||
|
||||
return reviews.aggregate([
|
||||
{"$limit": 10000}, # TODO
|
||||
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||
{"$sample": {"size": amount}},
|
||||
])
|
||||
|
||||
|
@ -73,7 +73,7 @@ def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: flo
|
|||
log.debug("Getting a sample of %d reviews with %d stars...", amount, rating)
|
||||
|
||||
return reviews.aggregate([
|
||||
{"$limit": 10000}, # TODO
|
||||
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||
{"$match": {"overall": rating}},
|
||||
{"$sample": {"size": amount}},
|
||||
])
|
||||
|
@ -86,7 +86,7 @@ def sample_reviews_by_rating_polar(reviews: pymongo.collection.Collection, amoun
|
|||
log.debug("Getting a sample of %d reviews with either 5 or 1 stars...", amount)
|
||||
|
||||
return reviews.aggregate([
|
||||
{"$limit": 10000}, # TODO
|
||||
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||
{"$match":
|
||||
{"$or":
|
||||
[
|
||||
|
|
Loading…
Reference in a new issue