1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-25 01:04:19 +00:00

Configure working set

This commit is contained in:
Steffo 2023-02-02 04:07:17 +01:00
parent 14d1e1a22f
commit ded20c33e1
Signed by: steffo
GPG key ID: 2A24051445686895
2 changed files with 21 additions and 4 deletions

View file

@ -28,6 +28,22 @@ def MONGO_PORT(val: str | None) -> int:
raise cfig.InvalidValueError("Not an int.") raise cfig.InvalidValueError("Not an int.")
@config.optional()
def WORKING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to consider from the database.
    Set this to a low number to prevent slowness due to the dataset's huge size.
    Must be a positive integer.
    Defaults to `10000`.
    """
    # No value configured: fall back to the documented default.
    if val is None:
        return 10000
    try:
        size = int(val)
    except ValueError as err:
        # Chain explicitly so the underlying parsing error stays visible in tracebacks.
        raise cfig.InvalidValueError("Not an int.") from err
    # The value is passed to a MongoDB `$limit` aggregation stage, which only
    # accepts a positive integer; reject bad values here rather than at query time.
    if size <= 0:
        raise cfig.InvalidValueError("Not a positive int.")
    return size
@config.optional() @config.optional()
def TRAINING_SET_SIZE(val: str | None) -> int: def TRAINING_SET_SIZE(val: str | None) -> int:
""" """
@ -62,6 +78,7 @@ __all__ = (
"config", "config",
"MONGO_HOST", "MONGO_HOST",
"MONGO_PORT", "MONGO_PORT",
"WORKING_SET_SIZE",
"TRAINING_SET_SIZE", "TRAINING_SET_SIZE",
"TEST_SET_SIZE", "TEST_SET_SIZE",
"NLTK_DOUBLE_NEG_SWITCH", "NLTK_DOUBLE_NEG_SWITCH",

View file

@ -5,7 +5,7 @@ import contextlib
import bson import bson
import logging import logging
from .config import MONGO_HOST, MONGO_PORT, TRAINING_SET_SIZE, TEST_SET_SIZE from .config import MONGO_HOST, MONGO_PORT, WORKING_SET_SIZE, TRAINING_SET_SIZE, TEST_SET_SIZE
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -61,7 +61,7 @@ def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Ite
log.debug("Getting a sample of %d reviews...", amount) log.debug("Getting a sample of %d reviews...", amount)
return reviews.aggregate([ return reviews.aggregate([
{"$limit": 10000}, # TODO {"$limit": WORKING_SET_SIZE.__wrapped__},
{"$sample": {"size": amount}}, {"$sample": {"size": amount}},
]) ])
@ -73,7 +73,7 @@ def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: flo
log.debug("Getting a sample of %d reviews with %d stars...", amount, rating) log.debug("Getting a sample of %d reviews with %d stars...", amount, rating)
return reviews.aggregate([ return reviews.aggregate([
{"$limit": 10000}, # TODO {"$limit": WORKING_SET_SIZE.__wrapped__},
{"$match": {"overall": rating}}, {"$match": {"overall": rating}},
{"$sample": {"size": amount}}, {"$sample": {"size": amount}},
]) ])
@ -86,7 +86,7 @@ def sample_reviews_by_rating_polar(reviews: pymongo.collection.Collection, amoun
log.debug("Getting a sample of %d reviews with either 5 or 1 stars...", amount) log.debug("Getting a sample of %d reviews with either 5 or 1 stars...", amount)
return reviews.aggregate([ return reviews.aggregate([
{"$limit": 10000}, # TODO {"$limit": WORKING_SET_SIZE.__wrapped__},
{"$match": {"$match":
{"$or": {"$or":
[ [