mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-24 16:54:20 +00:00
Configure working set
This commit is contained in:
parent
14d1e1a22f
commit
ded20c33e1
2 changed files with 21 additions and 4 deletions
|
@ -28,6 +28,22 @@ def MONGO_PORT(val: str | None) -> int:
|
||||||
raise cfig.InvalidValueError("Not an int.")
|
raise cfig.InvalidValueError("Not an int.")
|
||||||
|
|
||||||
|
|
||||||
|
@config.optional()
def WORKING_SET_SIZE(val: str | None) -> int:
    """
    The number of reviews to consider from the database.
    Set this to a low number to prevent slowness due to the dataset's huge size.

    The value is passed as the `$limit` stage of the review sampling aggregation pipelines, so it must be a positive integer.

    Defaults to `10000`.

    :param val: The raw configuration value, or :data:`None` if the option was not set.
    :return: The parsed working set size.
    :raises cfig.InvalidValueError: If the value is not an integer, or is not greater than zero.
    """
    if val is None:
        return 10000
    try:
        size = int(val)
    except ValueError as exc:
        # Chain explicitly so the original parsing failure stays attached to the configuration error.
        raise cfig.InvalidValueError("Not an int.") from exc
    if size <= 0:
        # A non-positive `$limit` is rejected by MongoDB at query time; fail at configuration time instead.
        raise cfig.InvalidValueError("Not a positive int.")
    return size
|
||||||
|
|
||||||
|
|
||||||
@config.optional()
|
@config.optional()
|
||||||
def TRAINING_SET_SIZE(val: str | None) -> int:
|
def TRAINING_SET_SIZE(val: str | None) -> int:
|
||||||
"""
|
"""
|
||||||
|
@ -62,6 +78,7 @@ __all__ = (
|
||||||
"config",
|
"config",
|
||||||
"MONGO_HOST",
|
"MONGO_HOST",
|
||||||
"MONGO_PORT",
|
"MONGO_PORT",
|
||||||
|
"WORKING_SET_SIZE",
|
||||||
"TRAINING_SET_SIZE",
|
"TRAINING_SET_SIZE",
|
||||||
"TEST_SET_SIZE",
|
"TEST_SET_SIZE",
|
||||||
"NLTK_DOUBLE_NEG_SWITCH",
|
"NLTK_DOUBLE_NEG_SWITCH",
|
||||||
|
|
|
@ -5,7 +5,7 @@ import contextlib
|
||||||
import bson
|
import bson
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .config import MONGO_HOST, MONGO_PORT, TRAINING_SET_SIZE, TEST_SET_SIZE
|
from .config import MONGO_HOST, MONGO_PORT, WORKING_SET_SIZE, TRAINING_SET_SIZE, TEST_SET_SIZE
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ def sample_reviews(reviews: pymongo.collection.Collection, amount: int) -> t.Ite
|
||||||
log.debug("Getting a sample of %d reviews...", amount)
|
log.debug("Getting a sample of %d reviews...", amount)
|
||||||
|
|
||||||
return reviews.aggregate([
|
return reviews.aggregate([
|
||||||
{"$limit": 10000}, # TODO
|
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||||
{"$sample": {"size": amount}},
|
{"$sample": {"size": amount}},
|
||||||
])
|
])
|
||||||
|
|
||||||
|
@ -73,7 +73,7 @@ def sample_reviews_by_rating(reviews: pymongo.collection.Collection, rating: flo
|
||||||
log.debug("Getting a sample of %d reviews with %d stars...", amount, rating)
|
log.debug("Getting a sample of %d reviews with %d stars...", amount, rating)
|
||||||
|
|
||||||
return reviews.aggregate([
|
return reviews.aggregate([
|
||||||
{"$limit": 10000}, # TODO
|
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||||
{"$match": {"overall": rating}},
|
{"$match": {"overall": rating}},
|
||||||
{"$sample": {"size": amount}},
|
{"$sample": {"size": amount}},
|
||||||
])
|
])
|
||||||
|
@ -86,7 +86,7 @@ def sample_reviews_by_rating_polar(reviews: pymongo.collection.Collection, amoun
|
||||||
log.debug("Getting a sample of %d reviews with either 5 or 1 stars...", amount)
|
log.debug("Getting a sample of %d reviews with either 5 or 1 stars...", amount)
|
||||||
|
|
||||||
return reviews.aggregate([
|
return reviews.aggregate([
|
||||||
{"$limit": 10000}, # TODO
|
{"$limit": WORKING_SET_SIZE.__wrapped__},
|
||||||
{"$match":
|
{"$match":
|
||||||
{"$or":
|
{"$or":
|
||||||
[
|
[
|
||||||
|
|
Loading…
Reference in a new issue