2023-02-01 16:46:25 +00:00
|
|
|
import logging
|
2023-02-05 16:40:22 +00:00
|
|
|
import pymongo
|
|
|
|
import typing as t
|
2023-02-01 03:20:09 +00:00
|
|
|
|
2023-02-05 16:40:22 +00:00
|
|
|
from ..config import WORKING_SET_SIZE
|
|
|
|
from .collections import MongoReview
|
|
|
|
from .datatypes import Review
|
2023-02-01 16:46:25 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module, used by every query helper below.
log = logging.getLogger(__name__)
|
2023-02-01 03:20:09 +00:00
|
|
|
|
|
|
|
|
2023-02-04 05:14:24 +00:00
|
|
|
def sample_reviews(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
    """
    Draw ``amount`` random reviews out of ``collection``.

    The aggregation first applies a ``$limit`` of ``WORKING_SET_SIZE.__wrapped__`` documents, and then runs
    ``$sample`` on what is left, so documents beyond that limit are never considered.

    :param collection: The collection the aggregation is run on.
    :param amount: The ``size`` passed to the ``$sample`` stage.
    :return: A lazy iterator which passes every resulting document through ``Review.from_mongoreview``.
    """
    log.debug("Getting a sample of %d reviews...", amount)

    pipeline = [
        {"$limit": WORKING_SET_SIZE.__wrapped__},
        {"$sample": {"size": amount}},
    ]
    documents = collection.aggregate(pipeline)

    # ``map`` is lazy: documents are converted only as the caller consumes the iterator.
    return map(Review.from_mongoreview, documents)
|
2023-02-01 03:20:09 +00:00
|
|
|
|
2023-02-04 05:14:24 +00:00
|
|
|
|
|
|
|
def sample_reviews_by_rating(collection: pymongo.collection.Collection, rating: float, amount: int) -> t.Iterator[Review]:
    """
    Get ``amount`` random reviews with ``rating`` stars from the ``reviews`` collection.

    The aggregation first applies a ``$limit`` of ``WORKING_SET_SIZE.__wrapped__`` documents, then keeps the ones
    whose ``overall`` field equals ``rating``, and finally runs ``$sample`` on the matches; since the match happens
    after the limit, fewer than ``amount`` reviews may be available to sample from.

    :param collection: The collection the aggregation is run on.
    :param rating: The value the ``overall`` field of a document must be equal to.
    :param amount: The ``size`` passed to the ``$sample`` stage.
    :return: A lazy iterator which passes every resulting document through ``Review.from_mongoreview``.
    """
    log.debug("Getting a sample of %d reviews with %d stars...", amount, rating)

    cursor = collection.aggregate([
        {"$limit": WORKING_SET_SIZE.__wrapped__},
        {"$match": {"overall": rating}},
        {"$sample": {"size": amount}},
    ])

    # Convert the raw documents like the sibling ``sample_reviews*`` functions do, so that the returned
    # iterator actually yields what the return annotation declares instead of the bare cursor contents.
    cursor = map(Review.from_mongoreview, cursor)

    return cursor
|
2023-02-01 03:20:09 +00:00
|
|
|
|
2023-02-03 22:27:44 +00:00
|
|
|
|
2023-02-04 05:14:24 +00:00
|
|
|
def sample_reviews_polar(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
    """
    Draw ``amount`` random reviews whose ``overall`` field is ``1.0`` and ``amount`` whose ``overall`` field
    is ``5.0`` out of ``collection``, with a single aggregation.

    Each of the two halves applies a ``$limit`` of ``WORKING_SET_SIZE.__wrapped__`` documents before matching
    and sampling; the ``5.0`` half is attached to the ``1.0`` half through a ``$unionWith`` stage targeting
    the same collection.

    :param collection: The collection the aggregation is run on.
    :param amount: The ``size`` passed to each of the two ``$sample`` stages.
    :return: A lazy iterator which passes every resulting document through ``Review.from_mongoreview``.
    """
    log.debug("Getting a sample of %d polar reviews...", amount * 2)

    one_star_stages = [
        {"$limit": WORKING_SET_SIZE.__wrapped__},
        {"$match": {"overall": 1.0}},
        {"$sample": {"size": amount}},
    ]
    five_star_stages = [
        {"$limit": WORKING_SET_SIZE.__wrapped__},
        {"$match": {"overall": 5.0}},
        {"$sample": {"size": amount}},
    ]
    union_stage = {"$unionWith": {
        "coll": collection.name,
        "pipeline": five_star_stages,
    }}

    documents = collection.aggregate([*one_star_stages, union_stage])

    # ``map`` is lazy: documents are converted only as the caller consumes the iterator.
    return map(Review.from_mongoreview, documents)
|
2023-02-02 04:01:31 +00:00
|
|
|
|
2023-02-01 03:20:09 +00:00
|
|
|
|
2023-02-04 05:14:24 +00:00
|
|
|
def sample_reviews_varied(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[Review]:
    """
    Get ``amount`` random reviews for each of the ``overall`` values ``1.0``, ``2.0``, ``3.0``, ``4.0`` and
    ``5.0`` from ``collection``, with a single aggregation.

    For every rating, the pipeline applies a ``$limit`` of ``WORKING_SET_SIZE.__wrapped__`` documents, matches
    the rating on the ``overall`` field, and runs ``$sample``; the pipeline of each following rating is nested
    inside the previous one through a ``$unionWith`` stage targeting the same collection.

    :param collection: The collection the aggregation is run on.
    :param amount: The ``size`` passed to each of the five ``$sample`` stages.
    :return: A lazy iterator which passes every resulting document through ``Review.from_mongoreview``.
    """
    log.debug("Getting a sample of %d varied reviews...", amount * 5)

    # Build the nested pipeline from the innermost rating (5.0) outwards, so that the resulting document is
    # the same as writing the five ``$unionWith`` levels by hand, without repeating the stages five times.
    pipeline: t.List[t.Dict[str, t.Any]] = []
    for rating in (5.0, 4.0, 3.0, 2.0, 1.0):
        stages: t.List[t.Dict[str, t.Any]] = [
            {"$limit": WORKING_SET_SIZE.__wrapped__},
            {"$match": {"overall": rating}},
            {"$sample": {"size": amount}},
        ]
        # The innermost level (the first iteration) has nothing to union with.
        if pipeline:
            stages.append({"$unionWith": {
                "coll": collection.name,
                "pipeline": pipeline,
            }})
        pipeline = stages

    cursor = collection.aggregate(pipeline)

    cursor = map(Review.from_mongoreview, cursor)

    return cursor
|
2023-02-01 16:46:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Public names of this module: the four review sampling queries.
__all__ = (
    "sample_reviews",
    "sample_reviews_by_rating",
    "sample_reviews_polar",
    "sample_reviews_varied",
)
|