1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

Do not create a dataset with just the 2-star and 4-star reviews

This commit is contained in:
Steffo 2023-02-02 15:16:46 +01:00
parent 4344752cf6
commit 3ae43b2714
Signed by: steffo
GPG key ID: 2A24051445686895

View file

@@ -114,13 +114,13 @@ def get_reviews_dataset_uniform(collection: pymongo.collection.Collection, amoun
great = sample_reviews_by_rating(collection, rating=5.0, amount=amount)
# Randomness here does not matter, so just merge the lists
both = [*positive, *negative]
full = [*terrible, *negative, *mixed, *positive, *great]
# Shuffle the dataset, just in case it affects the performance
# TODO: does it actually?
random.shuffle(both)
random.shuffle(full)
return both
return full
__all__ = (