1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00

Use float instead of str as Category

This commit is contained in:
Steffo 2023-02-04 05:28:18 +01:00
parent 4f24d399b8
commit 2675f5ead8
Signed by: steffo
GPG key ID: 2A24051445686895
2 changed files with 8 additions and 22 deletions

View file

@@ -19,13 +19,13 @@ def main():
 for dataset_func in [polar_dataset, varied_dataset]:
 for SentimentAnalyzer in [
-# NLTKSentimentAnalyzer,
-TensorflowSentimentAnalyzer,
+NLTKSentimentAnalyzer,
+# TensorflowSentimentAnalyzer,
 ]:
 for Tokenizer in [
-# NLTKWordTokenizer,
-# PottsTokenizer,
-# PottsTokenizerWithNegation,
+NLTKWordTokenizer,
+PottsTokenizer,
+PottsTokenizerWithNegation,
 LowercaseTokenizer,
 ]:
 tokenizer = Tokenizer()

View file

@@ -26,7 +26,7 @@ class Review(t.TypedDict):
 Text = str
-Category = str
+Category = float
 DataTuple = collections.namedtuple("DataTuple", ["text", "category"])
 DataSet = t.Iterable[DataTuple]
@@ -101,23 +101,9 @@ def review_to_datatuple(review: Review) -> DataTuple:
 * unknown (everything else)
 """
 text = review["reviewText"]
-rating = review["overall"]
+category = review["overall"]
-match rating:
-case 1.0:
-category = "terrible"
-case 2.0:
-category = "negative"
-case 3.0:
-category = "mixed"
-case 4.0:
-category = "positive"
-case 5.0:
-category = "great"
-case _:
-category = "unknown"
-return DataTuple(text, category)
+return DataTuple(text=text, category=category)
 def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]: