1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-25 17:24:20 +00:00

Use float instead of str as Category

This commit is contained in:
Steffo 2023-02-04 05:28:18 +01:00
parent 4f24d399b8
commit 2675f5ead8
Signed by: steffo
GPG key ID: 2A24051445686895
2 changed files with 8 additions and 22 deletions

View file

@ -19,13 +19,13 @@ def main():
for dataset_func in [polar_dataset, varied_dataset]: for dataset_func in [polar_dataset, varied_dataset]:
for SentimentAnalyzer in [ for SentimentAnalyzer in [
# NLTKSentimentAnalyzer, NLTKSentimentAnalyzer,
TensorflowSentimentAnalyzer, # TensorflowSentimentAnalyzer,
]: ]:
for Tokenizer in [ for Tokenizer in [
# NLTKWordTokenizer, NLTKWordTokenizer,
# PottsTokenizer, PottsTokenizer,
# PottsTokenizerWithNegation, PottsTokenizerWithNegation,
LowercaseTokenizer, LowercaseTokenizer,
]: ]:
tokenizer = Tokenizer() tokenizer = Tokenizer()

View file

@ -26,7 +26,7 @@ class Review(t.TypedDict):
Text = str Text = str
Category = str Category = float
DataTuple = collections.namedtuple("DataTuple", ["text", "category"]) DataTuple = collections.namedtuple("DataTuple", ["text", "category"])
DataSet = t.Iterable[DataTuple] DataSet = t.Iterable[DataTuple]
@ -101,23 +101,9 @@ def review_to_datatuple(review: Review) -> DataTuple:
* unknown (everything else) * unknown (everything else)
""" """
text = review["reviewText"] text = review["reviewText"]
rating = review["overall"] category = review["overall"]
match rating: return DataTuple(text=text, category=category)
case 1.0:
category = "terrible"
case 2.0:
category = "negative"
case 3.0:
category = "mixed"
case 4.0:
category = "positive"
case 5.0:
category = "great"
case _:
category = "unknown"
return DataTuple(text, category)
def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]: def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]: