mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 23:44:19 +00:00
Use `float` instead of `str` as `Category`
This commit is contained in:
parent
4f24d399b8
commit
2675f5ead8
2 changed files with 8 additions and 22 deletions
|
@ -19,13 +19,13 @@ def main():
|
|||
|
||||
for dataset_func in [polar_dataset, varied_dataset]:
|
||||
for SentimentAnalyzer in [
|
||||
# NLTKSentimentAnalyzer,
|
||||
TensorflowSentimentAnalyzer,
|
||||
NLTKSentimentAnalyzer,
|
||||
# TensorflowSentimentAnalyzer,
|
||||
]:
|
||||
for Tokenizer in [
|
||||
# NLTKWordTokenizer,
|
||||
# PottsTokenizer,
|
||||
# PottsTokenizerWithNegation,
|
||||
NLTKWordTokenizer,
|
||||
PottsTokenizer,
|
||||
PottsTokenizerWithNegation,
|
||||
LowercaseTokenizer,
|
||||
]:
|
||||
tokenizer = Tokenizer()
|
||||
|
|
|
@ -26,7 +26,7 @@ class Review(t.TypedDict):
|
|||
|
||||
|
||||
Text = str
|
||||
Category = str
|
||||
Category = float
|
||||
DataTuple = collections.namedtuple("DataTuple", ["text", "category"])
|
||||
DataSet = t.Iterable[DataTuple]
|
||||
|
||||
|
@ -101,23 +101,9 @@ def review_to_datatuple(review: Review) -> DataTuple:
|
|||
* unknown (everything else)
|
||||
"""
|
||||
text = review["reviewText"]
|
||||
rating = review["overall"]
|
||||
category = review["overall"]
|
||||
|
||||
match rating:
|
||||
case 1.0:
|
||||
category = "terrible"
|
||||
case 2.0:
|
||||
category = "negative"
|
||||
case 3.0:
|
||||
category = "mixed"
|
||||
case 4.0:
|
||||
category = "positive"
|
||||
case 5.0:
|
||||
category = "great"
|
||||
case _:
|
||||
category = "unknown"
|
||||
|
||||
return DataTuple(text, category)
|
||||
return DataTuple(text=text, category=category)
|
||||
|
||||
|
||||
def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]:
|
||||
|
|
Loading…
Reference in a new issue