mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-25 17:24:20 +00:00
Use float instead of str as Category
This commit is contained in:
parent
4f24d399b8
commit
2675f5ead8
2 changed files with 8 additions and 22 deletions
|
@@ -19,13 +19,13 @@ def main():
|
||||||
|
|
||||||
for dataset_func in [polar_dataset, varied_dataset]:
|
for dataset_func in [polar_dataset, varied_dataset]:
|
||||||
for SentimentAnalyzer in [
|
for SentimentAnalyzer in [
|
||||||
# NLTKSentimentAnalyzer,
|
NLTKSentimentAnalyzer,
|
||||||
TensorflowSentimentAnalyzer,
|
# TensorflowSentimentAnalyzer,
|
||||||
]:
|
]:
|
||||||
for Tokenizer in [
|
for Tokenizer in [
|
||||||
# NLTKWordTokenizer,
|
NLTKWordTokenizer,
|
||||||
# PottsTokenizer,
|
PottsTokenizer,
|
||||||
# PottsTokenizerWithNegation,
|
PottsTokenizerWithNegation,
|
||||||
LowercaseTokenizer,
|
LowercaseTokenizer,
|
||||||
]:
|
]:
|
||||||
tokenizer = Tokenizer()
|
tokenizer = Tokenizer()
|
||||||
|
|
|
@@ -26,7 +26,7 @@ class Review(t.TypedDict):
|
||||||
|
|
||||||
|
|
||||||
Text = str
|
Text = str
|
||||||
Category = str
|
Category = float
|
||||||
DataTuple = collections.namedtuple("DataTuple", ["text", "category"])
|
DataTuple = collections.namedtuple("DataTuple", ["text", "category"])
|
||||||
DataSet = t.Iterable[DataTuple]
|
DataSet = t.Iterable[DataTuple]
|
||||||
|
|
||||||
|
@@ -101,23 +101,9 @@ def review_to_datatuple(review: Review) -> DataTuple:
|
||||||
* unknown (everything else)
|
* unknown (everything else)
|
||||||
"""
|
"""
|
||||||
text = review["reviewText"]
|
text = review["reviewText"]
|
||||||
rating = review["overall"]
|
category = review["overall"]
|
||||||
|
|
||||||
match rating:
|
return DataTuple(text=text, category=category)
|
||||||
case 1.0:
|
|
||||||
category = "terrible"
|
|
||||||
case 2.0:
|
|
||||||
category = "negative"
|
|
||||||
case 3.0:
|
|
||||||
category = "mixed"
|
|
||||||
case 4.0:
|
|
||||||
category = "positive"
|
|
||||||
case 5.0:
|
|
||||||
category = "great"
|
|
||||||
case _:
|
|
||||||
category = "unknown"
|
|
||||||
|
|
||||||
return DataTuple(text, category)
|
|
||||||
|
|
||||||
|
|
||||||
def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]:
|
def polar_dataset(collection: pymongo.collection.Collection, amount: int) -> t.Iterator[DataTuple]:
|
||||||
|
|
Loading…
Reference in a new issue