mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 23:44:19 +00:00
Implement basic Potts sentiment analyzer
This commit is contained in:
parent
e2b9133bd5
commit
ab5f12f8fc
2 changed files with 42 additions and 9 deletions
|
@ -1,23 +1,26 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
from .config import config
|
from .config import config
|
||||||
from .database import mongo_reviews_collection_from_config, get_training_reviews, get_test_reviews
|
from .database import mongo_reviews_collection_from_config, get_training_reviews, get_test_reviews
|
||||||
from .analysis.vanilla import VanillaReviewSA
|
from .analysis.vanilla import VanillaReviewSA
|
||||||
|
from .analysis.potts import PottsReviewSA
|
||||||
from .log import install_log_handler
|
from .log import install_log_handler
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
with mongo_reviews_collection_from_config() as reviews:
|
with mongo_reviews_collection_from_config() as reviews:
|
||||||
training_reviews = get_training_reviews(collection=reviews)
|
training_reviews = get_training_reviews(collection=reviews)
|
||||||
test_reviews = get_test_reviews(collection=reviews)
|
test_reviews = get_test_reviews(collection=reviews)
|
||||||
|
|
||||||
model = VanillaReviewSA()
|
vanilla = VanillaReviewSA()
|
||||||
model.train(training_reviews)
|
vanilla.train(training_reviews)
|
||||||
|
log.info("Vanilla evaluation results: %s", vanilla.evaluate(test_reviews))
|
||||||
evaluation = model.evaluate(test_reviews)
|
|
||||||
print(evaluation)
|
potts = PottsReviewSA()
|
||||||
|
potts.train(training_reviews)
|
||||||
while True:
|
log.info("Potts evaluation results: %s", potts.evaluate(test_reviews))
|
||||||
classification = model.use(input())
|
|
||||||
print(classification)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
30
unimore_bda_6/analysis/potts.py
Normal file
30
unimore_bda_6/analysis/potts.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from ..vendor.potts import Tokenizer
|
||||||
|
from .vanilla import VanillaSA, VanillaReviewSA
|
||||||
|
|
||||||
|
|
||||||
|
class PottsSA(VanillaSA):
    """
    Sentiment analyzer that tokenizes text with the vendored Potts `Tokenizer`.
    """

    def __init__(self) -> None:
        # No extra state is set up here; initialization is left to the parent class.
        super().__init__()

    def _tokenize_text(self, text: str) -> list[str]:
        """
        Split a text string into a list of tokens.

        A new `Tokenizer` is created with `preserve_case=False` on every call, and
        everything its `tokenize` method produces is collected into a list.
        """
        return [*Tokenizer(preserve_case=False).tokenize(text)]
|
||||||
|
|
||||||
|
|
||||||
|
class PottsReviewSA(VanillaReviewSA, PottsSA):
    """
    A `PottsSA` meant to be used with `Review`s.

    Defines no members of its own: it only combines `VanillaReviewSA` and `PottsSA`,
    in that order of precedence.
    NOTE(review): presumably `_tokenize_text` resolves to the `PottsSA` version; this
    holds only if `VanillaReviewSA` does not define its own - confirm.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# Public names exported by this module.
__all__ = ("PottsSA", "PottsReviewSA")
|
Loading…
Reference in a new issue