mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 23:44:19 +00:00
Implement basic Potts sentiment analyzer
This commit is contained in:
parent
e2b9133bd5
commit
ab5f12f8fc
2 changed files with 42 additions and 9 deletions
|
@ -1,23 +1,26 @@
|
|||
import logging
|
||||
|
||||
from .config import config
|
||||
from .database import mongo_reviews_collection_from_config, get_training_reviews, get_test_reviews
|
||||
from .analysis.vanilla import VanillaReviewSA
|
||||
from .analysis.potts import PottsReviewSA
|
||||
from .log import install_log_handler
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main():
|
||||
with mongo_reviews_collection_from_config() as reviews:
|
||||
training_reviews = get_training_reviews(collection=reviews)
|
||||
test_reviews = get_test_reviews(collection=reviews)
|
||||
|
||||
model = VanillaReviewSA()
|
||||
model.train(training_reviews)
|
||||
|
||||
evaluation = model.evaluate(test_reviews)
|
||||
print(evaluation)
|
||||
|
||||
while True:
|
||||
classification = model.use(input())
|
||||
print(classification)
|
||||
vanilla = VanillaReviewSA()
|
||||
vanilla.train(training_reviews)
|
||||
log.info("Vanilla evaluation results: %s", vanilla.evaluate(test_reviews))
|
||||
|
||||
potts = PottsReviewSA()
|
||||
potts.train(training_reviews)
|
||||
log.info("Potts evaluation results: %s", potts.evaluate(test_reviews))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
30
unimore_bda_6/analysis/potts.py
Normal file
30
unimore_bda_6/analysis/potts.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from ..vendor.potts import Tokenizer
|
||||
from .vanilla import VanillaSA, VanillaReviewSA
|
||||
|
||||
|
||||
class PottsSA(VanillaSA):
    """
    Variant of `VanillaSA` whose tokenization step is delegated to the vendored Potts `Tokenizer`.
    """

    def __init__(self) -> None:
        # No state of its own: initialization is left entirely to the parent class.
        super().__init__()

    def _tokenize_text(self, text: str) -> list[str]:
        """
        Split ``text`` into tokens with the vendored Potts `Tokenizer`.

        A fresh tokenizer is created on every call with ``preserve_case=False``, and everything
        its ``tokenize`` method produces is collected into a list, in order.
        """
        return list(Tokenizer(preserve_case=False).tokenize(text))
|
||||
|
||||
|
||||
class PottsReviewSA(VanillaReviewSA, PottsSA):
    """
    The `Review`-oriented counterpart of `PottsSA`.

    Defines nothing itself: all behaviour is inherited from `VanillaReviewSA` and `PottsSA`, in that order.
    """
|
||||
|
||||
|
||||
# Public names of this module, i.e. what a star-import exposes.
__all__ = ("PottsSA", "PottsReviewSA")
|
Loading…
Reference in a new issue