2023-02-02 03:34:05 +00:00
|
|
|
from ..vendor.potts import Tokenizer
|
2023-02-02 04:01:31 +00:00
|
|
|
from .vanilla import VanillaSA, VanillaReviewSA, VanillaUniformReviewSA
|
2023-02-02 03:34:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
class PottsSA(VanillaSA):
    """
    Sentiment analyzer that tokenizes text with the vendored Potts `Tokenizer`.

    Everything other than tokenization is inherited from `VanillaSA`.
    """

    def __init__(self) -> None:
        super().__init__()

    def _tokenize_text(self, text: str) -> list[str]:
        """
        Split `text` into tokens and return them as a list.

        A new `Tokenizer` configured with `preserve_case=False` is built on each call.
        """
        potts: Tokenizer = Tokenizer(preserve_case=False)
        tokens = potts.tokenize(text)
        # Materialize whatever iterable the tokenizer yields into a concrete list.
        return [*tokens]
|
|
|
|
|
|
|
|
|
|
|
|
class PottsReviewSA(VanillaReviewSA, PottsSA):
    """`VanillaReviewSA` combined with `PottsSA`, for analyzing `Review`s."""
|
|
|
|
|
|
|
|
|
2023-02-02 04:01:31 +00:00
|
|
|
class PottsUniformReviewSA(VanillaUniformReviewSA, PottsSA):
    """`VanillaUniformReviewSA` combined with `PottsSA`: uses 5 buckets instead of 2."""
|
|
|
|
|
|
|
|
|
2023-02-02 03:34:05 +00:00
|
|
|
# Public API of this module: every Potts-based analyzer class defined above.
__all__ = (
    "PottsSA",
    "PottsReviewSA",
    "PottsUniformReviewSA",
)
|