mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-21 15:34:18 +00:00
Do not stringify processed tokens
This commit is contained in:
parent
f0561cf078
commit
58554c84e0
1 changed file with 3 additions and 2 deletions
|
@@ -1,3 +1,4 @@
|
|||
import typing as t
|
||||
import nltk
|
||||
import nltk.sentiment.util
|
||||
|
||||
|
@@ -9,10 +10,10 @@ class NLTKWordTokenizer(BaseTokenizer):
|
|||
Tokenizer based on `nltk.word_tokenize`.
|
||||
"""
|
||||
|
||||
def tokenize(self, text: str) -> str:
|
||||
def tokenize(self, text: str) -> t.Iterator[str]:
|
||||
tokens = nltk.word_tokenize(text)
|
||||
nltk.sentiment.util.mark_negation(tokens, shallow=True)
|
||||
return " ".join(tokens)
|
||||
return tokens
|
||||
|
||||
|
||||
__all__ = (
|
||||
|
|
Loading…
Reference in a new issue