mirror of
https://github.com/Steffo99/unimore-bda-6.git
synced 2024-11-22 07:54:19 +00:00
Remove __main__
This commit is contained in:
parent
ce959f18be
commit
cf37d13cb4
1 changed file with 0 additions and 16 deletions
16
unimore_bda_6/vendor/potts.py
vendored
16
unimore_bda_6/vendor/potts.py
vendored
|
@ -186,19 +186,3 @@ class Tokenizer:
|
||||||
pass
|
pass
|
||||||
s = s.replace(amp, " and ")
|
s = s.replace(amp, " and ")
|
||||||
return s
|
return s
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Small demo that runs only when the module is executed as a script:
    # tokenize a few sample strings and print the tokens one per line.
    tok = Tokenizer(preserve_case=False)
    samples = (
        u"RT @ #happyfuncoding: this is a typical Twitter tweet :-)",
        u"HTML entities & other Web oddities can be an ácute <em class='grumpy'>pain</em> >:(",
        u"It's perhaps noteworthy that phone numbers like +1 (800) 123-4567, (800) 123-4567, and 123-4567 are treated as words despite their whitespace."
    )

    for s in samples:
        # print(...) with a single argument behaves the same as the old
        # print statement on Python 2 and is valid syntax on Python 3.
        print("======================================================================")
        print(s)
        tokenized = tok.tokenize(s)
        # tokenize() is expected to yield strings, since they are joined here.
        print("\n".join(tokenized))
|
|
Loading…
Reference in a new issue