1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-22 07:54:19 +00:00

Remove __main__

This commit is contained in:
Steffo 2023-02-02 04:18:46 +01:00
parent ce959f18be
commit cf37d13cb4
Signed by: steffo
GPG key ID: 2A24051445686895

View file

@@ -186,19 +186,3 @@ class Tokenizer:
pass
s = s.replace(amp, " and ")
return s
###############################################################################
if __name__ == '__main__':
    # Small manual demo: run the tokenizer over a few hand-picked strings and
    # print every resulting token on its own line.
    tok = Tokenizer(preserve_case=False)
    samples = (
        u"RT @ #happyfuncoding: this is a typical Twitter tweet :-)",
        u"HTML entities &amp; other Web oddities can be an &aacute;cute <em class='grumpy'>pain</em> >:(",
        u"It's perhaps noteworthy that phone numbers like +1 (800) 123-4567, (800) 123-4567, and 123-4567 are treated as words despite their whitespace.",
    )
    for s in samples:
        # Single-argument print(...) calls behave the same on Python 2 and
        # Python 3, whereas the bare `print x` statement is a SyntaxError on 3.
        print("======================================================================")
        print(s)
        tokenized = tok.tokenize(s)
        print("\n".join(tokenized))