Mirror of https://github.com/Steffo99/unimore-bda-6.git

it works, but at what cost

Steffo, 2023-02-03 02:49:14 +01:00
parent 4e1a9f842f
commit 32cd81bca6
Signed by: steffo (GPG key ID: 2A24051445686895)
3 changed files with 6 additions and 4 deletions


@@ -4,7 +4,7 @@
     <option name="INTERPRETER_OPTIONS" value="" />
     <option name="PARENT_ENVS" value="true" />
     <envs>
-      <env name="DATA_SET_SIZE" value="10000" />
+      <env name="DATA_SET_SIZE" value="750" />
       <env name="NLTK_DATA" value="./data/nltk" />
       <env name="PYTHONUNBUFFERED" value="1" />
       <env name="WORKING_SET_SIZE" value="1000000" />


@@ -82,8 +82,8 @@ def main():
     try:
         print("Model %s" % model)
-        while True:
-            print(model.use(input()))
+        while inp := input():
+            print(model.use(inp))
     except KeyboardInterrupt:
         pass
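The loop now uses an assignment expression instead of an unconditional `while True`: `input()` returns an empty string on a blank line, which is falsy, so the REPL also terminates cleanly on empty input rather than only on Ctrl+C. A self-contained sketch of the new behaviour, with `str.upper` standing in for `model.use`:

    def repl(use) -> None:
        """Apply `use` to each input line; stop on a blank line or Ctrl+C."""
        try:
            # The walrus operator binds the line and tests its truthiness at once.
            while inp := input():
                print(use(inp))
        except KeyboardInterrupt:
            pass

    repl(str.upper)  # stand-in for model.use

Note that end-of-file (Ctrl+D) still raises EOFError in both versions, since only KeyboardInterrupt is caught.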


@@ -61,6 +61,7 @@ class VanillaSA(BaseSA):
         Does not use `SentimentAnalyzer.apply_features` due to unexpected behaviour when using iterators.
         """
+        count_passage("processed_features", 100)
         return self.model.extract_features(data[0]), data[1]

     def _train_from_dataset(self, dataset: t.Iterator[tuple[TokenBag, Category]]) -> None:
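`count_passage` reads as a throttled progress counter: called once per sample, logging every 100th pass so that long feature-extraction runs show signs of life. Its implementation lives elsewhere in the repository and is not shown in this diff; a hypothetical helper with that call signature might look like:

    import collections
    import logging

    log = logging.getLogger(__name__)
    _passage_counts: collections.Counter = collections.Counter()

    def count_passage(key: str, period: int) -> None:
        # Hypothetical: bump a named counter and log every `period`-th hit.
        _passage_counts[key] += 1
        if _passage_counts[key] % period == 0:
            log.debug("%s: passed %d times", key, _passage_counts[key])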
@@ -87,7 +88,8 @@ class VanillaSA(BaseSA):
             raise NotTrainedError()
         dataset_1 = map(self.__extract_features, dataset)
-        return self.model.evaluate(dataset_1)
+        # FIXME: This won't work with streams :(
+        return self.model.evaluate(list(dataset_1))

     def _use_from_tokenbag(self, tokens: TokenBag) -> Category:
         """