1
Fork 0
mirror of https://github.com/Steffo99/unimore-bda-6.git synced 2024-11-21 23:44:19 +00:00

Some memory usage tweaks

This commit is contained in:
Steffo 2023-02-18 03:18:34 +01:00
parent 35616d35c7
commit 61141248db
Signed by: steffo
GPG key ID: 2A24051445686895
3 changed files with 69 additions and 64 deletions

1
.vscode/launch.json vendored
View file

@ -12,7 +12,6 @@
"justMyCode": false, "justMyCode": false,
"env": { "env": {
"NLTK_DATA": "./data/nltk", "NLTK_DATA": "./data/nltk",
"DATA_SET_SIZE": "250",
"XLA_FLAGS": "--xla_gpu_cuda_data_dir=/opt/cuda" "XLA_FLAGS": "--xla_gpu_cuda_data_dir=/opt/cuda"
}, },
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",

View file

@ -31,88 +31,94 @@ def main():
log.fatal("MongoDB database is not available, exiting...") log.fatal("MongoDB database is not available, exiting...")
exit(1) exit(1)
reviews = reviews_collection(db) for sample_func in [
sample_reviews_polar,
sample_reviews_varied,
]:
for sample_func in [ slog = logging.getLogger(f"{__name__}.{sample_func.__name__}")
sample_reviews_polar, slog.debug("Selected sample_func: %s", sample_func.__name__)
sample_reviews_varied,
for SentimentAnalyzer in [
# ThreeCheat,
NLTKSentimentAnalyzer,
TensorflowPolarSentimentAnalyzer,
TensorflowCategorySentimentAnalyzer,
]: ]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}") slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
slog.debug("Selected sample_func: %s", sample_func.__name__) slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
for SentimentAnalyzer in [ for Tokenizer in [
# ThreeCheat, PlainTokenizer,
NLTKSentimentAnalyzer, LowercaseTokenizer,
TensorflowPolarSentimentAnalyzer, NLTKWordTokenizer,
TensorflowCategorySentimentAnalyzer, PottsTokenizer,
PottsTokenizerWithNegation,
HuggingBertTokenizer,
]: ]:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}") log.debug("Running garbage collection...")
slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__) garbage_count = gc.collect()
log.debug("Collected %d pieces of garbage!", garbage_count)
for Tokenizer in [ slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
PlainTokenizer, slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
LowercaseTokenizer,
NLTKWordTokenizer,
PottsTokenizer,
PottsTokenizerWithNegation,
HuggingBertTokenizer,
]:
log.debug("Running garbage collection...") runs = 0
garbage_count = gc.collect() successful_runs = 0
log.debug("Collected %d pieces of garbage!", garbage_count) cumulative_evaluation_results = EvaluationResults()
while True:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}") slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
runs = 0 if successful_runs >= TARGET_RUNS.__wrapped__:
successful_runs = 0 slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
cumulative_evaluation_results = EvaluationResults() break
while True: if runs >= MAXIMUM_RUNS.__wrapped__:
slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
break
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}") runs += 1
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
slog.debug("Run #%d", runs)
if successful_runs >= TARGET_RUNS.__wrapped__: try:
slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__) slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
break sa = SentimentAnalyzer(tokenizer=Tokenizer())
except TypeError:
slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__)
break
if runs >= MAXIMUM_RUNS.__wrapped__:
slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
break
runs += 1 with mongo_client_from_config() as db:
slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}") reviews = reviews_collection(db)
slog.debug("Run #%d", runs) datasets_cm = Caches.from_database_samples(collection=reviews, sample_func=sample_func)
datasets = datasets_cm.__enter__()
try:
try: try:
slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__) slog.info("Training sentiment analyzer: %s", sa)
sa = SentimentAnalyzer(tokenizer=Tokenizer()) sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
except TypeError:
slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__) except TrainingFailedError:
slog.error("Training failed, trying again with a different dataset...")
continue
else:
slog.info("Training succeeded!")
slog.info("Evaluating sentiment analyzer: %s", sa)
evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
slog.info("Evaluation results: %s", evaluation_results)
successful_runs += 1
cumulative_evaluation_results += evaluation_results
break break
finally:
datasets_cm.__exit__()
with Caches.from_database_samples(collection=reviews, sample_func=sample_func) as datasets: slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
try:
slog.info("Training sentiment analyzer: %s", sa)
sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
except TrainingFailedError:
slog.error("Training failed, trying again with a different dataset...")
continue
else:
slog.info("Training succeeded!")
slog.info("Evaluating sentiment analyzer: %s", sa)
evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
slog.info("Evaluation results: %s", evaluation_results)
successful_runs += 1
cumulative_evaluation_results += evaluation_results
break
slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -23,7 +23,7 @@ def mongo_client_from_config() -> t.ContextManager[pymongo.MongoClient]:
yield client yield client
log.info("Closing connection to MongoDB...") log.debug("Closing connection to MongoDB...")
client.close() client.close()
log.debug("Closed connection to MongoDB!") log.debug("Closed connection to MongoDB!")