Mirror of https://github.com/Steffo99/unimore-bda-6.git

Some memory usage tweaks

Steffo 2023-02-18 03:18:34 +01:00
parent 35616d35c7
commit 61141248db
Signed by: steffo
GPG key ID: 2A24051445686895
3 changed files with 69 additions and 64 deletions

.vscode/launch.json (vendored)

@@ -12,7 +12,6 @@
             "justMyCode": false,
             "env": {
                 "NLTK_DATA": "./data/nltk",
-                "DATA_SET_SIZE": "250",
                 "XLA_FLAGS": "--xla_gpu_cuda_data_dir=/opt/cuda"
             },
             "cwd": "${workspaceFolder}",

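Dropping the DATA_SET_SIZE override from the debug launch environment means that runs started from this configuration now fall back to whatever default the project's own settings define for that value. As a rough illustration of the general pattern only (the helper below is hypothetical, not the project's actual configuration code), an environment-backed value with a default simply resolves to that default once the variable disappears:

import os

def read_int_setting(name: str, default: int) -> int:
    # Hypothetical helper: use the environment variable when it is set,
    # otherwise fall back to the built-in default.
    raw = os.environ.get(name)
    return int(raw) if raw is not None else default

# With the launch.json entry removed, this resolves to the default again.
sample_size = read_int_setting("DATA_SET_SIZE", 100)  # the default of 100 is an assumption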

@@ -31,88 +31,94 @@ def main():
             log.fatal("MongoDB database is not available, exiting...")
             exit(1)
-        reviews = reviews_collection(db)

-        for sample_func in [
-            sample_reviews_polar,
-            sample_reviews_varied,
-        ]:
-            slog = logging.getLogger(f"{__name__}.{sample_func.__name__}")
-            slog.debug("Selected sample_func: %s", sample_func.__name__)
+    for sample_func in [
+        sample_reviews_polar,
+        sample_reviews_varied,
+    ]:
+        slog = logging.getLogger(f"{__name__}.{sample_func.__name__}")
+        slog.debug("Selected sample_func: %s", sample_func.__name__)

-            for SentimentAnalyzer in [
-                # ThreeCheat,
-                NLTKSentimentAnalyzer,
-                TensorflowPolarSentimentAnalyzer,
-                TensorflowCategorySentimentAnalyzer,
-            ]:
-                slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
-                slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)
+        for SentimentAnalyzer in [
+            # ThreeCheat,
+            NLTKSentimentAnalyzer,
+            TensorflowPolarSentimentAnalyzer,
+            TensorflowCategorySentimentAnalyzer,
+        ]:
+            slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}")
+            slog.debug("Selected SentimentAnalyzer: %s", SentimentAnalyzer.__name__)

-                for Tokenizer in [
-                    PlainTokenizer,
-                    LowercaseTokenizer,
-                    NLTKWordTokenizer,
-                    PottsTokenizer,
-                    PottsTokenizerWithNegation,
-                    HuggingBertTokenizer,
-                ]:
-                    log.debug("Running garbage collection...")
-                    garbage_count = gc.collect()
-                    log.debug("Collected %d pieces of garbage!", garbage_count)
-                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
-                    slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)
+            for Tokenizer in [
+                PlainTokenizer,
+                LowercaseTokenizer,
+                NLTKWordTokenizer,
+                PottsTokenizer,
+                PottsTokenizerWithNegation,
+                HuggingBertTokenizer,
+            ]:
+                log.debug("Running garbage collection...")
+                garbage_count = gc.collect()
+                log.debug("Collected %d pieces of garbage!", garbage_count)
+                slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")
+                slog.debug("Selected Tokenizer: %s", Tokenizer.__name__)

-                    runs = 0
-                    successful_runs = 0
-                    cumulative_evaluation_results = EvaluationResults()
+                runs = 0
+                successful_runs = 0
+                cumulative_evaluation_results = EvaluationResults()

-                    while True:
+                while True:
+                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}")

-                        if successful_runs >= TARGET_RUNS.__wrapped__:
-                            slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
-                            break
+                    if successful_runs >= TARGET_RUNS.__wrapped__:
+                        slog.info("Reached target of %d runs, moving on...", TARGET_RUNS.__wrapped__)
+                        break

-                        if runs >= MAXIMUM_RUNS.__wrapped__:
-                            slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
-                            break
+                    if runs >= MAXIMUM_RUNS.__wrapped__:
+                        slog.fatal("Exceeded %d runs, giving up and exiting...", MAXIMUM_RUNS.__wrapped__)
+                        break

-                        runs += 1
-                        slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
-                        slog.debug("Run #%d", runs)
+                    runs += 1
+                    slog = logging.getLogger(f"{__name__}.{sample_func.__name__}.{SentimentAnalyzer.__name__}.{Tokenizer.__name__}.{runs}")
+                    slog.debug("Run #%d", runs)

-                        try:
-                            slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
-                            sa = SentimentAnalyzer(tokenizer=Tokenizer())
-                        except TypeError:
-                            slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__)
-                            break
-                        with Caches.from_database_samples(collection=reviews, sample_func=sample_func) as datasets:
-                            try:
-                                slog.info("Training sentiment analyzer: %s", sa)
-                                sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
-                            except TrainingFailedError:
-                                slog.error("Training failed, trying again with a different dataset...")
-                                continue
-                            else:
-                                slog.info("Training succeeded!")
-                                slog.info("Evaluating sentiment analyzer: %s", sa)
-                                evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
-                                slog.info("Evaluation results: %s", evaluation_results)
-                                successful_runs += 1
-                                cumulative_evaluation_results += evaluation_results
-                                break
+                    with mongo_client_from_config() as db:
+                        reviews = reviews_collection(db)
+                        datasets_cm = Caches.from_database_samples(collection=reviews, sample_func=sample_func)
+                        datasets = datasets_cm.__enter__()
+                    try:
+                        try:
+                            slog.debug("Instantiating %s with %s...", SentimentAnalyzer.__name__, Tokenizer.__name__)
+                            sa = SentimentAnalyzer(tokenizer=Tokenizer())
+                        except TypeError:
+                            slog.warning("%s is not supported by %s, skipping run...", SentimentAnalyzer.__name__, Tokenizer.__name__)
+                            break
+                        slog.info("Training sentiment analyzer: %s", sa)
+                        sa.train(training_dataset_func=datasets.training, validation_dataset_func=datasets.validation)
+                    except TrainingFailedError:
+                        slog.error("Training failed, trying again with a different dataset...")
+                        continue
+                    else:
+                        slog.info("Training succeeded!")
+                        slog.info("Evaluating sentiment analyzer: %s", sa)
+                        evaluation_results = sa.evaluate(evaluation_dataset_func=datasets.evaluation)
+                        slog.info("Evaluation results: %s", evaluation_results)
+                        successful_runs += 1
+                        cumulative_evaluation_results += evaluation_results
+                        break
+                    finally:
+                        datasets_cm.__exit__()

-                slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)
+            slog.info("Cumulative evaluation results: %s", cumulative_evaluation_results)

 if __name__ == "__main__":
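Read together, the loop above keeps the MongoDB client open only while the dataset caches are being built, trains with the connection already closed, tears the caches down through a try/finally, and runs an explicit garbage-collection pass before each tokenizer configuration. A minimal, self-contained sketch of that resource pattern, with made-up stand-ins (open_database, build_caches, the print call) in place of the project's real mongo_client_from_config, Caches, and training code:

import contextlib
import gc

@contextlib.contextmanager
def open_database():
    # Stand-in for a real client: opened as late as possible, closed as soon as possible.
    db = {"reviews": ["sample 1", "sample 2"]}
    try:
        yield db
    finally:
        db.clear()  # pretend this releases the connection and its buffers

@contextlib.contextmanager
def build_caches(db):
    # Snapshot whatever training will need while the database is still open.
    caches = list(db["reviews"])
    try:
        yield caches
    finally:
        caches.clear()  # cleanup runs even if training fails

def run_once():
    # Keep the database open only long enough to materialize the caches.
    with open_database() as db:
        caches_cm = build_caches(db)
        caches = caches_cm.__enter__()
    # Train from the caches with the database already closed, and release
    # the caches no matter how the run ends.
    try:
        print("training on", caches)
    finally:
        caches_cm.__exit__(None, None, None)
    # Explicitly collect garbage before the next configuration starts.
    gc.collect()

run_once()

Calling __enter__ and __exit__ by hand instead of nesting a with block is what lets the caches outlive the database connection that produced them.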


@@ -23,7 +23,7 @@ def mongo_client_from_config() -> t.ContextManager[pymongo.MongoClient]:
     yield client
-    log.info("Closing connection to MongoDB...")
+    log.debug("Closing connection to MongoDB...")
     client.close()
     log.debug("Closed connection to MongoDB!")
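With connections now opened and closed on every run, the closing message is demoted from info to debug to keep the log quiet. For reference, the function touched here is a generator-based context manager: it yields a live client to the caller and closes it once the caller is done. A sketch of that shape, assuming placeholder connection parameters rather than the project's actual configuration lookup:

import contextlib
import logging
import typing as t

import pymongo

log = logging.getLogger(__name__)

@contextlib.contextmanager
def mongo_client_from_config() -> t.ContextManager[pymongo.MongoClient]:
    # Placeholder host and port: the real function reads these from the project configuration.
    client = pymongo.MongoClient(host="127.0.0.1", port=27017)
    yield client
    log.debug("Closing connection to MongoDB...")
    client.close()
    log.debug("Closed connection to MongoDB!")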