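"""
Entry point of the ``unimore_bda_6`` package: for every combination of dataset
sampler, sentiment analyzer, and tokenizer, train a model on a sample of
reviews and evaluate it on a second, independently drawn sample.
"""
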
import logging
import tensorflow
from .config import config, DATA_SET_SIZE
from .database import (
    mongo_client_from_config,
    reviews_collection,
    sample_reviews_polar,
    sample_reviews_varied,
    store_cache,
    load_cache,
    delete_cache,
)
from .analysis.nltk_sentiment import NLTKSentimentAnalyzer
from .analysis.tf_text import TensorflowSentimentAnalyzer
from .analysis.base import TrainingFailedError
from .tokenizer import LowercaseTokenizer
from .log import install_log_handler

log = logging.getLogger(__name__)


def main():
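    # Warn early when Tensorflow does not detect a GPU, as training will run on the CPU.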
    if len(tensorflow.config.list_physical_devices(device_type="GPU")) == 0:
        log.warning("Tensorflow reports no GPU acceleration available.")
    else:
        log.debug("Tensorflow successfully found GPU acceleration!")
    try:
        delete_cache("./data/training")
        delete_cache("./data/evaluation")
    except FileNotFoundError:
        pass
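
    # Train and evaluate a model for every combination of dataset sampler,
    # analyzer implementation, and tokenizer; unused tokenizers below are
    # left commented out.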
    for dataset_func in [sample_reviews_polar, sample_reviews_varied]:
        for SentimentAnalyzer in [TensorflowSentimentAnalyzer, NLTKSentimentAnalyzer]:
            for Tokenizer in [
                # NLTKWordTokenizer,
                # PottsTokenizer,
                # PottsTokenizerWithNegation,
                LowercaseTokenizer,
            ]:
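                # Keep retrying this combination until training succeeds,
                # re-sampling the dataset after each failure.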
                while True:
                    try:
                        tokenizer = Tokenizer()
                        model = SentimentAnalyzer(tokenizer=tokenizer)
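
                        # Open a MongoDB client and locate the reviews collection.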
                        with mongo_client_from_config() as db:
                            log.debug("Finding the reviews MongoDB collection...")
                            collection = reviews_collection(db)
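
                            # Load cached datasets if they exist; otherwise sample two
                            # independent review sets from MongoDB and cache them to disk.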
                            try:
                                training_cache = load_cache("./data/training")
                                evaluation_cache = load_cache("./data/evaluation")
                            except FileNotFoundError:
                                log.debug("Gathering datasets...")
                                # DATA_SET_SIZE seems to be a lazily-resolved config value;
                                # __wrapped__ accesses the underlying resolved amount.
                                reviews_training = dataset_func(collection=collection, amount=DATA_SET_SIZE.__wrapped__)
                                reviews_evaluation = dataset_func(collection=collection, amount=DATA_SET_SIZE.__wrapped__)
                                log.debug("Caching datasets...")
                                store_cache(reviews_training, "./data/training")
                                store_cache(reviews_evaluation, "./data/evaluation")
                                del reviews_training
                                del reviews_evaluation

                                training_cache = load_cache("./data/training")
                                evaluation_cache = load_cache("./data/evaluation")
                                log.debug("Caches stored and loaded successfully!")
                            else:
                                log.debug("Caches loaded successfully!")
                            log.info("Training model: %s", model)
                            model.train(training_cache)
                            log.info("Evaluating model: %s", model)
                            evaluation_results = model.evaluate(evaluation_cache)
                            log.info("%s", evaluation_results)
                    except TrainingFailedError:
                        log.error("Training failed, restarting with a different dataset.")
                        continue
                    else:
                        log.info("Training succeeded!")
                        break
                    finally:
                        # Always delete the cached datasets, so the next attempt or
                        # combination starts from a freshly sampled one.
                        delete_cache("./data/training")
                        delete_cache("./data/evaluation")


if __name__ == "__main__":
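    # Install logging handlers and resolve all configuration values before running.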
    install_log_handler()
    config.proxies.resolve()
    main()