From 3abba24ca244f438732e00ed9b2185ac9cf79f78 Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Sun, 5 Feb 2023 17:40:22 +0100 Subject: [PATCH] Made good progress How does text vectorization in tensorflow work? --- .gitignore | 2 + .idea/runConfigurations/unimore_bda_6.xml | 1 + unimore-bda-6.iml | 2 + unimore_bda_6/__main__.py | 76 +++++++------ unimore_bda_6/analysis/nltk_sentiment.py | 11 +- unimore_bda_6/analysis/tf_text.py | 48 ++++---- unimore_bda_6/database/__init__.py | 5 + unimore_bda_6/database/cache.py | 66 +++++++++++ unimore_bda_6/database/collections.py | 41 +++++++ unimore_bda_6/database/connection.py | 32 ++++++ unimore_bda_6/database/datatypes.py | 49 ++++++++ .../{database.py => database/queries.py} | 107 ++---------------- unimore_bda_6/tokenizer/lower.py | 4 +- 13 files changed, 286 insertions(+), 158 deletions(-) create mode 100644 unimore_bda_6/database/__init__.py create mode 100644 unimore_bda_6/database/cache.py create mode 100644 unimore_bda_6/database/collections.py create mode 100644 unimore_bda_6/database/connection.py create mode 100644 unimore_bda_6/database/datatypes.py rename unimore_bda_6/{database.py => database/queries.py} (57%) diff --git a/.gitignore b/.gitignore index 8467da3..25241c6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ data/raw/ data/db/ data/nltk/ +data/training/ +data/evaluation/ ################## # Python ignores # diff --git a/.idea/runConfigurations/unimore_bda_6.xml b/.idea/runConfigurations/unimore_bda_6.xml index f113c82..df78a5b 100644 --- a/.idea/runConfigurations/unimore_bda_6.xml +++ b/.idea/runConfigurations/unimore_bda_6.xml @@ -4,6 +4,7 @@