From df4b9bb36837da73d360eb3961c8ca75a0720314 Mon Sep 17 00:00:00 2001
From: Stefano Pigozzi <me@steffo.eu>
Date: Thu, 20 May 2021 12:15:13 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20Continue=20improving=20visualiza?=
 =?UTF-8?q?tion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../interactive/BoxVisualizationStats.js      | 109 ++++++++++++++----
 .../interactive/BoxVisualizationWordcloud.js  |  10 +-
 nest_frontend/routes/PageRepository.js        |  10 ++
 nest_frontend/utils/countTweetWords.js        |  24 ++++
 .../utils/objectToWordcloudFormat.js          |  13 +++
 nest_frontend/utils/tokenizeTweetWords.js     |  35 ------
 6 files changed, 137 insertions(+), 64 deletions(-)
 create mode 100644 nest_frontend/utils/countTweetWords.js
 create mode 100644 nest_frontend/utils/objectToWordcloudFormat.js
 delete mode 100644 nest_frontend/utils/tokenizeTweetWords.js

diff --git a/nest_frontend/components/interactive/BoxVisualizationStats.js b/nest_frontend/components/interactive/BoxVisualizationStats.js
index 404bbce..f813ff0 100644
--- a/nest_frontend/components/interactive/BoxVisualizationStats.js
+++ b/nest_frontend/components/interactive/BoxVisualizationStats.js
@@ -2,31 +2,107 @@ import React, { useMemo } from "react"
 import FormLabelled from "../base/FormLabelled"
 import FormLabel from "../base/formparts/FormLabel"
 import BoxFullScrollable from "../base/BoxFullScrollable"
-import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
+import tokenizeTweetWords from "../../utils/countTweetWords"
 
 
-export default function BoxVisualizationStats({ tweets, totalTweetCount, ...props }) {
+/**
+ * Box showing summary statistics about a set of tweets and the words counted from them.
+ *
+ * @param tweets - Array of tweet objects; `location`, `content` and `poster` are read from each of them.
+ * @param words - Array of `{text, value}` entries, where `value` is the number of occurrences of `text`.
+ * @param totalTweetCount - Total used as the denominator of the tweet percentage.
+ * @param props - Any other props.
+ */
+export default function BoxVisualizationStats({ tweets, words, totalTweetCount, ...props }) {
 
-    const words = useMemo(
-        () => tokenizeTweetWords(tweets),
+    const tweetCount = useMemo(
+        () => tweets.length,
         [tweets]
     )
 
-    const tweetCount = tweets.length
-    const tweetPct = tweetCount / totalTweetCount * 100
-    const tweetLocationCount = tweets.filter(tweet => tweet.location).length
-    const tweetLocationPct = tweetLocationCount / tweetCount * 100
-    const tweetContent = tweets.filter(tweet => tweet.content)
-    const tweetContentCount = tweetContent.length
-    const tweetContentPct = tweetContentCount / tweetCount * 100
-    const wordCount = words.map(word => word.value).reduce((a, b) => a+b)
-    const mostPopularWord = words.sort((wa, wb) => {
-        if(wa.value > wb.value) return -1
-        if(wa.value < wb.value) return 1
-        return 0
-    })[0].text
-    const users = [...new Set(tweets.map(tweet => tweet.poster))]
-    const usersCount = users.length
+    const tweetPct = useMemo(
+        () => tweetCount / totalTweetCount * 100,
+        [tweetCount, totalTweetCount]
+    )
+
+    const tweetLocationCount = useMemo(
+        () => tweets.filter(tweet => tweet.location).length,
+        [tweets]
+    )
+
+    const tweetLocationPct = useMemo(
+        () => tweetLocationCount / tweetCount * 100,
+        [tweetLocationCount, tweetCount]
+    )
+
+    const tweetContent = useMemo(
+        () => tweets.filter(tweet => tweet.content),
+        [tweets]
+    )
+
+    const tweetContentCount = useMemo(
+        () => tweetContent.length,
+        [tweetContent],
+    )
+
+    const tweetContentPct = useMemo(
+        () => tweetContentCount / tweetCount * 100,
+        [tweetContentCount, tweetCount],
+    )
+
+    const wordCount = useMemo(
+        // The initial value keeps reduce from throwing when there are no words.
+        () => words.reduce((total, word) => total + word.value, 0),
+        [words]
+    )
+
+    const mostPopularWord = useMemo(
+        () => {
+            // With no words there is nothing to rank.
+            if(words.length === 0) return null
+            // Sort a copy: Array.prototype.sort works in place and `words` is a prop owned by the parent.
+            return [...words].sort((wa, wb) => {
+                if(wa.value > wb.value) return -1
+                if(wa.value < wb.value) return 1
+                return 0
+            })[0].text
+        },
+        [words]
+    )
+
+    const users = useMemo(
+        () => tweets.map(tweet => tweet.poster),
+        [tweets]
+    )
+
+    const uniqueUsers = useMemo(
+        () => [...new Set(users)],
+        [users]
+    )
+
+    const uniqueUsersCount = useMemo(
+        () => uniqueUsers.length,
+        [uniqueUsers]
+    )
+
+    const mostActiveUser = useMemo(
+        () => {
+            // Tally tweets per poster in a single pass instead of re-filtering `tweets` once per user.
+            const tweetsByUser = new Map()
+            for(const tweet of tweets) {
+                tweetsByUser.set(tweet.poster, (tweetsByUser.get(tweet.poster) || 0) + 1)
+            }
+            // Stays null when there are no tweets; on ties the poster seen first wins.
+            let mostActive = null
+            for(const [user, count] of tweetsByUser) {
+                if(mostActive === null || count > mostActive.count) {
+                    mostActive = {user: user, count: count}
+                }
+            }
+            return mostActive
+        },
+        [tweets]
+    )
 
     // TODO: tweets with picture count
     // TODO: tweets with picture pct
@@ -68,8 +132,12 @@ export default function BoxVisualizationStats({ tweets, totalTweetCount, ...prop
                 <FormLabel text={"% of tweets with image"}>
                     <b>🚧</b>
                 </FormLabel>
-                <FormLabel text={"Users count"}>
-                    <b>{usersCount}</b>
+                <FormLabel text={"Unique posters"}>
+                    <b>{uniqueUsersCount}</b>
+                </FormLabel>
+                <FormLabel text={"Most active user"}>
+                    {/* mostActiveUser is null when there are no tweets, so guard before reading its fields. */}
+                    <b>{mostActiveUser ? `${mostActiveUser.user} (${mostActiveUser.count} tweets)` : null}</b>
                 </FormLabel>
             </FormLabelled>
         </BoxFullScrollable>
diff --git a/nest_frontend/components/interactive/BoxVisualizationWordcloud.js b/nest_frontend/components/interactive/BoxVisualizationWordcloud.js
index 6806ff2..531d369 100644
--- a/nest_frontend/components/interactive/BoxVisualizationWordcloud.js
+++ b/nest_frontend/components/interactive/BoxVisualizationWordcloud.js
@@ -1,17 +1,11 @@
-import React, { useContext, useMemo } from "react"
+import React, { useContext } from "react"
 import BoxWordcloud from "../base/BoxWordcloud"
 import ContextLanguage from "../../contexts/ContextLanguage"
-import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
 
 
-export default function BoxVisualizationWordcloud({ tweets = [], ...props }) {
+export default function BoxVisualizationWordcloud({ words, ...props }) {
     const {strings} = useContext(ContextLanguage)
 
-    const words = useMemo(
-        () => tokenizeTweetWords(tweets),
-        [tweets]
-    )
-
     return (
         <BoxWordcloud header={strings.wordcloud} words={words} {...props}/>
     )
diff --git a/nest_frontend/routes/PageRepository.js b/nest_frontend/routes/PageRepository.js
index 1aa2833..406c309 100644
--- a/nest_frontend/routes/PageRepository.js
+++ b/nest_frontend/routes/PageRepository.js
@@ -17,6 +17,9 @@ import BoxVisualizationMap from "../components/interactive/BoxVisualizationMap"
 import BoxVisualizationWordcloud from "../components/interactive/BoxVisualizationWordcloud"
 import BoxFull from "../components/base/BoxFull"
 import ContextLanguage from "../contexts/ContextLanguage"
+import tokenizeTweetWords from "../utils/countTweetWords"
+import countTweetWords from "../utils/countTweetWords"
+import objectToWordcloudFormat from "../utils/objectToWordcloudFormat"
 
 
 export default function PageRepository({ className, ...props }) {
@@ -52,6 +55,11 @@ export default function PageRepository({ className, ...props }) {
     )
     const tweets = tweetsBv.resources && tweetsBv.error ? [] : tweetsBv.resources
 
+    const words = useMemo(
+        () => objectToWordcloudFormat(countTweetWords(tweets)),
+        [tweets]
+    )
+
     let contents;
     if(!repositoryBr.firstLoad || !tweetsBv.firstLoad) {
         contents = <>
@@ -88,6 +96,7 @@ export default function PageRepository({ className, ...props }) {
                 <BoxVisualizationWordcloud
                     className={Style.Wordcloud}
                     tweets={tweets}
+                    words={words}
                 />
             : null}
             {visualizationTab === "histogram" ?
@@ -106,6 +115,7 @@ export default function PageRepository({ className, ...props }) {
                 <BoxVisualizationStats
                     className={Style.Wordcloud}
                     tweets={tweets}
+                    words={words}
                     totalTweetCount={tweets.length}
                 />
             : null}
diff --git a/nest_frontend/utils/countTweetWords.js b/nest_frontend/utils/countTweetWords.js
new file mode 100644
index 0000000..41d892d
--- /dev/null
+++ b/nest_frontend/utils/countTweetWords.js
@@ -0,0 +1,39 @@
+import sw from "stopword"
+
+
+// Words excluded from the count: the Italian and English stopword lists, plus "rt".
+// Kept in a Set so that the membership check done for every token is a constant-time lookup.
+const stopwords = new Set([...sw.it, ...sw.en, "rt"])
+
+
+/**
+ * Count how many times each word occurs in the content of the given tweets.
+ *
+ * Content is lowercased and split on whitespace; empty tokens, stopwords and tokens starting with `https://`
+ * are skipped.
+ *
+ * @param tweets - Iterable of tweet objects; tweets with a falsy `content` are ignored.
+ * @returns {Object} An object mapping each counted word to its number of occurrences.
+ */
+export default function countTweetWords(tweets = []) {
+    let words = {}
+    // A null `tweets` is not replaced by the default parameter and is not iterable either.
+    if(!tweets) return words
+    for(const tweet of tweets) {
+        if(!tweet.content) {
+            continue
+        }
+        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
+            // Leading or trailing whitespace makes split() produce empty strings, which are not words.
+            if(word === "") continue
+            if(stopwords.has(word)) continue
+            if(word.startsWith("https://")) continue
+
+            if(!Object.prototype.hasOwnProperty.call(words, word)) {
+                words[word] = 0
+            }
+            words[word] += 1
+        }
+    }
+    return words
+}
diff --git a/nest_frontend/utils/objectToWordcloudFormat.js b/nest_frontend/utils/objectToWordcloudFormat.js
new file mode 100644
index 0000000..373720f
--- /dev/null
+++ b/nest_frontend/utils/objectToWordcloudFormat.js
@@ -0,0 +1,14 @@
+/**
+ * Convert an object mapping words to counts into an array of `{text, value}` entries.
+ *
+ * @param words - Object whose own enumerable keys are words and whose values are their counts.
+ * @returns {Array} One `{text: word, value: count}` object per own enumerable key of `words`.
+ */
+export default function objectToWordcloudFormat(words) {
+    // Object.keys lists own enumerable keys only, so no hasOwnProperty filter is needed, and objects without
+    // a prototype are handled too. A null or undefined `words` yields an empty array.
+    return Object.keys(words || {}).map(word => ({
+        text: word,
+        value: words[word],
+    }))
+}
\ No newline at end of file
diff --git a/nest_frontend/utils/tokenizeTweetWords.js b/nest_frontend/utils/tokenizeTweetWords.js
deleted file mode 100644
index ce48ff8..0000000
--- a/nest_frontend/utils/tokenizeTweetWords.js
+++ /dev/null
@@ -1,35 +0,0 @@
-import sw from "stopword"
-
-
-const stopwords = [...sw.it, ...sw.en, "rt"]
-
-
-export default function(tweets = {}) {
-    let preprocessedWords = {}
-    for(const tweet of tweets) {
-        if(!tweet.content) {
-            continue
-        }
-        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
-            if(stopwords.includes(word)) continue
-            if(word.startsWith("https://")) continue
-
-            if(!preprocessedWords.hasOwnProperty(word)) {
-                preprocessedWords[word] = 0
-            }
-            preprocessedWords[word] += 1
-        }
-    }
-
-    let processedWords = []
-    for(const word in preprocessedWords) {
-        if(!preprocessedWords.hasOwnProperty(word)) {
-            continue
-        }
-        processedWords.push({
-            text: word,
-            value: preprocessedWords[word]
-        })
-    }
-    return processedWords
-}
\ No newline at end of file