From df4b9bb36837da73d360eb3961c8ca75a0720314 Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Thu, 20 May 2021 12:15:13 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=A7=20Continue=20improving=20visualiza?= =?UTF-8?q?tion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../interactive/BoxVisualizationStats.js | 109 ++++++++++++++---- .../interactive/BoxVisualizationWordcloud.js | 10 +- nest_frontend/routes/PageRepository.js | 10 ++ nest_frontend/utils/countTweetWords.js | 24 ++++ .../utils/objectToWordcloudFormat.js | 13 +++ nest_frontend/utils/tokenizeTweetWords.js | 35 ------ 6 files changed, 137 insertions(+), 64 deletions(-) create mode 100644 nest_frontend/utils/countTweetWords.js create mode 100644 nest_frontend/utils/objectToWordcloudFormat.js delete mode 100644 nest_frontend/utils/tokenizeTweetWords.js diff --git a/nest_frontend/components/interactive/BoxVisualizationStats.js b/nest_frontend/components/interactive/BoxVisualizationStats.js index 404bbce..f813ff0 100644 --- a/nest_frontend/components/interactive/BoxVisualizationStats.js +++ b/nest_frontend/components/interactive/BoxVisualizationStats.js @@ -2,31 +2,95 @@ import React, { useMemo } from "react" import FormLabelled from "../base/FormLabelled" import FormLabel from "../base/formparts/FormLabel" import BoxFullScrollable from "../base/BoxFullScrollable" -import tokenizeTweetWords from "../../utils/tokenizeTweetWords" +import tokenizeTweetWords from "../../utils/countTweetWords" -export default function BoxVisualizationStats({ tweets, totalTweetCount, ...props }) { +export default function BoxVisualizationStats({ tweets, words, totalTweetCount, ...props }) { - const words = useMemo( - () => tokenizeTweetWords(tweets), + const tweetCount = useMemo( + () => tweets.length, [tweets] ) - const tweetCount = tweets.length - const tweetPct = tweetCount / totalTweetCount * 100 - const tweetLocationCount = tweets.filter(tweet => tweet.location).length - const tweetLocationPct = tweetLocationCount / tweetCount * 100 - const tweetContent = tweets.filter(tweet => tweet.content) - const tweetContentCount = tweetContent.length - const tweetContentPct = tweetContentCount / tweetCount * 100 - const wordCount = words.map(word => word.value).reduce((a, b) => a+b) - const mostPopularWord = words.sort((wa, wb) => { - if(wa.value > wb.value) return -1 - if(wa.value < wb.value) return 1 - return 0 - })[0].text - const users = [...new Set(tweets.map(tweet => tweet.poster))] - const usersCount = users.length + const tweetPct = useMemo( + () => tweetCount / totalTweetCount * 100, + [tweetCount, totalTweetCount] + ) + + const tweetLocationCount = useMemo( + () => tweets.filter(tweet => tweet.location).length, + [tweets] + ) + + const tweetLocationPct = useMemo( + () => tweetLocationCount / tweetCount * 100, + [tweetLocationCount, tweetCount] + ) + + const tweetContent = useMemo( + () => tweets.filter(tweet => tweet.content), + [tweets] + ) + + const tweetContentCount = useMemo( + () => tweetContent.length, + [tweetContent], + ) + + const tweetContentPct = useMemo( + () => tweetContentCount / tweetCount * 100, + [tweetContentCount, tweetCount], + ) + + console.debug(words) + + const wordCount = useMemo( + () => words.map(word => word.value).reduce((a, b) => a+b), + [words] + ) + + const mostPopularWord = useMemo( + () => { + return words.sort((wa, wb) => { + if(wa.value > wb.value) return -1 + if(wa.value < wb.value) return 1 + return 0 + })[0].text + }, + [words] + ) + + const users = useMemo( + () => tweets.map(tweet => tweet.poster), + [tweets] + ) + + const uniqueUsers = useMemo( + () => [...new Set(users)], + [users] + ) + + const uniqueUsersCount = useMemo( + () => uniqueUsers.length, + [uniqueUsers] + ) + + const mostActiveUser = useMemo( + () => { + if(uniqueUsers.length === 0) return null + return uniqueUsers.map(user => { + return { + user: user, + count: tweets.filter(tweet => tweet.poster === user).length + } + }).sort((a, b) => { + if(a.count > b.count) return -1 + if(a.count < b.count) return 1 + return 0 + })[0] + }, + [uniqueUsers, tweets] + ) // TODO: tweets with picture count // TODO: tweets with picture pct @@ -68,8 +132,11 @@ export default function BoxVisualizationStats({ tweets, totalTweetCount, ...prop 🚧 - - {usersCount} + + {uniqueUsersCount} + + + {mostActiveUser.user} ({mostActiveUser.count} tweets) diff --git a/nest_frontend/components/interactive/BoxVisualizationWordcloud.js b/nest_frontend/components/interactive/BoxVisualizationWordcloud.js index 6806ff2..531d369 100644 --- a/nest_frontend/components/interactive/BoxVisualizationWordcloud.js +++ b/nest_frontend/components/interactive/BoxVisualizationWordcloud.js @@ -1,17 +1,11 @@ -import React, { useContext, useMemo } from "react" +import React, { useContext } from "react" import BoxWordcloud from "../base/BoxWordcloud" import ContextLanguage from "../../contexts/ContextLanguage" -import tokenizeTweetWords from "../../utils/tokenizeTweetWords" -export default function BoxVisualizationWordcloud({ tweets = [], ...props }) { +export default function BoxVisualizationWordcloud({ words, ...props }) { const {strings} = useContext(ContextLanguage) - const words = useMemo( - () => tokenizeTweetWords(tweets), - [tweets] - ) - return ( ) diff --git a/nest_frontend/routes/PageRepository.js b/nest_frontend/routes/PageRepository.js index 1aa2833..406c309 100644 --- a/nest_frontend/routes/PageRepository.js +++ b/nest_frontend/routes/PageRepository.js @@ -17,6 +17,9 @@ import BoxVisualizationMap from "../components/interactive/BoxVisualizationMap" import BoxVisualizationWordcloud from "../components/interactive/BoxVisualizationWordcloud" import BoxFull from "../components/base/BoxFull" import ContextLanguage from "../contexts/ContextLanguage" +import tokenizeTweetWords from "../utils/countTweetWords" +import countTweetWords from "../utils/countTweetWords" +import objectToWordcloudFormat from "../utils/objectToWordcloudFormat" export default function PageRepository({ className, ...props }) { @@ -52,6 +55,11 @@ export default function PageRepository({ className, ...props }) { ) const tweets = tweetsBv.resources && tweetsBv.error ? [] : tweetsBv.resources + const words = useMemo( + () => objectToWordcloudFormat(countTweetWords(tweets)), + [tweets] + ) + let contents; if(!repositoryBr.firstLoad || !tweetsBv.firstLoad) { contents = <> @@ -88,6 +96,7 @@ export default function PageRepository({ className, ...props }) { : null} {visualizationTab === "histogram" ? @@ -106,6 +115,7 @@ export default function PageRepository({ className, ...props }) { : null} diff --git a/nest_frontend/utils/countTweetWords.js b/nest_frontend/utils/countTweetWords.js new file mode 100644 index 0000000..41d892d --- /dev/null +++ b/nest_frontend/utils/countTweetWords.js @@ -0,0 +1,24 @@ +import sw from "stopword" + + +const stopwords = [...sw.it, ...sw.en, "rt"] + + +export default function countTweetWords(tweets = {}) { + let words = {} + for(const tweet of tweets) { + if(!tweet.content) { + continue + } + for(const word of tweet.content.toLowerCase().split(/\s+/)) { + if(stopwords.includes(word)) continue + if(word.startsWith("https://")) continue + + if(!words.hasOwnProperty(word)) { + words[word] = 0 + } + words[word] += 1 + } + } + return words +} diff --git a/nest_frontend/utils/objectToWordcloudFormat.js b/nest_frontend/utils/objectToWordcloudFormat.js new file mode 100644 index 0000000..373720f --- /dev/null +++ b/nest_frontend/utils/objectToWordcloudFormat.js @@ -0,0 +1,13 @@ +export default function objectToWordcloudFormat(words) { + let result = [] + for(const word in words) { + if(!words.hasOwnProperty(word)) { + continue + } + result.push({ + text: word, + value: words[word] + }) + } + return result +} \ No newline at end of file diff --git a/nest_frontend/utils/tokenizeTweetWords.js b/nest_frontend/utils/tokenizeTweetWords.js deleted file mode 100644 index ce48ff8..0000000 --- a/nest_frontend/utils/tokenizeTweetWords.js +++ /dev/null @@ -1,35 +0,0 @@ -import sw from "stopword" - - -const stopwords = [...sw.it, ...sw.en, "rt"] - - -export default function(tweets = {}) { - let preprocessedWords = {} - for(const tweet of tweets) { - if(!tweet.content) { - continue - } - for(const word of tweet.content.toLowerCase().split(/\s+/)) { - if(stopwords.includes(word)) continue - if(word.startsWith("https://")) continue - - if(!preprocessedWords.hasOwnProperty(word)) { - preprocessedWords[word] = 0 - } - preprocessedWords[word] += 1 - } - } - - let processedWords = [] - for(const word in preprocessedWords) { - if(!preprocessedWords.hasOwnProperty(word)) { - continue - } - processedWords.push({ - text: word, - value: preprocessedWords[word] - }) - } - return processedWords -} \ No newline at end of file