1
Fork 0
mirror of https://github.com/pds-nest/nest.git synced 2024-11-22 04:54:18 +00:00

🔧 Continue improving visualization

This commit is contained in:
Steffo 2021-05-20 12:15:13 +02:00
parent b471fc17c0
commit df4b9bb368
Signed by: steffo
GPG key ID: 6965406171929D01
6 changed files with 137 additions and 64 deletions

View file

@ -2,31 +2,95 @@ import React, { useMemo } from "react"
import FormLabelled from "../base/FormLabelled"
import FormLabel from "../base/formparts/FormLabel"
import BoxFullScrollable from "../base/BoxFullScrollable"
import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
import tokenizeTweetWords from "../../utils/countTweetWords"
export default function BoxVisualizationStats({ tweets, totalTweetCount, ...props }) {
export default function BoxVisualizationStats({ tweets, words, totalTweetCount, ...props }) {
const words = useMemo(
() => tokenizeTweetWords(tweets),
const tweetCount = useMemo(
() => tweets.length,
[tweets]
)
const tweetCount = tweets.length
const tweetPct = tweetCount / totalTweetCount * 100
const tweetLocationCount = tweets.filter(tweet => tweet.location).length
const tweetLocationPct = tweetLocationCount / tweetCount * 100
const tweetContent = tweets.filter(tweet => tweet.content)
const tweetContentCount = tweetContent.length
const tweetContentPct = tweetContentCount / tweetCount * 100
const wordCount = words.map(word => word.value).reduce((a, b) => a+b)
const mostPopularWord = words.sort((wa, wb) => {
const tweetPct = useMemo(
() => tweetCount / totalTweetCount * 100,
[tweetCount, totalTweetCount]
)
const tweetLocationCount = useMemo(
() => tweets.filter(tweet => tweet.location).length,
[tweets]
)
const tweetLocationPct = useMemo(
() => tweetLocationCount / tweetCount * 100,
[tweetLocationCount, tweetCount]
)
const tweetContent = useMemo(
() => tweets.filter(tweet => tweet.content),
[tweets]
)
const tweetContentCount = useMemo(
() => tweetContent.length,
[tweetContent],
)
const tweetContentPct = useMemo(
() => tweetContentCount / tweetCount * 100,
[tweetContentCount, tweetCount],
)
console.debug(words)
const wordCount = useMemo(
() => words.map(word => word.value).reduce((a, b) => a+b),
[words]
)
const mostPopularWord = useMemo(
() => {
return words.sort((wa, wb) => {
if(wa.value > wb.value) return -1
if(wa.value < wb.value) return 1
return 0
})[0].text
const users = [...new Set(tweets.map(tweet => tweet.poster))]
const usersCount = users.length
},
[words]
)
const users = useMemo(
() => tweets.map(tweet => tweet.poster),
[tweets]
)
const uniqueUsers = useMemo(
() => [...new Set(users)],
[users]
)
const uniqueUsersCount = useMemo(
() => uniqueUsers.length,
[uniqueUsers]
)
const mostActiveUser = useMemo(
() => {
if(uniqueUsers.length === 0) return null
return uniqueUsers.map(user => {
return {
user: user,
count: tweets.filter(tweet => tweet.poster === user).length
}
}).sort((a, b) => {
if(a.count > b.count) return -1
if(a.count < b.count) return 1
return 0
})[0]
},
[uniqueUsers, tweets]
)
// TODO: tweets with picture count
// TODO: tweets with picture pct
@ -68,8 +132,11 @@ export default function BoxVisualizationStats({ tweets, totalTweetCount, ...prop
<FormLabel text={"% of tweets with image"}>
<b>🚧</b>
</FormLabel>
<FormLabel text={"Users count"}>
<b>{usersCount}</b>
<FormLabel text={"Unique posters"}>
<b>{uniqueUsersCount}</b>
</FormLabel>
<FormLabel text={"Most active user"}>
<b>{mostActiveUser.user} ({mostActiveUser.count} tweets)</b>
</FormLabel>
</FormLabelled>
</BoxFullScrollable>

View file

@ -1,17 +1,11 @@
import React, { useContext, useMemo } from "react"
import React, { useContext } from "react"
import BoxWordcloud from "../base/BoxWordcloud"
import ContextLanguage from "../../contexts/ContextLanguage"
import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
export default function BoxVisualizationWordcloud({ tweets = [], ...props }) {
export default function BoxVisualizationWordcloud({ words, ...props }) {
const {strings} = useContext(ContextLanguage)
const words = useMemo(
() => tokenizeTweetWords(tweets),
[tweets]
)
return (
<BoxWordcloud header={strings.wordcloud} words={words} {...props}/>
)

View file

@ -17,6 +17,9 @@ import BoxVisualizationMap from "../components/interactive/BoxVisualizationMap"
import BoxVisualizationWordcloud from "../components/interactive/BoxVisualizationWordcloud"
import BoxFull from "../components/base/BoxFull"
import ContextLanguage from "../contexts/ContextLanguage"
import tokenizeTweetWords from "../utils/countTweetWords"
import countTweetWords from "../utils/countTweetWords"
import objectToWordcloudFormat from "../utils/objectToWordcloudFormat"
export default function PageRepository({ className, ...props }) {
@ -52,6 +55,11 @@ export default function PageRepository({ className, ...props }) {
)
const tweets = tweetsBv.resources && tweetsBv.error ? [] : tweetsBv.resources
const words = useMemo(
() => objectToWordcloudFormat(countTweetWords(tweets)),
[tweets]
)
let contents;
if(!repositoryBr.firstLoad || !tweetsBv.firstLoad) {
contents = <>
@ -88,6 +96,7 @@ export default function PageRepository({ className, ...props }) {
<BoxVisualizationWordcloud
className={Style.Wordcloud}
tweets={tweets}
words={words}
/>
: null}
{visualizationTab === "histogram" ?
@ -106,6 +115,7 @@ export default function PageRepository({ className, ...props }) {
<BoxVisualizationStats
className={Style.Wordcloud}
tweets={tweets}
words={words}
totalTweetCount={tweets.length}
/>
: null}

View file

@ -0,0 +1,24 @@
import sw from "stopword"
const stopwords = [...sw.it, ...sw.en, "rt"]
/**
 * Count how many times each word occurs in the content of the given tweets.
 *
 * Each tweet's `content` is lowercased and split on whitespace. Tokens that are
 * listed in the module-level `stopwords` array, and tokens that start with
 * "https://", are not counted.
 *
 * @param {Iterable<{content?: string}>} [tweets=[]] - Tweets to scan. Tweets whose
 *     `content` is falsy are skipped.
 * @returns {Object<string, number>} A plain object mapping every counted word to
 *     the number of times it was seen.
 */
export default function countTweetWords(tweets = []) {
    // Set membership replaces a linear scan of the stopword list for every token.
    const ignoredWords = new Set(stopwords)

    // A prototype-less accumulator has no inherited keys, so every token
    // (including "__proto__") is stored as an ordinary own property.
    const counts = Object.create(null)

    for(const tweet of tweets) {
        if(!tweet.content) {
            continue
        }

        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
            // Leading or trailing whitespace makes split() emit empty tokens.
            if(word === "") continue
            if(ignoredWords.has(word)) continue
            if(word.startsWith("https://")) continue

            counts[word] = (counts[word] || 0) + 1
        }
    }

    // Hand back a regular object so callers can still use Object.prototype
    // methods (e.g. hasOwnProperty) on the result.
    return { ...counts }
}

View file

@ -0,0 +1,13 @@
/**
 * Convert a `{word: count}` dictionary into a list of `{text, value}` entries.
 *
 * Only the dictionary's own enumerable properties are converted; inherited
 * properties are ignored.
 *
 * @param {Object<string, *>} words - Dictionary to convert. `null` and
 *     `undefined` are treated as an empty dictionary.
 * @returns {Array<{text: string, value: *}>} One entry per own property, where
 *     `text` is the key and `value` is the associated value.
 */
export default function objectToWordcloudFormat(words) {
    // Object.entries throws on null/undefined, whereas iterating them with
    // for...in simply yields nothing; keep that lenient behaviour.
    if(words == null) {
        return []
    }

    // Object.entries never calls a method on `words` itself, so it also works
    // for prototype-less dictionaries and for dictionaries that contain a
    // "hasOwnProperty" key of their own.
    return Object.entries(words).map(([text, value]) => ({ text, value }))
}

View file

@ -1,35 +0,0 @@
import sw from "stopword"
const stopwords = [...sw.it, ...sw.en, "rt"]
/**
 * Count the words found in the content of the given tweets and return them as
 * a list of `{text, value}` entries.
 *
 * Each tweet's `content` is lowercased and split on whitespace. Tokens that are
 * listed in the module-level `stopwords` array, and tokens that start with
 * "https://", are not counted.
 *
 * @param {Iterable<{content?: string}>} [tweets=[]] - Tweets to scan. Tweets whose
 *     `content` is falsy are skipped.
 * @returns {Array<{text: string, value: number}>} One entry per counted word,
 *     where `value` is the number of times the word was seen.
 */
export default function tokenizeTweetWords(tweets = []) {
    // Set membership replaces a linear scan of the stopword list for every token.
    const ignoredWords = new Set(stopwords)

    // A prototype-less accumulator has no inherited keys, so every token
    // (including "__proto__") is stored as an ordinary own property.
    const counts = Object.create(null)

    for(const tweet of tweets) {
        if(!tweet.content) {
            continue
        }

        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
            // Leading or trailing whitespace makes split() emit empty tokens.
            if(word === "") continue
            if(ignoredWords.has(word)) continue
            if(word.startsWith("https://")) continue

            counts[word] = (counts[word] || 0) + 1
        }
    }

    return Object.entries(counts).map(([text, value]) => ({ text, value }))
}