mirror of
https://github.com/pds-nest/nest.git
synced 2024-11-22 04:54:18 +00:00
🔧 Continue improving visualization
This commit is contained in:
parent
b471fc17c0
commit
df4b9bb368
6 changed files with 137 additions and 64 deletions
|
@ -2,31 +2,95 @@ import React, { useMemo } from "react"
|
|||
import FormLabelled from "../base/FormLabelled"
|
||||
import FormLabel from "../base/formparts/FormLabel"
|
||||
import BoxFullScrollable from "../base/BoxFullScrollable"
|
||||
import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
|
||||
import tokenizeTweetWords from "../../utils/countTweetWords"
|
||||
|
||||
|
||||
export default function BoxVisualizationStats({ tweets, totalTweetCount, ...props }) {
|
||||
export default function BoxVisualizationStats({ tweets, words, totalTweetCount, ...props }) {
|
||||
|
||||
const words = useMemo(
|
||||
() => tokenizeTweetWords(tweets),
|
||||
const tweetCount = useMemo(
|
||||
() => tweets.length,
|
||||
[tweets]
|
||||
)
|
||||
|
||||
const tweetCount = tweets.length
|
||||
const tweetPct = tweetCount / totalTweetCount * 100
|
||||
const tweetLocationCount = tweets.filter(tweet => tweet.location).length
|
||||
const tweetLocationPct = tweetLocationCount / tweetCount * 100
|
||||
const tweetContent = tweets.filter(tweet => tweet.content)
|
||||
const tweetContentCount = tweetContent.length
|
||||
const tweetContentPct = tweetContentCount / tweetCount * 100
|
||||
const wordCount = words.map(word => word.value).reduce((a, b) => a+b)
|
||||
const mostPopularWord = words.sort((wa, wb) => {
|
||||
if(wa.value > wb.value) return -1
|
||||
if(wa.value < wb.value) return 1
|
||||
return 0
|
||||
})[0].text
|
||||
const users = [...new Set(tweets.map(tweet => tweet.poster))]
|
||||
const usersCount = users.length
|
||||
const tweetPct = useMemo(
|
||||
() => tweetCount / totalTweetCount * 100,
|
||||
[tweetCount, totalTweetCount]
|
||||
)
|
||||
|
||||
const tweetLocationCount = useMemo(
|
||||
() => tweets.filter(tweet => tweet.location).length,
|
||||
[tweets]
|
||||
)
|
||||
|
||||
const tweetLocationPct = useMemo(
|
||||
() => tweetLocationCount / tweetCount * 100,
|
||||
[tweetLocationCount, tweetCount]
|
||||
)
|
||||
|
||||
const tweetContent = useMemo(
|
||||
() => tweets.filter(tweet => tweet.content),
|
||||
[tweets]
|
||||
)
|
||||
|
||||
const tweetContentCount = useMemo(
|
||||
() => tweetContent.length,
|
||||
[tweetContent],
|
||||
)
|
||||
|
||||
const tweetContentPct = useMemo(
|
||||
() => tweetContentCount / tweetCount * 100,
|
||||
[tweetContentCount, tweetCount],
|
||||
)
|
||||
|
||||
console.debug(words)
|
||||
|
||||
const wordCount = useMemo(
|
||||
() => words.map(word => word.value).reduce((a, b) => a+b),
|
||||
[words]
|
||||
)
|
||||
|
||||
const mostPopularWord = useMemo(
|
||||
() => {
|
||||
return words.sort((wa, wb) => {
|
||||
if(wa.value > wb.value) return -1
|
||||
if(wa.value < wb.value) return 1
|
||||
return 0
|
||||
})[0].text
|
||||
},
|
||||
[words]
|
||||
)
|
||||
|
||||
const users = useMemo(
|
||||
() => tweets.map(tweet => tweet.poster),
|
||||
[tweets]
|
||||
)
|
||||
|
||||
const uniqueUsers = useMemo(
|
||||
() => [...new Set(users)],
|
||||
[users]
|
||||
)
|
||||
|
||||
const uniqueUsersCount = useMemo(
|
||||
() => uniqueUsers.length,
|
||||
[uniqueUsers]
|
||||
)
|
||||
|
||||
const mostActiveUser = useMemo(
|
||||
() => {
|
||||
if(uniqueUsers.length === 0) return null
|
||||
return uniqueUsers.map(user => {
|
||||
return {
|
||||
user: user,
|
||||
count: tweets.filter(tweet => tweet.poster === user).length
|
||||
}
|
||||
}).sort((a, b) => {
|
||||
if(a.count > b.count) return -1
|
||||
if(a.count < b.count) return 1
|
||||
return 0
|
||||
})[0]
|
||||
},
|
||||
[uniqueUsers, tweets]
|
||||
)
|
||||
|
||||
// TODO: tweets with picture count
|
||||
// TODO: tweets with picture pct
|
||||
|
@ -68,8 +132,11 @@ export default function BoxVisualizationStats({ tweets, totalTweetCount, ...prop
|
|||
<FormLabel text={"% of tweets with image"}>
|
||||
<b>🚧</b>
|
||||
</FormLabel>
|
||||
<FormLabel text={"Users count"}>
|
||||
<b>{usersCount}</b>
|
||||
<FormLabel text={"Unique posters"}>
|
||||
<b>{uniqueUsersCount}</b>
|
||||
</FormLabel>
|
||||
<FormLabel text={"Most active user"}>
|
||||
<b>{mostActiveUser.user} ({mostActiveUser.count} tweets)</b>
|
||||
</FormLabel>
|
||||
</FormLabelled>
|
||||
</BoxFullScrollable>
|
||||
|
|
|
@ -1,17 +1,11 @@
|
|||
import React, { useContext, useMemo } from "react"
|
||||
import React, { useContext } from "react"
|
||||
import BoxWordcloud from "../base/BoxWordcloud"
|
||||
import ContextLanguage from "../../contexts/ContextLanguage"
|
||||
import tokenizeTweetWords from "../../utils/tokenizeTweetWords"
|
||||
|
||||
|
||||
export default function BoxVisualizationWordcloud({ tweets = [], ...props }) {
|
||||
export default function BoxVisualizationWordcloud({ words, ...props }) {
|
||||
const {strings} = useContext(ContextLanguage)
|
||||
|
||||
const words = useMemo(
|
||||
() => tokenizeTweetWords(tweets),
|
||||
[tweets]
|
||||
)
|
||||
|
||||
return (
|
||||
<BoxWordcloud header={strings.wordcloud} words={words} {...props}/>
|
||||
)
|
||||
|
|
|
@ -17,6 +17,9 @@ import BoxVisualizationMap from "../components/interactive/BoxVisualizationMap"
|
|||
import BoxVisualizationWordcloud from "../components/interactive/BoxVisualizationWordcloud"
|
||||
import BoxFull from "../components/base/BoxFull"
|
||||
import ContextLanguage from "../contexts/ContextLanguage"
|
||||
import tokenizeTweetWords from "../utils/countTweetWords"
|
||||
import countTweetWords from "../utils/countTweetWords"
|
||||
import objectToWordcloudFormat from "../utils/objectToWordcloudFormat"
|
||||
|
||||
|
||||
export default function PageRepository({ className, ...props }) {
|
||||
|
@ -52,6 +55,11 @@ export default function PageRepository({ className, ...props }) {
|
|||
)
|
||||
const tweets = tweetsBv.resources && tweetsBv.error ? [] : tweetsBv.resources
|
||||
|
||||
const words = useMemo(
|
||||
() => objectToWordcloudFormat(countTweetWords(tweets)),
|
||||
[tweets]
|
||||
)
|
||||
|
||||
let contents;
|
||||
if(!repositoryBr.firstLoad || !tweetsBv.firstLoad) {
|
||||
contents = <>
|
||||
|
@ -88,6 +96,7 @@ export default function PageRepository({ className, ...props }) {
|
|||
<BoxVisualizationWordcloud
|
||||
className={Style.Wordcloud}
|
||||
tweets={tweets}
|
||||
words={words}
|
||||
/>
|
||||
: null}
|
||||
{visualizationTab === "histogram" ?
|
||||
|
@ -106,6 +115,7 @@ export default function PageRepository({ className, ...props }) {
|
|||
<BoxVisualizationStats
|
||||
className={Style.Wordcloud}
|
||||
tweets={tweets}
|
||||
words={words}
|
||||
totalTweetCount={tweets.length}
|
||||
/>
|
||||
: null}
|
||||
|
|
24
nest_frontend/utils/countTweetWords.js
Normal file
24
nest_frontend/utils/countTweetWords.js
Normal file
|
@ -0,0 +1,24 @@
|
|||
import sw from "stopword"
|
||||
|
||||
|
||||
const stopwords = [...sw.it, ...sw.en, "rt"]
|
||||
|
||||
|
||||
/**
 * Count how many times each word appears in the content of the given tweets.
 *
 * The content of every tweet is lowercased and split on whitespace. Empty tokens, ignored words and tokens
 * starting with "https://" are not counted.
 *
 * @param {Iterable<{content?: string}>} [tweets=[]] - The tweets to scan; tweets without a truthy `content` are
 * skipped. `null` and `undefined` are treated as an empty list.
 * @param {Iterable<string>} [ignoredWords=stopwords] - Lowercase words to leave out of the count; defaults to the
 * module-level `stopwords` list.
 * @returns {Object<string, number>} A plain object mapping each counted word to its number of occurrences.
 */
export default function countTweetWords(tweets = [], ignoredWords = stopwords) {
    const words = {}

    // A plain object is not iterable, so the fallback for a missing list has to be "nothing to count".
    if(!tweets) {
        return words
    }

    // Build the lookup once, so that checking a token does not scan the whole list every time.
    const ignored = new Set(ignoredWords)

    for(const tweet of tweets) {
        if(!tweet.content) {
            continue
        }
        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
            // Splitting content with leading or trailing whitespace yields empty tokens, which are not words.
            if(word === "") continue
            if(ignored.has(word)) continue
            if(word.startsWith("https://")) continue

            // Borrow the method from Object.prototype so that the check does not depend on the map's own keys.
            if(!Object.prototype.hasOwnProperty.call(words, word)) {
                words[word] = 0
            }
            words[word] += 1
        }
    }
    return words
}
|
13
nest_frontend/utils/objectToWordcloudFormat.js
Normal file
13
nest_frontend/utils/objectToWordcloudFormat.js
Normal file
|
@ -0,0 +1,13 @@
|
|||
/**
 * Convert a word → count object into a list of wordcloud entries.
 *
 * @param {Object<string, number>} words - Object whose own enumerable keys are words and whose values are counts.
 * @returns {Array<{text: string, value: number}>} One `{text, value}` entry for each own key of `words`.
 */
export default function objectToWordcloudFormat(words) {
    const entries = []
    for(const text in words) {
        // for...in also visits inherited keys: keep only the ones defined on the object itself.
        if(words.hasOwnProperty(text)) {
            entries.push({text, value: words[text]})
        }
    }
    return entries
}
|
|
@ -1,35 +0,0 @@
|
|||
import sw from "stopword"
|
||||
|
||||
|
||||
const stopwords = [...sw.it, ...sw.en, "rt"]
|
||||
|
||||
|
||||
/**
 * Count the words appearing in the content of the given tweets and return them as `{text, value}` entries.
 *
 * The content of every tweet is lowercased and split on whitespace. Empty tokens, words in the module-level
 * `stopwords` list and tokens starting with "https://" are not counted.
 *
 * @param {Iterable<{content?: string}>} [tweets=[]] - The tweets to scan; tweets without a truthy `content` are
 * skipped.
 * @returns {Array<{text: string, value: number}>} One entry per counted word, where `value` is the number of
 * occurrences.
 */
export default function(tweets = []) {
    const counts = {}
    for(const tweet of tweets) {
        if(!tweet.content) {
            continue
        }
        for(const word of tweet.content.toLowerCase().split(/\s+/)) {
            // Splitting content with leading or trailing whitespace yields empty tokens, which are not words.
            if(word === "") continue
            if(stopwords.includes(word)) continue
            if(word.startsWith("https://")) continue

            // Borrow the method from Object.prototype so that the check does not depend on the map's own keys.
            if(!Object.prototype.hasOwnProperty.call(counts, word)) {
                counts[word] = 0
            }
            counts[word] += 1
        }
    }

    return Object.keys(counts).map(word => ({
        text: word,
        value: counts[word]
    }))
}
|
Loading…
Reference in a new issue