mirror of
https://github.com/Steffo99/unimore-bda-4.git
synced 2024-11-21 07:34:19 +00:00
First commit
This commit is contained in:
commit
c0d502f50d
34 changed files with 1449 additions and 0 deletions
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
/data/cratesio
|
||||
/data/neo4j/data
|
||||
/data/neo4j/logs
|
||||
/data/neo4j/run
|
||||
/data/neo4j/plugins
|
||||
/data/neo4j/import
|
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
9
.idea/libraries/apoc_full.xml
Normal file
9
.idea/libraries/apoc_full.xml
Normal file
|
@ -0,0 +1,9 @@
|
|||
<component name="libraryTable">
|
||||
<library name="apoc-full">
|
||||
<CLASSES>
|
||||
<root url="jar://$PROJECT_DIR$/data/neo4j/plugins/apoc-full.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
</library>
|
||||
</component>
|
10
.idea/misc.xml
Normal file
10
.idea/misc.xml
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DiscordProjectSettings">
|
||||
<option name="show" value="ASK" />
|
||||
<option name="description" value="" />
|
||||
</component>
|
||||
<component name="ProjectRootManager">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/unimore-bda-4.iml" filepath="$PROJECT_DIR$/.idea/unimore-bda-4.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
9
.idea/unimore-bda-4.iml
Normal file
9
.idea/unimore-bda-4.iml
Normal file
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
4
.vscode/launch.json
vendored
Normal file
4
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": []
|
||||
}
|
15
.vscode/tasks.json
vendored
Normal file
15
.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "Neo4J",
|
||||
"icon": {
|
||||
"id": "database"
|
||||
},
|
||||
"type": "shell",
|
||||
"command": "${workspaceFolder}/scripts/run-db.sh",
|
||||
"problemMatcher": [],
|
||||
"isBackground": true,
|
||||
}
|
||||
]
|
||||
}
|
1
data/neo4j
Symbolic link
1
data/neo4j
Symbolic link
|
@ -0,0 +1 @@
|
|||
/home/steffo/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-de822d9c-8f2e-4f04-9646-ac8f2fb719c6/
|
BIN
media/cratesio-categories.png
Normal file
BIN
media/cratesio-categories.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.7 KiB |
BIN
media/cratesio-keywords.png
Normal file
BIN
media/cratesio-keywords.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 29 KiB |
1
media/query-categorygraph-solution.svg
Normal file
1
media/query-categorygraph-solution.svg
Normal file
File diff suppressed because one or more lines are too long
1
media/query-randfull-solution.svg
Normal file
1
media/query-randfull-solution.svg
Normal file
File diff suppressed because one or more lines are too long
1
media/query-selfdeps-solution.svg
Normal file
1
media/query-selfdeps-solution.svg
Normal file
File diff suppressed because one or more lines are too long
1
media/query-serdedeps-plan.svg
Normal file
1
media/query-serdedeps-plan.svg
Normal file
File diff suppressed because one or more lines are too long
1
media/query-serdedeps-solution.svg
Normal file
1
media/query-serdedeps-solution.svg
Normal file
File diff suppressed because one or more lines are too long
1
media/query-steffocrates-solution.svg
Normal file
1
media/query-steffocrates-solution.svg
Normal file
File diff suppressed because one or more lines are too long
6
scripts/alter-default-password.sh
Executable file
6
scripts/alter-default-password.sh
Executable file
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/env bash
# Changes the password of the "neo4j" user from "neo4j" to the one used by the
# other scripts of this project.
# The credentials are exported as NEO4J_USERNAME / NEO4J_PASSWORD for cypher-shell.

export NEO4J_USERNAME="neo4j" NEO4J_PASSWORD="neo4j"

echo "Altering password..."

# The statement is run against the "system" database.
cypher-shell \
    --database="system" \
    --non-interactive \
    --fail-fast \
    'ALTER CURRENT USER SET PASSWORD FROM "neo4j" TO "unimore-big-data-analytics-4"'
|
7
scripts/create-neo4j-desktop-link.sh
Executable file
7
scripts/create-neo4j-desktop-link.sh
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env bash
# Points the repository's data/neo4j symbolic link at a Neo4j DBMS directory.
#
# Usage: ./create-neo4j-desktop-link.sh <dbms-directory>
#
# Example call:
# ./create-neo4j-desktop-link.sh "/home/steffo/.config/Neo4j Desktop/Application/relate-data/dbmss/dbms-13367bfc-b56d-418c-a9bd-c8c3932e1e0e"

# Refuse to run without a target instead of attempting to create an empty link.
if [ -z "$1" ]; then
    echo "Usage: $0 <dbms-directory>" >&2
    exit 1
fi

repo=$(git rev-parse --show-toplevel)
link="$repo/data/neo4j"

# Only remove the previous link when there is one, so that the first run on a
# fresh checkout does not print an error.
if [ -L "$link" ]; then
    unlink "$link"
fi

ln -s "$1" "$link"
|
15
scripts/fixup-data-files.sh
Executable file
15
scripts/fixup-data-files.sh
Executable file
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env bash
# Copies every CSV file found in data/cratesio/*/data/ into data/neo4j/import/,
# doubling each backslash on the way.
# NOTE(review): presumably needed because the CSV loader treats a backslash as
# an escape character; confirm against the Neo4j LOAD CSV documentation.

repo=$(git rev-parse --show-toplevel)
cwd=$(pwd)

cd "$repo"

# Iterate over the glob directly (instead of parsing `ls`) and quote every
# expansion, so that paths containing whitespace are handled correctly.
for file in "$repo"/data/cratesio/*/data/*.csv; do
    # When nothing matches, the pattern is left unexpanded: skip it.
    [ -e "$file" ] || continue

    echo "Fixing data file $file..."
    basefilename=$(basename "$file")
    # s=\\=\\\\=g : replace every single backslash with two backslashes.
    sed --expression='s=\\=\\\\=g' "$file" > "$repo/data/neo4j/import/$basefilename"
done

cd "$cwd"
|
18
scripts/import-cratesio.sh
Executable file
18
scripts/import-cratesio.sh
Executable file
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env bash
# Feeds the Cypher scripts in scripts/import-cratesio/ to cypher-shell, one at a time.
#
# Usage: ./import-cratesio.sh [PREFIX]
#   PREFIX  optional file name prefix; only the scripts whose name starts with
#           it are executed. Without it, all *.cypher scripts are executed.
set -e

export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="unimore-big-data-analytics-4"

repo=$(git rev-parse --show-toplevel)
cwd=$(pwd)

# Pathname expansion already returns its matches sorted, so the scripts run in
# file name order; keeping them in an array preserves paths with whitespace.
import_scripts=("$repo/scripts/import-cratesio/$1"*.cypher)

# When nothing matches, the pattern is left unexpanded: report it clearly
# instead of failing later on a redirect from a non-existent file.
if [ ! -e "${import_scripts[0]}" ]; then
    echo "No import script matching '$1*.cypher' found in $repo/scripts/import-cratesio/" >&2
    exit 1
fi

cd "$repo"

for file in "${import_scripts[@]}"; do
    echo "Executing $file..."
    cypher-shell --fail-at-end --format verbose < "$file"
done

cd "$cwd"
|
66
scripts/import-cratesio/1-crates.cypher
Normal file
66
scripts/import-cratesio/1-crates.cypher
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Import of crates.csv: creates the indexes on :Crate, then upserts one :Crate
// node per CSV row.

CREATE RANGE INDEX index_crate_id IF NOT EXISTS
FOR (c:Crate) ON (c.id);

CREATE RANGE INDEX index_crate_downloads IF NOT EXISTS
FOR (c:Crate) ON (c.downloads);

CREATE RANGE INDEX index_crate_created_at IF NOT EXISTS
FOR (c:Crate) ON (c.created_at);

CREATE RANGE INDEX index_crate_updated_at IF NOT EXISTS
FOR (c:Crate) ON (c.updated_at);

CREATE TEXT INDEX index_crate_name IF NOT EXISTS
FOR (c:Crate) ON (c.name);

// Nodes are merged on their id, in transactions of 10000 rows each.
// Text columns whose trimmed value is the empty string are stored as null.
LOAD CSV WITH HEADERS FROM "file:///crates.csv" AS row FIELDTERMINATOR ","
CALL {
    WITH row
    MERGE (c:Crate { id: toInteger(row.id) })
    SET
        c.created_at = apoc.date.parse(row.created_at, "ms", "yyyy-MM-dd HH:mm:ss"),
        c.updated_at = apoc.date.parse(row.updated_at, "ms", "yyyy-MM-dd HH:mm:ss"),
        c.max_upload_size = toInteger(row.max_upload_size),
        c.downloads = toInteger(row.downloads),
        c.description = CASE WHEN trim(row.description) = "" THEN null ELSE row.description END,
        c.documentation = CASE WHEN trim(row.documentation) = "" THEN null ELSE row.documentation END,
        c.homepage = CASE WHEN trim(row.homepage) = "" THEN null ELSE row.homepage END,
        c.name = CASE WHEN trim(row.name) = "" THEN null ELSE row.name END,
        c.readme = CASE WHEN trim(row.readme) = "" THEN null ELSE row.readme END,
        c.repository = CASE WHEN trim(row.repository) = "" THEN null ELSE row.repository END
} IN TRANSACTIONS OF 10000 ROWS;
|
13
scripts/import-cratesio/2-keywords.cypher
Normal file
13
scripts/import-cratesio/2-keywords.cypher
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Import of keywords.csv: creates the indexes on :Keyword, then upserts one
// :Keyword node per CSV row.

CREATE RANGE INDEX index_keyword_id IF NOT EXISTS
FOR (k:Keyword) ON (k.id);

CREATE TEXT INDEX index_keyword_name IF NOT EXISTS
FOR (k:Keyword) ON (k.name);

// Nodes are merged on their id; the CSV column "keyword" becomes the "name" property.
LOAD CSV WITH HEADERS FROM "file:///keywords.csv" AS row FIELDTERMINATOR ","
MERGE (k:Keyword { id: toInteger(row.id) })
SET
    k.created_at = apoc.date.parse(row.created_at, "ms", "yyyy-MM-dd HH:mm:ss"),
    k.name = row.keyword;
|
8
scripts/import-cratesio/3-crates_keywords.cypher
Normal file
8
scripts/import-cratesio/3-crates_keywords.cypher
Normal file
|
@ -0,0 +1,8 @@
|
|||
// Rebuilds the IS_TAGGED_WITH relationships from crates_keywords.csv.

// Existing relationships are deleted first, since the import below uses CREATE.
MATCH (:Crate)-[tag:IS_TAGGED_WITH]->(:Keyword)
DELETE tag;

// One relationship per CSV row whose crate and keyword both exist.
LOAD CSV WITH HEADERS FROM "file:///crates_keywords.csv" AS row FIELDTERMINATOR ","
MATCH (c:Crate {id: toInteger(row.crate_id)})
MATCH (k:Keyword {id: toInteger(row.keyword_id)})
CREATE (c)-[:IS_TAGGED_WITH]->(k);
|
50
scripts/import-cratesio/4-categories.cypher
Normal file
50
scripts/import-cratesio/4-categories.cypher
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Import of categories.csv: creates the indexes on :Category, recreates all the
// :Category nodes (plus a synthetic "Root" one), then links each category to
// its parent with a CONTAINS relationship.

CREATE RANGE INDEX index_category_id IF NOT EXISTS
FOR (category:Category)
ON (category.id);

CREATE TEXT INDEX index_category_name IF NOT EXISTS
FOR (category:Category)
ON (category.name);

CREATE TEXT INDEX index_category_slug IF NOT EXISTS
FOR (category:Category)
ON (category.slug);

CREATE TEXT INDEX index_category_leaf IF NOT EXISTS
FOR (category:Category)
ON (category.leaf);

// Categories are recreated from scratch: every existing :Category node is
// removed together with all of its relationships.
MATCH (category:Category)
DETACH DELETE category;

// Synthetic root node, not present in the CSV file.
// created_at uses timestamp() (integer milliseconds since the epoch) so that it
// has the same type as the values produced by apoc.date.parse(..., "ms", ...)
// for the imported categories.
CREATE (
    :Category {
        name: "Root",
        created_at: timestamp(),
        description: "Root category. Does not contain any category by itself.",
        id: 0,
        path: "root",
        slug: "root"
    }
);

LOAD CSV WITH HEADERS FROM "file:///categories.csv" AS line
CREATE (
    :Category {
        name: line.category,
        created_at: apoc.date.parse(line.created_at, "ms", "yyyy-MM-dd HH:mm:ss"),
        description: line.description,
        id: toInteger(line.id),
        path: line.path,
        slug: line.slug
    }
);

// leaf is the last dot-separated segment of the category path.
MATCH (c:Category)
WITH c, split(c.path, ".") AS path
SET c.leaf = path[-1];

// A category is contained by the category whose leaf equals the second-to-last
// segment of its path; single-segment paths have no such segment and get no parent.
// NOTE(review): assumes leaf values are unique across categories; confirm against the data.
MATCH (c:Category)
WITH c, split(c.path, ".") AS path
MATCH (d:Category {leaf: path[-2]})
CREATE (d)-[:CONTAINS]->(c);
|
8
scripts/import-cratesio/5-crates_categories.cypher
Normal file
8
scripts/import-cratesio/5-crates_categories.cypher
Normal file
|
@ -0,0 +1,8 @@
|
|||
// Rebuilds the (:Category)-[:CONTAINS]->(:Crate) relationships from crates_categories.csv.

// Existing relationships are deleted first, since the import below uses CREATE.
MATCH (:Category)-[containment:CONTAINS]->(:Crate)
DELETE containment;

// One relationship per CSV row whose crate and category both exist.
LOAD CSV WITH HEADERS FROM "file:///crates_categories.csv" AS row FIELDTERMINATOR ","
MATCH (c:Crate {id: toInteger(row.crate_id)})
MATCH (cat:Category {id: toInteger(row.category_id)})
CREATE (cat)-[:CONTAINS]->(c);
|
23
scripts/import-cratesio/6-users.cypher
Normal file
23
scripts/import-cratesio/6-users.cypher
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Import of users.csv: creates the indexes on :User, then upserts one :User
// node per CSV row.

CREATE RANGE INDEX index_user_id IF NOT EXISTS
FOR (u:User) ON (u.id);

CREATE RANGE INDEX index_user_ghid IF NOT EXISTS
FOR (u:User) ON (u.gh_id);

CREATE TEXT INDEX index_user_name IF NOT EXISTS
FOR (u:User) ON (u.name);

CREATE TEXT INDEX index_user_fullname IF NOT EXISTS
FOR (u:User) ON (u.full_name);

// Nodes are merged on their id. Column mapping:
//   gh_avatar -> avatar, gh_login -> name, name -> full_name
LOAD CSV WITH HEADERS FROM "file:///users.csv" AS row FIELDTERMINATOR ","
MERGE (u:User { id: toInteger(row.id) })
SET
    u.avatar = row.gh_avatar,
    u.gh_id = toInteger(row.gh_id),
    u.name = row.gh_login,
    u.full_name = row.name;
|
11
scripts/import-cratesio/7-crate_owners.cypher
Normal file
11
scripts/import-cratesio/7-crate_owners.cypher
Normal file
|
@ -0,0 +1,11 @@
|
|||
// Rebuilds the (:User)-[:OWNS]->(:Crate) relationships from crate_owners.csv.

// Existing relationships are deleted first, since the import below uses CREATE.
MATCH (:User)-[previous:OWNS]->(:Crate)
DELETE previous;

// One relationship per CSV row whose crate and owner both exist.
LOAD CSV WITH HEADERS FROM "file:///crate_owners.csv" AS row FIELDTERMINATOR ","
MATCH
    (c:Crate { id: toInteger(row.crate_id) }),
    (u:User { id: toInteger(row.owner_id) })
CREATE (u)-[:OWNS {
    created_at: apoc.date.parse(row.created_at, "ms", "yyyy-MM-dd HH:mm:ss"),
    created_by: toInteger(row.created_by),
    owner_kind: toInteger(row.owner_kind)
}]->(c);
|
50
scripts/import-cratesio/8-versions.cypher
Normal file
50
scripts/import-cratesio/8-versions.cypher
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Import of versions.csv: creates the indexes on :Version, then upserts one
// :Version node per CSV row and links it to its crate and to its publisher.

// The checksum is a string property, so it gets a TEXT index.
// (A LOOKUP index cannot be declared on a label + property pair: its only
// node form is `FOR (n) ON EACH labels(n)`.)
CREATE TEXT INDEX index_version_checksum IF NOT EXISTS
FOR (version:Version)
ON (version.checksum);

CREATE RANGE INDEX index_version_size IF NOT EXISTS
FOR (version:Version)
ON (version.size);

CREATE RANGE INDEX index_version_created_at IF NOT EXISTS
FOR (version:Version)
ON (version.created_at);

CREATE RANGE INDEX index_version_downloads IF NOT EXISTS
FOR (version:Version)
ON (version.downloads);

CREATE RANGE INDEX index_version_id IF NOT EXISTS
FOR (version:Version)
ON (version.id);

CREATE TEXT INDEX index_version_name IF NOT EXISTS
FOR (version:Version)
ON (version.name);

// Nodes are merged on their id, in transactions of 10000 rows each.
LOAD CSV WITH HEADERS FROM "file:///versions.csv" AS line FIELDTERMINATOR ","
CALL {
    WITH line
    MERGE (version:Version { id: toInteger(line.id) } )
    SET
        version.checksum = line.checksum,
        version.size = toInteger(line.crate_size),
        version.created_at = apoc.date.parse(line.created_at, "ms", "yyyy-MM-dd HH:mm:ss"),
        version.downloads = toInteger(line.downloads),
        version.license = line.license,
        version.features = line.features,
        version.links = line.links,
        // The CSV column "num" becomes the "name" property.
        version.name = line.num,
        // Only the value "t" is treated as true; anything else is false.
        version.is_yanked = CASE line.yanked
            WHEN "t"
                THEN true
            ELSE
                false
        END
    // A row whose crate_id matches no :Crate is dropped by this MATCH: the
    // :Version node is kept, but it gets neither of the two relationships.
    WITH line, version
    MATCH (crate:Crate { id: toInteger(line.crate_id) })
    MERGE (crate)-[:HAS_VERSION]->(version)
    // PUBLISHED is only created when published_by matches an existing :User.
    WITH line, version
    MATCH (user:User { id: toInteger(line.published_by) })
    MERGE (user)-[:PUBLISHED]->(version)
} IN TRANSACTIONS OF 10000 ROWS;
|
38
scripts/import-cratesio/9-dependencies.cypher
Normal file
38
scripts/import-cratesio/9-dependencies.cypher
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Import of dependencies.csv: creates the indexes on the DEPENDS_ON
// relationship, then upserts one (:Version)-[:DEPENDS_ON]->(:Crate)
// relationship per CSV row whose version and crate both exist.

CREATE RANGE INDEX index_dependency_id IF NOT EXISTS
FOR ()-[dependency:DEPENDS_ON]->()
ON (dependency.id);

CREATE TEXT INDEX index_dependency_requirement IF NOT EXISTS
FOR ()-[dependency:DEPENDS_ON]->()
ON (dependency.requirement);

CREATE TEXT INDEX index_dependency_explicit_name IF NOT EXISTS
FOR ()-[dependency:DEPENDS_ON]->()
ON (dependency.explicit_name);

// Rows are processed in transactions of 10000 rows each.
LOAD CSV WITH HEADERS FROM "file:///dependencies.csv" AS line FIELDTERMINATOR ","
CALL {
    WITH line
    MATCH
        (version:Version { id: toInteger(line.version_id) }),
        (requirement:Crate { id: toInteger(line.crate_id) })
    // NOTE(review): the MERGE is on the bare pattern, so if the CSV contains
    // several rows for the same (version, crate) pair they all end up on a
    // single relationship, whose properties come from the last row processed.
    // Confirm whether that is intended.
    MERGE (version)-[dependency:DEPENDS_ON]->(requirement)
    SET
        // Stored as an integer, like every other id in the import.
        dependency.id = toInteger(line.id),
        // Only the value "t" is treated as true; anything else is false.
        dependency.is_optional = CASE line.optional
            WHEN "t"
                THEN true
            ELSE
                false
        END,
        dependency.is_default = CASE line.default_features
            WHEN "t"
                THEN true
            ELSE
                false
        END,
        dependency.explicit_name = line.explicit_name,
        dependency.features = line.features,
        // The CSV column "req" becomes the "requirement" property.
        dependency.requirement = line.req,
        dependency.target = line.target
} IN TRANSACTIONS OF 10000 ROWS;
|
4
scripts/run-db.sh
Executable file
4
scripts/run-db.sh
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/usr/bin/env bash
# Starts Neo4j in console mode, with NEO4J_HOME set to the repository's data/neo4j directory.

NEO4J_HOME="$(git rev-parse --show-toplevel)/data/neo4j"
export NEO4J_HOME

neo4j console
|
9
scripts/setup-apoc.sh
Executable file
9
scripts/setup-apoc.sh
Executable file
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env bash
# Downloads the APOC 5.5.0 core jar into the plugins directory of the
# repository's Neo4j instance, creating the directory if needed.

plugins_dir="$(git rev-parse --show-toplevel)/data/neo4j/plugins"

echo "Creating plugins directory..."
mkdir --parents "$plugins_dir"

echo "Installing Neo4j Apoc..."
wget 'https://github.com/neo4j/apoc/releases/download/5.5.0/apoc-5.5.0-core.jar' \
    --output-document="$plugins_dir/apoc-core.jar"
|
||||
|
Loading…
Reference in a new issue