1
Fork 0
mirror of https://github.com/Steffo99/sdmx-sandbox.git synced 2025-01-03 04:34:19 +00:00

First commit

This commit is contained in:
Steffo 2021-03-12 12:07:11 +01:00
commit a217b0ee17
Signed by: steffo
GPG key ID: 6965406171929D01
9 changed files with 420 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
venv/

8
.idea/.gitignore vendored Normal file
View file

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

6
.idea/discord.xml Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DiscordProjectSettings">
<option name="show" value="PROJECT_FILES" />
</component>
</project>

View file

@ -0,0 +1,55 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="InconsistentLineSeparators" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="JupyterPackageInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="LessResolvedByNameOnly" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="LessUnresolvedMixin" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="LessUnresolvedVariable" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="LongLine" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="PoetryPackageVersion" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="ProblematicWhitespace" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyAbstractClassInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyAttributeOutsideInitInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyAugmentAssignmentInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyBroadExceptionInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="PyDocstringTypesInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyDunderSlotsInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyFinalInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyFromFutureImportInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyGlobalUndefinedInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyInconsistentIndentationInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyInterpreterInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyMandatoryEncodingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyMissingTypeHintsInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyNestedDecoratorsInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyNoneFunctionAssignmentInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="false" level="ERROR" enabled_by_default="false">
<option name="ignoredPackages">
<value>
<list size="0" />
</value>
</option>
</inspection_tool>
<inspection_tool class="PyRelativeImportInspection" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyReturnFromInitInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PySetFunctionToLiteralInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyStringFormatInspection" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="PyTrailingSemicolonInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyUnnecessaryBackslashInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpAnonymousGroup" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpEscapedMetaCharacter" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpOctalEscape" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpRedundantEscape" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpRedundantNestedCharacterClass" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpRepeatedSpace" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="RegExpUnexpectedAnchor" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="ShellCheck" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>

9
.idea/misc.xml Normal file
View file

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ClojureProjectResolveSettings">
<currentScheme>IDE</currentScheme>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_15" project-jdk-name="Python 3.9 (sdmx-sandbox)" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
.idea/modules.xml Normal file
View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/sdmx-sandbox.iml" filepath="$PROJECT_DIR$/sdmx-sandbox.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

316
pandasdmx.ipynb Normal file
View file

@ -0,0 +1,316 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# PandaSDMX\n",
"\n",
"- [Documentazione aggiornata (v1.4.1)](https://pandasdmx.readthedocs.io/en/latest/)\n",
"- [Esempio approfondito (ma non troppo aggiornato)](https://pandasdmx.readthedocs.io/en/latest/walkthrough.html#sdmx-workflow)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## Installazione\n",
"\n",
"- L'ultima versione non funziona con Pydantic 1.8.1 ma richiede 1.7 ([dr-leo/pandaSDMX#204](https://github.com/dr-leo/pandaSDMX/issues/204))"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandasdmx in ./venv/lib/python3.9/site-packages (1.4.1)\r\n",
"Requirement already satisfied: pydantic==1.7 in ./venv/lib/python3.9/site-packages (1.7)\r\n",
"Requirement already satisfied: requests>=2.7 in ./venv/lib/python3.9/site-packages (from pandasdmx) (2.25.1)\r\n",
"Requirement already satisfied: lxml>=3.6 in ./venv/lib/python3.9/site-packages (from pandasdmx) (4.6.2)\r\n",
"Requirement already satisfied: pandas>=1.0 in ./venv/lib/python3.9/site-packages (from pandasdmx) (1.2.3)\r\n",
"Requirement already satisfied: pytz>=2017.3 in ./venv/lib/python3.9/site-packages (from pandas>=1.0->pandasdmx) (2021.1)\r\n",
"Requirement already satisfied: numpy>=1.16.5 in ./venv/lib/python3.9/site-packages (from pandas>=1.0->pandasdmx) (1.20.1)\r\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in ./venv/lib/python3.9/site-packages (from pandas>=1.0->pandasdmx) (2.8.1)\r\n",
"Requirement already satisfied: six>=1.5 in ./venv/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas>=1.0->pandasdmx) (1.15.0)\r\n",
"Requirement already satisfied: idna<3,>=2.5 in ./venv/lib/python3.9/site-packages (from requests>=2.7->pandasdmx) (2.10)\r\n",
"Requirement already satisfied: certifi>=2017.4.17 in ./venv/lib/python3.9/site-packages (from requests>=2.7->pandasdmx) (2020.12.5)\r\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in ./venv/lib/python3.9/site-packages (from requests>=2.7->pandasdmx) (1.26.3)\r\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in ./venv/lib/python3.9/site-packages (from requests>=2.7->pandasdmx) (4.0.0)\r\n"
]
}
],
"source": [
"!pip install pandasdmx pydantic==1.7"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## Esempio"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/mnt/tera/ext4/code/sdmx-sandbox/venv/lib/python3.9/site-packages/pandasdmx/remote.py:11: RuntimeWarning: optional dependency requests_cache is not installed; cache options to Session() have no effect\n",
" warn(\n"
]
}
],
"source": [
"import pandas\n",
"import pandasdmx\n",
"\n",
"# Per type annotations\n",
"import pandasdmx.message\n",
"import pandasdmx.model\n",
"import pandasdmx.source"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"È possibile selezionare tra più fonti di dati, tra le quali Eurostat:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"data": {
"text/plain": "<pandasdmx.api.Request at 0x7f19b7fcba30>"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"eurostat: pandasdmx.Request = pandasdmx.Request(\"ESTAT\")\n",
"eurostat"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"Sembra che PandaSDMX implementi la funzionalità di ricerca dei metadati che cercavamo:"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"data": {
"text/plain": "<pandasdmx.StructureMessage>\n <Header>\n id: 'IDREF351597'\n prepared: '2021-03-12T10:31:59.307000+00:00'\n receiver: <Agency Unknown>\n sender: <Agency Unknown>\n source: \n test: False\n response: <Response [200]>\n DataflowDefinition (6573): DS-018995 DS-022469 DS-032655 DS-043227 DS...\n DataStructureDefinition (6573): DSD_DS-018995 DSD_DS-022469 DSD_DS-03..."
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Scarica i metadati di TUTTI i dataflow disponibili su Eurostat\n",
"# Ci mette qualche minuto: i dataflow sono 6573!\n",
"flow_msg: pandasdmx.message.Message = eurostat.dataflow()\n",
"flow_msg"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"source": [
"# Convertiamo i risultati in due Series di pandas, una con i dataflow e una con la loro relativa struttura\n",
"_dict: dict[str, pandas.Series] = flow_msg.to_pandas()\n",
"dataflows = _dict[\"dataflow\"]\n",
"structure = _dict[\"structure\"]\n",
"dataflows, structure"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
},
"execution_count": 13,
"outputs": [
{
"data": {
"text/plain": "(DS-018995 EU trade since 1988 by SITC\n DS-022469 EXTRA EU trade since 1999 by mode of transport...\n DS-032655 EU trade since 1988 by BEC\n DS-043227 EFTA trade since 1995 by SITC\n DS-066341 Sold production, exports and imports by PRODCO...\n ... \n yth_incl_120 Young people living in households with very lo...\n yth_part_010 Frequency of getting together with relatives o...\n yth_part_020 Frequency of contacts with relatives or friend...\n yth_part_030 Participation of young people in activities of...\n yth_volunt_010 Participation of young people in informal volu...\n Length: 6573, dtype: object,\n DSD_DS-018995 \n DSD_DS-022469 \n DSD_DS-032655 \n DSD_DS-043227 \n DSD_DS-066341 \n ..\n DSD_yth_incl_120 \n DSD_yth_part_010 \n DSD_yth_part_020 \n DSD_yth_part_030 \n DSD_yth_volunt_010 \n Length: 6573, dtype: object)"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"cell_type": "code",
"execution_count": 17,
"outputs": [
{
"data": {
"text/plain": "educ_enrl1ad Students by ISCED level, study intensity and sex\neduc_enrl1at Students by ISCED level, type of institution a...\neduc_enrl1tl Students by ISCED level, age and sex\neduc_enrl5 Tertiary students (ISCED 5-6) by field of educ...\neduc_enrl6 Tertiary students (ISCED 5-6) non-citizens, n...\neduc_enrl8 Tertiary students (ISCED 5-6) by country of ci...\neduc_enrllng1 Students in ISCED 1-3 by modern foreign langua...\neduc_enrllng2 Students in ISCED 1-3 by number of modern fore...\neduc_fiaid Financial aid to students\neduc_ilev Distribution of pupils/ students by level\neduc_iste Pupil/ student - teacher ratio and average cla...\neduc_mofo_dst Foreign students by level of education and cou...\neduc_mofo_fld Foreign students by level and field of education\neduc_mofo_gen Foreign students by level of education and sex\neduc_mofo_orig Foreign students by level of education and cou...\neduc_momo_dst Students going abroad by level of education an...\neduc_momo_fld Students from abroad by level and field of edu...\neduc_momo_gen Students from abroad by level of education and...\neduc_momo_orig Students from abroad by level of education and...\neduc_outc_pisa Underachieving 15-year-old students by sex and...\neduc_renrlrg1 Students by level of education, orientation, s...\neduc_renrlrg3 Students by age, sex and NUTS 2 regions\neduc_thmob Student mobility\neduc_uoe_enra01 Pupils and students enrolled by education leve...\neduc_uoe_enra02 Pupils and students enrolled by education leve...\neduc_uoe_enra03 Pupils and students enrolled by education leve...\neduc_uoe_enra04 Pupils and students by education level - as % ...\neduc_uoe_enra05 Pupils and students in education by age groups...\neduc_uoe_enra06 Pupils and students in education aged 30 and o...\neduc_uoe_enra07 Expected school years of pupils and students b...\neduc_uoe_enra08 Students in post-compulsory education - as % o...\neduc_uoe_enra09 Students participation at the end of compulsor...\neduc_uoe_enra11 Pupils and students enrolled by education leve...\neduc_uoe_enra12 Pupils and students enrolled by sex, age and N...\neduc_uoe_enra13 Distribution of pupils and students enrolled i...\neduc_uoe_enra16 Pupils and students enrolled by education leve...\neduc_uoe_enrt01 Students enrolled in tertiary education by edu...\neduc_uoe_enrt02 Students enrolled in tertiary education by edu...\neduc_uoe_enrt03 Students enrolled in tertiary education by edu...\neduc_uoe_enrt04 Distribution of students enrolled at tertiary ...\neduc_uoe_enrt05 Ratio of the proportion of tertiary students o...\neduc_uoe_enrt06 Students enrolled in tertiary education by edu...\neduc_uoe_enrt07 Students in tertiary education by age groups -...\neduc_uoe_enrt08 Students in tertiary education - as % of 20-24...\neduc_uoe_fina01 Financial aid to students by education level -...\neduc_uoe_fine09 Public expenditure on education per pupil/stud...\neduc_uoe_fine10 Pupils and students enrolled by education leve...\neduc_uoe_fini04 Annual expenditure on educational institutions...\neduc_uoe_fini06 Ratio of annual expenditure per student at the...\neduc_uoe_mobs01 Mobile students from abroad enrolled by educat...\neduc_uoe_mobs02 Mobile students from abroad enrolled by educat...\neduc_uoe_mobs03 Share of mobile students from abroad enrolled ...\neduc_uoe_mobs04 Distribution of mobile students from abroad en...\neduc_uoe_perp04 Ratio of pupils and students to teachers and a...\nhrst_fl_tefor Participation of foreign students in tertiary ...\ntsc00028 Doctorate students in science and technology f...\ndtype: object"
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cerchiamo nella Series i dataflows la cui descrizione contiene \"student\"\n",
"# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.contains.html\n",
"student_dataflows = dataflows[dataflows.str.contains(\"student\", case=False)]\n",
"student_dataflows"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 33,
"outputs": [
{
"data": {
"text/plain": "(<DataflowDefinition ESTAT:educ_enrl1at(1.0): Students by ISCED level, type of institution and study intensity>,\n <DataStructureDefinition ESTAT:DSD_educ_enrl1at(1.0)>)"
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prendiamo il primo e andiamo a scaricare i dati corrispondenti\n",
"dataflow_label = student_dataflows.index[0]\n",
"data_msg: pandasdmx.message.Message = eurostat.dataflow(dataflow_label)\n",
"dataflow: pandasdmx.model.DataflowDefinition = data_msg.dataflow[dataflow_label]\n",
"structure: pandasdmx.model.DataStructureDefinition = dataflow.structure\n",
"dataflow, structure\n",
"\n",
"# Si possono usare i dati della struttura richiesti in precedenza?"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 36,
"outputs": [
{
"data": {
"text/plain": "([], [], [], [])"
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Analizziamo la struttura dei dati\n",
"# È composta da:\n",
"# - dimensioni\n",
"# - attributi\n",
"# - misure\n",
"# Cosa cambia tra uno e l'altro?\n",
"\n",
"structure.dimensions.components, structure.attributes.components, structure.measures.components, structure.annotations"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

11
sdmx-sandbox.iml Normal file
View file

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>