diff --git a/.github/workflows/lint-actions.yaml b/.github/workflows/lint-actions.yaml index 49acdd9..6b8be37 100644 --- a/.github/workflows/lint-actions.yaml +++ b/.github/workflows/lint-actions.yaml @@ -9,8 +9,7 @@ on: pull_request: jobs: - run-linters: - name: Black & mypy + Black: runs-on: ubuntu-latest steps: - name: Set up python @@ -28,10 +27,30 @@ jobs: virtualenvs-path: ~/local/share/virtualenvs - run: poetry install --without develop,doc,test - name: Run linters - uses: wearerequired/lint-action@v2 + run: | + # --check: exit non-zero on unformatted files instead of rewriting them in the runner + black --check src tests + + mypy: + runs-on: ubuntu-latest + steps: + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 with: - black: true - mypy: true + python-version: '3.11' + - name: Check out Git repository + uses: actions/checkout@v3 + - name: Install and configure Poetry + uses: snok/install-poetry@v1 + with: + version: 1.4.2 + virtualenvs-create: false + virtualenvs-path: ~/local/share/virtualenvs + - run: poetry install --without develop,doc + - name: Run linters + run: | + mypy src tests ruff: runs-on: ubuntu-latest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2afced9..fba3ea3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,13 +25,13 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. 
- rev: v0.0.277 + rev: v0.0.284 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.7.0 hooks: - id: black args: [--config=pyproject.toml] @@ -40,7 +40,7 @@ repos: - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.9.0 + rev: v2.10.0 hooks: - id: pretty-format-ini args: [--autofix] @@ -61,7 +61,7 @@ repos: - types-requests - repo: https://github.com/frnmst/md-toc - rev: 8.1.9 + rev: 8.2.0 hooks: - id: md-toc @@ -76,6 +76,6 @@ repos: - id: validate-html - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.23.2 + rev: 0.24.0 hooks: - id: check-github-workflows diff --git a/Jupyter/mongoDB/configuration.py b/Jupyter/mongoDB/configuration.py index c7dcded..477ecdc 100644 --- a/Jupyter/mongoDB/configuration.py +++ b/Jupyter/mongoDB/configuration.py @@ -1,5 +1,6 @@ -HOSTNAME="stagingdbtransparenzreg.ioappzs.mongodb.net" -DATABASE="transparenzregister" -PORT=None -USERNAME="db_user" -PASSWORD="secret_password" +"""Placeholder to log in to the FH db.""" +HOSTNAME = "stagingdbtransparenzreg.ioappzs.mongodb.net" +DATABASE = "transparenzregister" +PORT = None +USERNAME = "db_user" +PASSWORD = "secret_password" # noqa: S105 diff --git a/Jupyter/mongoDB/configurationFH.py b/Jupyter/mongoDB/configurationFH.py index 49493ec..1455831 100644 --- a/Jupyter/mongoDB/configurationFH.py +++ b/Jupyter/mongoDB/configurationFH.py @@ -1,5 +1,6 @@ -HOSTNAME="172.17.38.210" -DATABASE="transparenzregister" -PORT=30217 -USERNAME="root" -PASSWORD="secret_password" \ No newline at end of file +"""Placeholder to log in to the FH db.""" +HOSTNAME = "172.17.38.210" +DATABASE = "transparenzregister" +PORT = 30217 +USERNAME = "root" +PASSWORD = "secret_password" # noqa: S105 diff --git a/documentations/meeting-notes/Meeting_2023-08-03.md b/documentations/meeting-notes/Meeting_2023-08-03.md new file mode 100644 index 0000000..74cb8ca --- /dev/null +++ 
b/documentations/meeting-notes/Meeting_2023-08-03.md @@ -0,0 +1,27 @@ +# Weekly *X*: 03.08.2023 + +## Teilnehmer +- Prof. Arinir +- Tristan Nolde +- Tim Ronneburg (Protokollant) +- Sebastian Zeleny + +## Themen + +- Präsentieren der Ergebnisse der letzten Wochen: + - Named Entity Recognition + - Vorstellung Datenbank auf dem FH-Cluster: + - Mongo Connector + - Datenspeicherung auf dem Cluster +- Weitere Vorgehensweise: + - Idee: Kleine Workshops/Teams + - In 2er Teams die einzelnen Funktionen über Feature Branches erstellen + +## Abgeleitete Action Items + +| Action Item | Verantwortlicher | Deadline | +|-------------|------------------|-----------------| +| Mergen aller Branches zu jedem neuen Termin mit Herrn Arinir | Jeder | jedes Weekly | +| Erstellen der Pipelines | Sebastian, Tristan und Tim | nächstes Weekly | +| Erstellen der Development Datenbank-Instanzen je Entwickler | Sebastian, Tristan und Tim | nächstes Weekly | +| Anlegen der relationalen Postgres DB via Script auf den FH-Cluster | Sebastian, Tristan und Tim | nächstes Weekly | diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/Graph.png b/documentations/seminararbeiten/Verflechtungsanalyse/Graph.png new file mode 100644 index 0000000..fb7f0ae Binary files /dev/null and b/documentations/seminararbeiten/Verflechtungsanalyse/Graph.png differ diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/Verflechtungsanalyse des Transparenzregisters.pdf b/documentations/seminararbeiten/Verflechtungsanalyse/Verflechtungsanalyse des Transparenzregisters.pdf new file mode 100644 index 0000000..8fd8dbd Binary files /dev/null and b/documentations/seminararbeiten/Verflechtungsanalyse/Verflechtungsanalyse des Transparenzregisters.pdf differ diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/companies.csv b/documentations/seminararbeiten/Verflechtungsanalyse/companies.csv new file mode 100644 index 0000000..0097ec9 --- /dev/null +++ 
b/documentations/seminararbeiten/Verflechtungsanalyse/companies.csv @@ -0,0 +1,12 @@ +id;label;type;branche +1;Porsche Automobil Holding;Company;Automobilhersteller +2;Volkswagen AG;Company;Automobilhersteller +3;Volkswagen;Company;Automobilhersteller +4;Audi;Company;Automobilhersteller +5;Seat;Company;Automobilhersteller +6;Skoda Auto;Company;Automobilhersteller +7;Porsche AG;Company;Automobilhersteller +8;Lamborghini;Company;Automobilhersteller +9;Bentley;Company;Automobilhersteller +10;Forvia;Company;Automobilzulieferer +11;Hella;Company;Automobilzulieferer \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/betweeness_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/betweeness_networkx.html new file mode 100644 index 0000000..f4ac680 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/betweeness_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/closeness_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/closeness_networkx.html new file mode 100644 index 0000000..d177118 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/closeness_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/degree_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/degree_networkx.html new file mode 100644 index 0000000..57aade6 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/degree_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/edges_path_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/edges_path_networkx.html new file mode 100644 index 0000000..9e68207 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/edges_path_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/eigenvector_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/eigenvector_networkx.html new file mode 100644 index 0000000..8da2b79 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/eigenvector_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/metrics/pagerank_networkx.html b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/pagerank_networkx.html new file mode 100644 index 0000000..507039f --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/metrics/pagerank_networkx.html @@ -0,0 +1,180 @@ + + + + + + + + + +
+

+
+ + + + + + +
+

+
+ + + + + +
+ + +
+
+ + + +
+ + + + + \ No newline at end of file diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb b/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb new file mode 100644 index 0000000..7d07996 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Networkx und Pyvis - Minimal Working Example" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Referenzen: \n", + "- [Networkx Dokumentation](https://networkx.org/documentation/stable/)\n", + "- [Pyvis Dokumentation](https://pyvis.readthedocs.io/en/latest/index.html)\n", + "- [Introduction to Python for Humanists](https://python-textbook.pythonhumanities.com/06_sna/06_01_05_networkx_pyvis.html)\n", + "\n", + "\n", + "Networkx ist eine Python Bibliothek zur Erstellung und Analyse von Netzwerken. Pyvis ist eine Python Bibliothek zur interaktiven Visualisierung von Netzwerkgraphen. Beide können mit `pip` installiert werden. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: networkx in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (3.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.1.1 -> 23.1.2\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pyvis in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (0.3.2)\n", + "Requirement already satisfied: ipython>=5.3.0 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyvis) (8.4.0)\n", + "Requirement already satisfied: jinja2>=2.9.6 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyvis) (3.1.2)\n", + "Requirement already satisfied: jsonpickle>=1.4.1 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyvis) (3.0.1)\n", + "Requirement already satisfied: networkx>=1.11 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pyvis) (3.0)\n", + "Requirement already satisfied: backcall in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (0.2.0)\n", + "Requirement already satisfied: decorator in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (5.1.1)\n", + "Requirement already satisfied: jedi>=0.16 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (0.18.1)\n", + "Requirement already satisfied: matplotlib-inline in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from 
ipython>=5.3.0->pyvis) (0.1.3)\n", + "Requirement already satisfied: pickleshare in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (0.7.5)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (3.0.30)\n", + "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (2.12.0)\n", + "Requirement already satisfied: setuptools>=18.5 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (58.1.0)\n", + "Requirement already satisfied: stack-data in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (0.3.0)\n", + "Requirement already satisfied: traitlets>=5 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (5.7.1)\n", + "Requirement already satisfied: colorama in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from ipython>=5.3.0->pyvis) (0.4.5)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from jinja2>=2.9.6->pyvis) (2.1.1)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.0 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from jedi>=0.16->ipython>=5.3.0->pyvis) (0.8.3)\n", + "Requirement already satisfied: wcwidth in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis) (0.2.5)\n", + "Requirement already satisfied: executing in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from stack-data->ipython>=5.3.0->pyvis) 
(0.8.3)\n", + "Requirement already satisfied: asttokens in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from stack-data->ipython>=5.3.0->pyvis) (2.0.5)\n", + "Requirement already satisfied: pure-eval in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from stack-data->ipython>=5.3.0->pyvis) (0.2.2)\n", + "Requirement already satisfied: six in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from asttokens->stack-data->ipython>=5.3.0->pyvis) (1.16.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.1.1 -> 23.1.2\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "# install networkx and pyvis using pip\n", + "!pip install networkx\n", + "!pip install pyvis" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pandas DataFrame mit Beispieldaten\n", + "\n", + "Um ein Netzwerk aufbauen zu können, brauchen wir Daten für die Knoten (nodes) und Kanten (edges). Die Daten speichern wir jeweils in einem Pandas DataFrame. Pandas kann ebenfalls mit `pip` installiert werden. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pandas in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (1.4.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (2022.1)\n", + "Requirement already satisfied: numpy>=1.18.5 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from pandas) (1.23.0)\n", + "Requirement already satisfied: six>=1.5 in c:\\users\\tim\\appdata\\local\\programs\\python\\python39\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "[notice] A new release of pip is available: 23.1.1 -> 23.1.2\n", + "[notice] To update, run: python.exe -m pip install --upgrade pip\n" + ] + } + ], + "source": [ + "# install pandas using pip\n", + "!pip install pandas" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Die Knoten unseres Netzwerks sollen die Unternehmen und Personen darstellen. Eine `id` ermöglicht die eindeutige Identifizierung eines Knoten und hilft Duplikate zu vermeiden. Um Unternehmen von Personen differenzieren zu können, wurde zusätzlich die Information `type` aufgenommen. Sie dient in unserem Beispiel dazu, die Form des Knoten zu bestimmen. Durch `label` bekommt der Knoten eine für den User verständliche Bezeichnung. Weitere Informationen, wie zum Beispiel `branche`, können später für das Mouse Over oder die Größe oder Farbe der Knoten verwendet werden. 
\n", + "\n", + "Um in einem späteren Schritt die Attribute der Knoten an das Netzwerk zu übergeben, generieren wir zusätzlich eine Spalte `shape`, eine Spalte `color` und eine Spalte `title`." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id label type branche shape \\\n", + "0 1 Porsche Automobil Holding Company Automobilhersteller dot \n", + "1 2 Volkswagen AG Company Automobilhersteller dot \n", + "2 3 Volkswagen Company Automobilhersteller dot \n", + "3 4 Audi Company Automobilhersteller dot \n", + "4 5 Seat Company Automobilhersteller dot \n", + "\n", + " color title \n", + "0 #729b79ff Porsche Automobil Holding\\nAutomobilhersteller \n", + "1 #729b79ff Volkswagen AG\\nAutomobilhersteller \n", + "2 #729b79ff Volkswagen\\nAutomobilhersteller \n", + "3 #729b79ff Audi\\nAutomobilhersteller \n", + "4 #729b79ff Seat\\nAutomobilhersteller \n" + ] + } + ], + "source": [ + "# import pandas\n", + "import pandas as pd\n", + "\n", + "# create dataframe based on the sample data\n", + "df_nodes = pd.read_csv('companies.csv', sep = ';')\n", + "\n", + "# define shape based on the type\n", + "node_shape = {'Company': 'dot', 'Person': 'triangle'}\n", + "df_nodes['shape'] = df_nodes['type'].map(node_shape)\n", + "\n", + "# define color based on branche\n", + "node_color = {'Automobilhersteller': ' #729b79ff', 'Automobilzulieferer': '#475b63ff', 'Branche 3': '#f3e8eeff', 'Branche 4': '#bacdb0ff', 'Branche 5': '#2e2c2fff'}\n", + "df_nodes['color'] = df_nodes['branche'].map(node_color)\n", + "\n", + "# add information column that can be used for the mouse over in the graph\n", + "df_nodes = df_nodes.fillna('')\n", + "df_nodes['title'] = df_nodes['label'] + '\\n' + df_nodes['branche']\n", + "\n", + "# show first five entries of the dataframe\n", + "print(df_nodes.head())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"Die Kanten visualisieren die Beziehungen zwischen den Unternehmen und Personen. Um in Pyvis eine Kante darzustellen, braucht es minimal die Information, zwischen welchen beiden Knoten eine Kante dargestellt werden soll. In den Beispieldaten entspricht dies `from` und `to`. Es wird jeweils auf die eindeutige `id` der jeweiligen Knoten referenziert. `label` bezeichnet hier die Art der Beziehung, z.B. AR = Aufsichtsrat. " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " from to label\n", + "0 2 1 part_of\n", + "1 3 1 part_of\n", + "2 4 1 part_of\n", + "3 5 1 part_of\n", + "4 6 1 part_of\n" + ] + } + ], + "source": [ + "# create dataframe based on the sample data\n", + "df_edges = pd.read_csv('relations.csv', sep = ';')\n", + "\n", + "# show first five entries of the dataframe\n", + "print(df_edges.head())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Erstellung eines Netzwerks mit networkx\n", + "\n", + "Zur Erstellung des Netzwerks nutzen wir `networkx`, da diese Bibliothek bessere Analysemöglichkeiten hat als `pyvis`. Das mit `networkx` erstellte Netzwerk können wir später an `pyvis` zur interaktiven Visualisierung übergeben. \n", + "\n", + "Wir erstellen die Knoten und Kanten auf Basis unserer beiden Dataframes." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# import networkx\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "\n", + "fig = plt.figure(figsize=(12,12))\n", + "ax = plt.subplot(111)\n", + "ax.set_title('Graph - Shapes', fontsize=10)\n", + "\n", + "# initiate graph\n", + "graph = nx.MultiGraph()\n", + "\n", + "# create edges from dataframe\n", + "graph = nx.from_pandas_edgelist(df_edges, source = 'from', target = 'to', edge_attr= 'label')\n", + "\n", + "# update node attributes from dataframe\n", + "nodes_attr = df_nodes.set_index('id').to_dict(orient = 'index')\n", + "nx.set_node_attributes(graph, nodes_attr)\n", + "\n", + "\n", + "pos = nx.spring_layout(graph)\n", + "nx.draw(graph, pos, node_size=1500, node_color='yellow', font_size=8, font_weight='bold')\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n", + "plt.savefig(\"Graph.png\", format=\"PNG\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualisierung des Netzwerks mit pyvis\n", + "\n", + "Für die Visualisierung importieren wir `Network` von `pyvis.network` und initialisieren das `pyvis` Netzwerk. Mit der Methode `from_nx` können wir das `networkx` Netzwerk übergeben. \n", + "\n", + "Die Größe der Knoten bestimmen wir je nach Auswahl entweder aufgrund der Anzahl der Verbindungen zu anderen Knoten oder anhand der Eigenvektor-Zentralität. Knoten mit vielen Verbindungen bzw. höherer Zentralität werden größer dargestellt." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "# visualize using pyvis\n", + "from pyvis.network import Network\n", + "\n", + "def create_centrality_graph(df, measure_type, save_path):\n", + " # initiate network\n", + " net = Network(directed=False, neighborhood_highlight=True, bgcolor = \"white\", font_color=\"black\")\n", + "\n", + " # pass networkx graph to pyvis\n", + " net.from_nx(graph)\n", + "\n", + " # set edge options \n", + " net.inherit_edge_colors(False)\n", + " net.set_edge_smooth('dynamic')\n", + "\n", + " adj_list = net.get_adj_list()\n", + "\n", + " measure_vector = {}\n", + "\n", + " if measure_type == \"eigenvector\":\n", + " measure_vector = nx.eigenvector_centrality(graph)\n", + " df[\"eigenvector\"] = measure_vector.values()\n", + " if measure_type == \"degree\":\n", + " measure_vector = nx.degree_centrality(graph)\n", + " df[\"degree\"] = measure_vector.values()\n", + " if measure_type == \"betweeness\":\n", + " measure_vector = nx.betweenness_centrality(graph)\n", + " df[\"betweeness\"] = measure_vector.values()\n", + " if measure_type == \"closeness\":\n", + " measure_vector = nx.closeness_centrality(graph)\n", + " df[\"closeness\"] = measure_vector.values()\n", + " if measure_type == \"pagerank\":\n", + " measure_vector = nx.pagerank(graph)\n", + " df[\"pagerank\"] = measure_vector.values()\n", + " if measure_type == \"average_degree\":\n", + " measure_vector = nx.average_degree_connectivity(graph)\n", + " # df[\"average_degree\"] = measure_vector.values()\n", + " print(measure_vector.values())\n", + " \n", + "\n", + " # calculate and update size of the nodes depending on their number of edges\n", + " for node_id, neighbors in adj_list.items():\n", + " \n", + " # df[\"edges\"] = measure_vector.values()\n", + " \n", + " if measure_type == \"edges\":\n", + " size = 10 #len(neighbors)*5 \n", + " else:\n", + " size = measure_vector[node_id]*50 \n", + " next((node.update({'size': 
size}) for node in net.nodes if node['id'] == node_id), None)\n", + "\n", + " # set the node distance and spring length using repulsion\n", + " net.repulsion(node_distance=150, spring_length=50)\n", + "\n", + " # activate physics buttons to further explore the available solvers:\n", + " # barnesHut, forceAtlas2Based, repulsion, hierarchicalRepulsion\n", + " net.show_buttons(filter_=['physics'])\n", + "\n", + " # save graph as HTML\n", + " net.save_graph(save_path)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alle zusammen ausführen und ein DataFrame erstellen mit allen Nodes je nach Kennzahl aufgeteilt." + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
eigenvectordegreebetweenessclosenesspagerank
00.2417030.10.0000000.4166670.053313
10.6330390.60.7777780.6666670.280074
20.2417030.10.0000000.4166670.053313
30.4494590.40.6444440.6250000.187550
40.2417030.10.0000000.4166670.053313
50.2417030.10.0000000.4166670.053313
60.2417030.10.0000000.4166670.053313
70.1716110.10.0000000.4000000.053491
80.1716110.10.0000000.4000000.053491
90.0767070.10.0000000.3125000.056939
100.2009000.20.2000000.4347830.101890
\n", + "
" + ], + "text/plain": [ + " eigenvector degree betweeness closeness pagerank\n", + "0 0.241703 0.1 0.000000 0.416667 0.053313\n", + "1 0.633039 0.6 0.777778 0.666667 0.280074\n", + "2 0.241703 0.1 0.000000 0.416667 0.053313\n", + "3 0.449459 0.4 0.644444 0.625000 0.187550\n", + "4 0.241703 0.1 0.000000 0.416667 0.053313\n", + "5 0.241703 0.1 0.000000 0.416667 0.053313\n", + "6 0.241703 0.1 0.000000 0.416667 0.053313\n", + "7 0.171611 0.1 0.000000 0.400000 0.053491\n", + "8 0.171611 0.1 0.000000 0.400000 0.053491\n", + "9 0.076707 0.1 0.000000 0.312500 0.056939\n", + "10 0.200900 0.2 0.200000 0.434783 0.101890" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "centrality_comparison_df = pd.DataFrame()\n", + "\n", + "eigenvector_path = \"./metrics/eigenvector_networkx.html\"\n", + "degree_path = \"./metrics/degree_networkx.html\"\n", + "betweeness_path = \"./metrics/betweeness_networkx.html\"\n", + "closeness_path = \"./metrics/closeness_networkx.html\"\n", + "pagerank_path = \"./metrics/pagerank_networkx.html\"\n", + "average_degree_path = \"./metrics/average_degree_path_networkx.html\"\n", + "edges_path = \"./metrics/edges_path_networkx.html\"\n", + "\n", + "create_centrality_graph(centrality_comparison_df, \"eigenvector\", eigenvector_path)\n", + "create_centrality_graph(centrality_comparison_df, \"degree\", degree_path)\n", + "create_centrality_graph(centrality_comparison_df, \"betweeness\", betweeness_path)\n", + "create_centrality_graph(centrality_comparison_df, \"closeness\", closeness_path)\n", + "create_centrality_graph(centrality_comparison_df, \"pagerank\", pagerank_path)\n", + "# create_centrality_graph(centrality_comparison_df, \"average_degree\", average_degree_path)\n", + "create_centrality_graph(centrality_comparison_df, \"edges\", edges_path)\n", + "\n", + "centrality_comparison_df" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "0.18181818181818182\n", + "4\n", + "2.327272727272727\n", + "Graph with 11 nodes and 10 edges\n", + "0.0\n", + "{1: 5.0, 6: 1.5, 4: 2.5, 2: 2.5}\n", + "{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}\n", + "[]\n" + ] + } + ], + "source": [ + "print(nx.density(graph))\n", + "print(nx.diameter(graph))\n", + "print(nx.average_shortest_path_length(graph))\n", + "print(nx.k_core(graph))\n", + "print(nx.average_clustering(graph))\n", + "print(nx.average_degree_connectivity(graph))\n", + "# print(nx.community.modularity(graph, [{ 1, 2}]))\n", + "print(max(nx.connected_components(graph)))\n", + "s = [graph.subgraph(c).copy() for c in nx.connected_components(graph)]\n", + "print(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\Tim\\Documents\\Master\\Semester 4\\Projektgruppe\\aki_prj23_transparenzregister\\documentations\\seminararbeiten\\Verflechtungsanalyse\\mockup_verflechtungsanalyse_with_networkx.ipynb Cell 17\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m net \u001b[39m=\u001b[39m Network(directed\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, neighborhood_highlight\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, bgcolor \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mwhite\u001b[39m\u001b[39m\"\u001b[39m, font_color\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mblack\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m 3\u001b[0m \u001b[39m# pass networkx graph to pyvis\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m net\u001b[39m.\u001b[39;49mfrom_nx(s)\n\u001b[0;32m 6\u001b[0m \u001b[39m# set edge options \u001b[39;00m\n\u001b[0;32m 7\u001b[0m 
net\u001b[39m.\u001b[39minherit_edge_colors(\u001b[39mFalse\u001b[39;00m)\n", + "File \u001b[1;32mc:\\Users\\Tim\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\pyvis\\network.py:689\u001b[0m, in \u001b[0;36mNetwork.from_nx\u001b[1;34m(self, nx_graph, node_size_transf, edge_weight_transf, default_node_size, default_edge_weight, show_edge_weights, edge_scaling)\u001b[0m\n\u001b[0;32m 660\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfrom_nx\u001b[39m(\u001b[39mself\u001b[39m, nx_graph, node_size_transf\u001b[39m=\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x), edge_weight_transf\u001b[39m=\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x),\n\u001b[0;32m 661\u001b[0m default_node_size \u001b[39m=\u001b[39m\u001b[39m10\u001b[39m, default_edge_weight\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m, show_edge_weights\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, edge_scaling\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n\u001b[0;32m 662\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[0;32m 663\u001b[0m \u001b[39m This method takes an exisitng Networkx graph and translates\u001b[39;00m\n\u001b[0;32m 664\u001b[0m \u001b[39m it to a PyVis graph format that can be accepted by the VisJs\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 687\u001b[0m \u001b[39m >>> nt.show(\"nx.html\")\u001b[39;00m\n\u001b[0;32m 688\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 689\u001b[0m \u001b[39massert\u001b[39;00m(\u001b[39misinstance\u001b[39m(nx_graph, nx\u001b[39m.\u001b[39mGraph))\n\u001b[0;32m 690\u001b[0m edges\u001b[39m=\u001b[39mnx_graph\u001b[39m.\u001b[39medges(data \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m 691\u001b[0m nodes\u001b[39m=\u001b[39mnx_graph\u001b[39m.\u001b[39mnodes(data \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m)\n", + "\u001b[1;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "net = Network(directed=False, neighborhood_highlight=True, bgcolor = \"white\", font_color=\"black\")\n", + "\n", + 
"# pass networkx graph to pyvis\n", + "net.from_nx(s)\n", + "\n", + "# set edge options \n", + "net.inherit_edge_colors(False)\n", + "net.set_edge_smooth('dynamic')\n", + "\n", + "adj_list = net.get_adj_list()\n", + "\n", + "# calculate and update size of the nodes depending on their number of edges\n", + "for node_id, neighbors in adj_list.items():\n", + " \n", + " # df[\"edges\"] = measure_vector.values()\n", + " \n", + " \n", + " size = 10 #len(neighbors)*5 \n", + " \n", + " next((node.update({'size': size}) for node in net.nodes if node['id'] == node_id), None)\n", + "\n", + "# set the node distance and spring lenght using repulsion\n", + "net.repulsion(node_distance=150, spring_length=50)\n", + "\n", + "# activate physics buttons to further explore the available solvers:\n", + "# barnesHut, forceAtlas2Based, repulsion, hierarchicalRepulsion\n", + "net.show_buttons(filter_=['physics'])\n", + "\n", + "# save graph as HTML\n", + "net.save_graph(\"./metrics/connected_components_networkx.html\")" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" + }, + "kernelspec": { + "display_name": "Python 3.10.1 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/relations.csv b/documentations/seminararbeiten/Verflechtungsanalyse/relations.csv new file mode 100644 index 0000000..bd436e4 --- /dev/null +++ b/documentations/seminararbeiten/Verflechtungsanalyse/relations.csv @@ -0,0 +1,11 @@ +from;to;label +2;1;part_of +3;1;part_of +4;1;part_of +5;1;part_of +6;1;part_of +7;1;part_of +8;4;part_of +9;4;part_of 
+11;10;part_of +10;4;supplierer \ No newline at end of file