From 41f2c9f995e5463a49ec1bd5fbfcc23021f21323 Mon Sep 17 00:00:00 2001 From: Philipp Horstenkamp Date: Wed, 4 Oct 2023 20:03:47 +0200 Subject: [PATCH] Executing black over all jupyter notebook (#190) Reverting black for the jupyter notebooks gets old. Can we just run black over all of them? --- .../AI-models/Sentiment Analysis/vader.ipynb | 41 +- Jupyter/API-tests/API news_research_Tim.ipynb | 22 +- Jupyter/API-tests/API research_Tristan.ipynb | 12 +- .../01_Transparenzregister_Zeitdaten.ipynb | 50 +-- .../11_Create_Tables_with_SQL-Alchemy.ipynb | 393 ++++++++++-------- .../01_Connect_to_Database.ipynb | 46 +- ...ect_to_Database_publish_Company_Data.ipynb | 33 +- .../03-1_Publish_Finance_Testdata_BASF.ipynb | 33 +- ...3-2_Publish-Finance_Testdata_Telekom.ipynb | 33 +- .../03-2_Publish_Finance_Testdata_EON.ipynb | 33 +- .../04_First_Query.ipynb | 3 +- .../Visualisierung_plotly.ipynb | 162 +++++--- .../Visualisierung_networkx_pyvis.ipynb | 130 +++--- .../networkx_pyvis.ipynb | 63 +-- ...p_verflechtungsanalyse_with_networkx.ipynb | 91 ++-- 15 files changed, 658 insertions(+), 487 deletions(-) diff --git a/Jupyter/AI-models/Sentiment Analysis/vader.ipynb b/Jupyter/AI-models/Sentiment Analysis/vader.ipynb index d9ca21e..e533203 100644 --- a/Jupyter/AI-models/Sentiment Analysis/vader.ipynb +++ b/Jupyter/AI-models/Sentiment Analysis/vader.ipynb @@ -82,7 +82,7 @@ "# Download the lexicon\n", "nltk.download(\"vader_lexicon\")\n", "\n", - "# Import the lexicon \n", + "# Import the lexicon\n", "from nltk.sentiment.vader import SentimentIntensityAnalyzer\n", "\n", "# Create an instance of SentimentIntensityAnalyzer\n", @@ -132,12 +132,22 @@ "text_df = pd.DataFrame(\n", " [\n", " {\"text\": \"Microsoft fails to hit profit expectations.\"},\n", - " {\"text\": \"Confidence continues to prevail on the stock market, as the performance of the DAX shows.\"},\n", + " {\n", + " \"text\": \"Confidence continues to prevail on the stock market, as the performance of the DAX shows.\"\n", + " },\n", " {\"text\": \"Stocks rallied and the British pound gained.\"},\n", - " {\"text\": \"Meyer Burger now serves Australian market and presents itself at Smart Energy Expo in Sydney.\"},\n", - " {\"text\": \"Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\"},\n", - " {\"text\": \"J&T Express Vietnam helps local craft villages increase their reach.\"},\n", - " {\"text\": \"7 experts recommend the stock for purchase, 1 expert recommends holding the stock.\"},\n", + " {\n", + " \"text\": \"Meyer Burger now serves Australian market and presents itself at Smart Energy Expo in Sydney.\"\n", + " },\n", + " {\n", + " \"text\": \"Meyer Burger enters Australian market and exhibits at Smart Energy Expo in Sydney.\"\n", + " },\n", + " {\n", + " \"text\": \"J&T Express Vietnam helps local craft villages increase their reach.\"\n", + " },\n", + " {\n", + " \"text\": \"7 experts recommend the stock for purchase, 1 expert recommends holding the stock.\"\n", + " },\n", " {\"text\": \"Microsoft share falls.\"},\n", " {\"text\": \"Microsoft share is rising.\"},\n", " ]\n", @@ -262,22 +272,21 @@ ], "source": [ "def format_output(output_dict):\n", - " \n", - " polarity = \"neutral\"\n", + " polarity = \"neutral\"\n", "\n", - " if(output_dict['compound']>= 0.05):\n", - " polarity = \"positive\"\n", + " if output_dict[\"compound\"] >= 0.05:\n", + " polarity = \"positive\"\n", "\n", - " elif(output_dict['compound']<= -0.05):\n", - " polarity = \"negative\"\n", + " elif output_dict[\"compound\"] <= -0.05:\n", + " 
polarity = \"negative\"\n", "\n", - " return polarity\n", + " return polarity\n", "\n", "\n", "def predict_sentiment(text):\n", - " \n", - " output_dict = sent_analyzer.polarity_scores(text)\n", - " return output_dict\n", + " output_dict = sent_analyzer.polarity_scores(text)\n", + " return output_dict\n", + "\n", "\n", "# Run the predictions\n", "text_df[\"vader_prediction\"] = text_df[\"text\"].apply(predict_sentiment)\n", diff --git a/Jupyter/API-tests/API news_research_Tim.ipynb b/Jupyter/API-tests/API news_research_Tim.ipynb index 5f66219..05c0880 100644 --- a/Jupyter/API-tests/API news_research_Tim.ipynb +++ b/Jupyter/API-tests/API news_research_Tim.ipynb @@ -31,8 +31,9 @@ "outputs": [], "source": [ "import requests\n", - "url = 'https://www.tagesschau.de/api2/'\n", - "r = requests.get(url +'homepage')\n", + "\n", + "url = \"https://www.tagesschau.de/api2/\"\n", + "r = requests.get(url + \"homepage\")\n", "r = r.json()" ] }, @@ -76,10 +77,11 @@ "source": [ "# Aggregieren der Titel und Beschreibungen\n", "import pandas as pd\n", + "\n", "data = {\"titles\": [], \"description\": []}\n", - "for i in range(len(r[\"news\"])): \n", - " data[\"titles\"].append(r[\"news\"][i][\"title\"])\n", - " data[\"description\"].append(r[\"news\"][i][\"content\"][0][\"value\"])\n", + "for i in range(len(r[\"news\"])):\n", + " data[\"titles\"].append(r[\"news\"][i][\"title\"])\n", + " data[\"description\"].append(r[\"news\"][i][\"content\"][0][\"value\"])\n", "df = pd.DataFrame(data)\n", "print(df.__len__)" ] @@ -19323,7 +19325,7 @@ } ], "source": [ - "r = requests.get(url +'news').json()\n", + "r = requests.get(url + \"news\").json()\n", "r" ] }, @@ -19355,8 +19357,8 @@ ], "source": [ "data = {\"titles\": []}\n", - "for i in range(len(r[\"news\"])): \n", - " data[\"titles\"].append(r[\"news\"][i][\"title\"])\n", + "for i in range(len(r[\"news\"])):\n", + " data[\"titles\"].append(r[\"news\"][i][\"title\"])\n", "# data[\"description\"].append(r[\"news\"][i][\"content\"][0][\"value\"])\n", "df = pd.DataFrame(data)\n", "print(df)" @@ -19391,7 +19393,7 @@ ], "source": [ "date = \"230425\"\n", - "r = requests.get(url +'newsfeed-101~_date-{date}.json').json()\n", + "r = requests.get(url + \"newsfeed-101~_date-{date}.json\").json()\n", "r" ] }, @@ -19989,7 +19991,7 @@ "pageSize = 5\n", "resultPage = 2\n", "print(url)\n", - "txt = f'search/?searchText={searchtxt}&pageSize={pageSize}&resultPage={resultPage}'\n", + "txt = f\"search/?searchText={searchtxt}&pageSize={pageSize}&resultPage={resultPage}\"\n", "r = requests.get(url + txt).json()\n", "r" ] diff --git a/Jupyter/API-tests/API research_Tristan.ipynb b/Jupyter/API-tests/API research_Tristan.ipynb index 0980fa4..dc77309 100644 --- a/Jupyter/API-tests/API research_Tristan.ipynb +++ b/Jupyter/API-tests/API research_Tristan.ipynb @@ -51,6 +51,7 @@ ], "source": [ "from deutschland.bundesanzeiger import Bundesanzeiger\n", + "\n", "ba = Bundesanzeiger()\n", "# search term\n", "data = ba.get_reports(\"Atos IT-Dienstleistung & Beratung GmbH\")\n", @@ -73,11 +74,13 @@ ], "source": [ "# Note: There can be multiple \"Aufsichtsrat\" entries per Company, the API however does only return one because the keys are overwritten\n", - "jahresabschluss = data['Jahresabschluss zum Geschäftsjahr vom 01.01.2019 bis zum 31.12.2019']\n", + "jahresabschluss = data[\n", + " \"Jahresabschluss zum Geschäftsjahr vom 01.01.2019 bis zum 31.12.2019\"\n", + "]\n", "\n", "# Note: Although the report includes the entire text it lacks the formatting that would make extracting information a lot 
easier as the data is wrapped inside a originally\n", "with open(\"./jahresabschluss-example.txt\", \"w\") as file:\n", - " file.write(jahresabschluss['report'])\n", + " file.write(jahresabschluss[\"report\"])\n", "print(jahresabschluss.keys())" ] }, @@ -96,6 +99,7 @@ ], "source": [ "from deutschland.handelsregister import Handelsregister\n", + "\n", "hr = Handelsregister()\n", "\n", "results = hr.search(keywords=\"BLUECHILLED Verwaltungs GmbH\")\n", @@ -128,6 +132,7 @@ "source": [ "# SQLite export\n", "import sqlite3\n", + "\n", "con = sqlite3.connect(\"../data/openregister.db\")" ] }, @@ -176,7 +181,7 @@ ], "source": [ "schema = cur.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n", - "schema.fetchall()\n" + "schema.fetchall()" ] }, { @@ -414,6 +419,7 @@ ], "source": [ "import pandas as pd\n", + "\n", "df = pd.read_sql_query(\"SELECT * FROM company LIMIT 100\", con)\n", "df.head()" ] diff --git a/Jupyter/Timeseries/01_Transparenzregister_Zeitdaten.ipynb b/Jupyter/Timeseries/01_Transparenzregister_Zeitdaten.ipynb index 735625c..438b682 100644 --- a/Jupyter/Timeseries/01_Transparenzregister_Zeitdaten.ipynb +++ b/Jupyter/Timeseries/01_Transparenzregister_Zeitdaten.ipynb @@ -68,6 +68,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -86,9 +87,9 @@ "metadata": {}, "outputs": [], "source": [ - "dfEON=pd.read_csv('EON_Data.csv', index_col=0, sep=';') \n", - "dfBASF=pd.read_csv('BASF_Data.csv', index_col=0, sep=';') \n", - "dfTELEKOM=pd.read_csv('TELEKOM_Data.csv', index_col=0, sep=';') " + "dfEON = pd.read_csv(\"EON_Data.csv\", index_col=0, sep=\";\")\n", + "dfBASF = pd.read_csv(\"BASF_Data.csv\", index_col=0, sep=\";\")\n", + "dfTELEKOM = pd.read_csv(\"TELEKOM_Data.csv\", index_col=0, sep=\";\")" ] }, { @@ -112,7 +113,7 @@ } ], "source": [ - "#select a specific year\n", + "# select a specific year\n", "dfTELEKOM.loc[2016]" ] }, @@ -148,33 +149,34 @@ ], "source": [ "def get_Data(company, metric):\n", - " if company=='BASF':\n", - " dfSelect=dfBASF\n", - " print('BASF')\n", - " if company=='EON':\n", - " dfSelect=dfEON\n", - " print('EON') \n", - " if company=='Telekom':\n", - " dfSelect=dfTELEKOM\n", - " print('Telekom') \n", - " fig=dfSelect.plot()\n", - " fig.show() \n", + " if company == \"BASF\":\n", + " dfSelect = dfBASF\n", + " print(\"BASF\")\n", + " if company == \"EON\":\n", + " dfSelect = dfEON\n", + " print(\"EON\")\n", + " if company == \"Telekom\":\n", + " dfSelect = dfTELEKOM\n", + " print(\"Telekom\")\n", + " fig = dfSelect.plot()\n", + " fig.show()\n", " return\n", "\n", - "W_company=widgets.Dropdown(\n", - " options=['BASF', 'EON', 'Telekom'],\n", - " value='BASF',\n", - " description='Company:',\n", + "\n", + "W_company = widgets.Dropdown(\n", + " options=[\"BASF\", \"EON\", \"Telekom\"],\n", + " value=\"BASF\",\n", + " description=\"Company:\",\n", " disabled=False,\n", ")\n", - "W_metric=widgets.Dropdown(\n", - " options=['EBIT', 'EBITDA', 'Volume'],\n", - " value='Volume',\n", - " description='Metric:',\n", + "W_metric = widgets.Dropdown(\n", + " options=[\"EBIT\", \"EBITDA\", \"Volume\"],\n", + " value=\"Volume\",\n", + " description=\"Metric:\",\n", " disabled=False,\n", ")\n", "\n", - "out=widgets.interact(get_Data, company=W_company, metric=W_metric)" + "out = widgets.interact(get_Data, company=W_company, metric=W_metric)" ] }, { diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/11_Create_Tables_with_SQL-Alchemy.ipynb 
b/documentations/seminararbeiten/Datenspeicherung/Jupyter/11_Create_Tables_with_SQL-Alchemy.ipynb index 16b162b..acd9e8f 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/11_Create_Tables_with_SQL-Alchemy.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/11_Create_Tables_with_SQL-Alchemy.ipynb @@ -44,7 +44,7 @@ " username=\"postgres\",\n", " password=\"postgres\",\n", " host=\"localhost\",\n", - " database=\"postgres\"\n", + " database=\"postgres\",\n", ")\n", "\n", "engine = create_engine(url)" @@ -57,7 +57,7 @@ "metadata": {}, "outputs": [], "source": [ - "#connect to database\n", + "# connect to database\n", "connection = engine.connect()" ] }, @@ -77,12 +77,13 @@ "metadata": {}, "outputs": [], "source": [ - "#create an object *district_court* which inherits attributes from Base-class\n", + "# create an object *district_court* which inherits attributes from Base-class\n", "Base = declarative_base()\n", "\n", + "\n", "class DistrictCourt(Base):\n", - " __tablename__ = 'district_court'\n", - " \n", + " __tablename__ = \"district_court\"\n", + "\n", " id = Column(Integer(), primary_key=True)\n", " city = Column(String(100), nullable=False)\n", " name = Column(String(100), nullable=False)" @@ -106,10 +107,12 @@ "outputs": [], "source": [ "class Company(Base):\n", - " __tablename__ = 'company'\n", + " __tablename__ = \"company\"\n", "\n", " hr = Column(Integer(), nullable=False, primary_key=True)\n", - " court_id = Column(Integer, ForeignKey(\"district_court.id\"), nullable=False, primary_key=True)\n", + " court_id = Column(\n", + " Integer, ForeignKey(\"district_court.id\"), nullable=False, primary_key=True\n", + " )\n", " name = Column(String(100), nullable=False)\n", " street = Column(String(100), nullable=False)\n", " zip = Column(Integer(), nullable=False)\n", @@ -117,7 +120,7 @@ " sector = Column(String(100), nullable=False)\n", "\n", " __table_args__ = (\n", - " PrimaryKeyConstraint('hr', 'court_id', name='pk_company_hr_court'),\n", + " PrimaryKeyConstraint(\"hr\", \"court_id\", name=\"pk_company_hr_court\"),\n", " )" ] }, @@ -139,7 +142,7 @@ } ], "source": [ - "#check if table-object is created\n", + "# check if table-object is created\n", "Company.__table__" ] }, @@ -151,7 +154,7 @@ "outputs": [], "source": [ "class Finance(Base):\n", - " __tablename__ = 'finance'\n", + " __tablename__ = \"finance\"\n", "\n", " id = Column(Integer, primary_key=True)\n", " company_hr = Column(Integer)\n", @@ -170,7 +173,9 @@ " company = relationship(\"Company\")\n", "\n", " __table_args__ = (\n", - " ForeignKeyConstraint([company_hr, company_court], [Company.hr, Company.court_id]),\n", + " ForeignKeyConstraint(\n", + " [company_hr, company_court], [Company.hr, Company.court_id]\n", + " ),\n", " )" ] }, @@ -181,25 +186,35 @@ "metadata": {}, "outputs": [], "source": [ - "#create own enumeration type and sentiment object\n", - "sentiment_type=Enum(\"employee_voting\",\"sustainability\",\"environmental_aspects\",\"perception\", name=\"sentiment_type\", create_type=False)\n", + "# create own enumeration type and sentiment object\n", + "sentiment_type = Enum(\n", + " \"employee_voting\",\n", + " \"sustainability\",\n", + " \"environmental_aspects\",\n", + " \"perception\",\n", + " name=\"sentiment_type\",\n", + " create_type=False,\n", + ")\n", + "\n", "\n", "class Sentiment(Base):\n", - " __tablename__ = 'sentiment'\n", + " __tablename__ = \"sentiment\"\n", "\n", " id = Column(Integer(), primary_key=True)\n", - " #company_hr = 
mapped_column(ForeignKey(\"company.hr\"))\n", - " #company_court = mapped_column(ForeignKey(\"company.court_id\"))\n", + " # company_hr = mapped_column(ForeignKey(\"company.hr\"))\n", + " # company_court = mapped_column(ForeignKey(\"company.court_id\"))\n", " company_hr = Column(Integer)\n", " company_court = Column(Integer)\n", " date = Column(DateTime(), default=datetime.now)\n", - " type = Column(sentiment_type,nullable=False)\n", - " value =Column(Float(),nullable=False)\n", - " source=Column(String(100))\n", - " \n", - " sentiment = relationship('Company')\n", + " type = Column(sentiment_type, nullable=False)\n", + " value = Column(Float(), nullable=False)\n", + " source = Column(String(100))\n", + "\n", + " sentiment = relationship(\"Company\")\n", " __table_args__ = (\n", - " ForeignKeyConstraint([company_hr, company_court], [Company.hr, Company.court_id]),\n", + " ForeignKeyConstraint(\n", + " [company_hr, company_court], [Company.hr, Company.court_id]\n", + " ),\n", " )" ] }, @@ -210,14 +225,14 @@ "metadata": {}, "outputs": [], "source": [ - "#create person object\n", + "# create person object\n", "class Person(Base):\n", - " __tablename__ = 'person'\n", + " __tablename__ = \"person\"\n", "\n", " id = Column(Integer(), primary_key=True)\n", - " name=Column(String(100), nullable=False)\n", - " surname=Column(String(100), nullable=False)\n", - " works_for=Column(String(100))" + " name = Column(String(100), nullable=False)\n", + " surname = Column(String(100), nullable=False)\n", + " works_for = Column(String(100))" ] }, { @@ -227,27 +242,39 @@ "metadata": {}, "outputs": [], "source": [ - "#create own relation type and person_relation object\n", - "rel_type=Enum(\"Executive\",\"Auditor\",\"Supervisory_Board\",\"Managing_Director\",\"Authorized_Representive\",\"Final_Auditor\", name=\"rel_type\", create_type=False)\n", + "# create own relation type and person_relation object\n", + "rel_type = Enum(\n", + " \"Executive\",\n", + " \"Auditor\",\n", + " \"Supervisory_Board\",\n", + " \"Managing_Director\",\n", + " \"Authorized_Representive\",\n", + " \"Final_Auditor\",\n", + " name=\"rel_type\",\n", + " create_type=False,\n", + ")\n", + "\n", "\n", "class Person_Relation(Base):\n", - " __tablename__ = 'person_relation'\n", + " __tablename__ = \"person_relation\"\n", "\n", " id = Column(Integer(), primary_key=True)\n", - " #company_hr = mapped_column(ForeignKey(\"company.hr\"))\n", - " #company_court = mapped_column(ForeignKey(\"company.court_id\"))\n", + " # company_hr = mapped_column(ForeignKey(\"company.hr\"))\n", + " # company_court = mapped_column(ForeignKey(\"company.court_id\"))\n", " company_hr = Column(Integer)\n", " company_court = Column(Integer)\n", " person_id = mapped_column(ForeignKey(\"person.id\"))\n", " date_from = Column(DateTime(), default=datetime.now)\n", - " date_to = Column(DateTime(), default=datetime.now) \n", - " relation=Column(rel_type, nullable=False)\n", - " \n", - " #company = relationship(\"Company\")\n", - " #person = relationship(\"Person\", foreign_keys=[person_id])\n", - " #company = relationship('Company', foreign_keys=[company_hr,company_court])\n", + " date_to = Column(DateTime(), default=datetime.now)\n", + " relation = Column(rel_type, nullable=False)\n", + "\n", + " # company = relationship(\"Company\")\n", + " # person = relationship(\"Person\", foreign_keys=[person_id])\n", + " # company = relationship('Company', foreign_keys=[company_hr,company_court])\n", " __table_args__ = (\n", - " ForeignKeyConstraint([company_hr, company_court], 
[Company.hr, Company.court_id]),\n", + " ForeignKeyConstraint(\n", + " [company_hr, company_court], [Company.hr, Company.court_id]\n", + " ),\n", " )" ] }, @@ -258,22 +285,30 @@ "metadata": {}, "outputs": [], "source": [ - "#create own relation type and company_relation object\n", - "rel_type_comp=Enum(\"participates_with\",\"has_shares_of\",\"is_supplied_by\",\"works_with\", name=\"rel_type_comp\", create_type=False)\n", + "# create own relation type and company_relation object\n", + "rel_type_comp = Enum(\n", + " \"participates_with\",\n", + " \"has_shares_of\",\n", + " \"is_supplied_by\",\n", + " \"works_with\",\n", + " name=\"rel_type_comp\",\n", + " create_type=False,\n", + ")\n", + "\n", "\n", "class Company_Relation(Base):\n", - " __tablename__ = 'company_relation'\n", + " __tablename__ = \"company_relation\"\n", "\n", " id = Column(Integer(), primary_key=True)\n", - " company1_id = Column(Integer,nullable=False)\n", - " company2_id= Column(Integer,nullable=False)\n", + " company1_id = Column(Integer, nullable=False)\n", + " company2_id = Column(Integer, nullable=False)\n", " date_from = Column(DateTime(), default=datetime.now)\n", - " date_to = Column(DateTime(), default=datetime.now) \n", - " relation=Column(rel_type_comp, nullable=False)\n", - " \n", - " #company = relationship(\"Company\")\n", + " date_to = Column(DateTime(), default=datetime.now)\n", + " relation = Column(rel_type_comp, nullable=False)\n", "\n", - " __table_args__ = {'extend_existing': True}" + " # company = relationship(\"Company\")\n", + "\n", + " __table_args__ = {\"extend_existing\": True}" ] }, { @@ -320,7 +355,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('Amtsgerichte.csv', sep=';') \n" + "df = pd.read_csv(\"Amtsgerichte.csv\", sep=\";\")" ] }, { @@ -331,11 +366,9 @@ "outputs": [], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " court=DistrictCourt( \n", - " city = str(df['Stadt'].iloc[i]),\n", - " name = str(df['Name'].iloc[i]))\n", - " \n", + " # get data from dataframe\n", + " court = DistrictCourt(city=str(df[\"Stadt\"].iloc[i]), name=str(df[\"Name\"].iloc[i]))\n", + "\n", " session.add(court)\n", " session.commit()" ] @@ -355,7 +388,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('01_Stammdaten_Unternehmen_HR2.csv', sep=';',encoding=\"ISO-8859-1\")" + "df = pd.read_csv(\"01_Stammdaten_Unternehmen_HR2.csv\", sep=\";\", encoding=\"ISO-8859-1\")" ] }, { @@ -406,15 +439,16 @@ ], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " comp=Company( \n", - " hr= int(df['HR'].iloc[i]),\n", - " court_id= int(df['Amtsgericht'].iloc[i]),\n", - " name = str(df['Name'].iloc[i]),\n", - " street = str(df['Strasse'].iloc[i]),\n", - " zip = int(df['PLZ'].iloc[i]),\n", - " city = str(df['Stadt'].iloc[i]),\n", - " sector=str(df['Branche'].iloc[i]))\n", + " # get data from dataframe\n", + " comp = Company(\n", + " hr=int(df[\"HR\"].iloc[i]),\n", + " court_id=int(df[\"Amtsgericht\"].iloc[i]),\n", + " name=str(df[\"Name\"].iloc[i]),\n", + " street=str(df[\"Strasse\"].iloc[i]),\n", + " zip=int(df[\"PLZ\"].iloc[i]),\n", + " city=str(df[\"Stadt\"].iloc[i]),\n", + " sector=str(df[\"Branche\"].iloc[i]),\n", + " )\n", " session.add(comp)\n", " session.commit()" ] @@ -445,7 +479,7 @@ } ], "source": [ - "df=pd.read_csv('BASF_Data.csv', sep=';', decimal=\",\",encoding=\"ISO-8859-1\") \n", + "df = pd.read_csv(\"BASF_Data.csv\", sep=\";\", decimal=\",\", encoding=\"ISO-8859-1\")\n", "df.columns" ] }, @@ -459,21 +493,22 @@ "from datetime 
import datetime\n", "\n", "for i in range(len(df)):\n", - " #get data from dataframe\n", - " fin=Finance( \n", - " company_hr = int(df['Company_HR'].iloc[i]),\n", - " company_court = int(df['Company_Court'].iloc[i]),\n", - " date=datetime.strptime(str(df['Jahr'].iloc[i]), '%Y'),\n", - " total_volume=str(df['Umsatz'].iloc[i]),\n", - " ebit=str(df['Ebit'].iloc[i]) ,\n", - " ebitda=str(df['EBITDA'].iloc[i]),\n", - " ebit_margin=null(),\n", - " total_balance=null(),\n", - " equity=null(),\n", - " debt=null(),\n", - " return_on_equity=null(),\n", - " capital_turnover_rate=null())\n", - " \n", + " # get data from dataframe\n", + " fin = Finance(\n", + " company_hr=int(df[\"Company_HR\"].iloc[i]),\n", + " company_court=int(df[\"Company_Court\"].iloc[i]),\n", + " date=datetime.strptime(str(df[\"Jahr\"].iloc[i]), \"%Y\"),\n", + " total_volume=str(df[\"Umsatz\"].iloc[i]),\n", + " ebit=str(df[\"Ebit\"].iloc[i]),\n", + " ebitda=str(df[\"EBITDA\"].iloc[i]),\n", + " ebit_margin=null(),\n", + " total_balance=null(),\n", + " equity=null(),\n", + " debt=null(),\n", + " return_on_equity=null(),\n", + " capital_turnover_rate=null(),\n", + " )\n", + "\n", " session.add(fin)\n", " session.commit()" ] @@ -504,7 +539,7 @@ } ], "source": [ - "df=pd.read_csv('Telekom_Data.csv', sep=';',decimal=',',encoding=\"ISO-8859-1\") \n", + "df = pd.read_csv(\"Telekom_Data.csv\", sep=\";\", decimal=\",\", encoding=\"ISO-8859-1\")\n", "df.columns" ] }, @@ -516,21 +551,22 @@ "outputs": [], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " fin=Finance( \n", - " company_hr = int(df['Company_HR'].iloc[i]),\n", - " company_court = int(df['Company_Court'].iloc[i]), \n", - " date=datetime.strptime(str(df['Jahr'].iloc[i]), '%Y'),\n", - " total_volume=str(df['Umsatz'].iloc[i]),\n", - " ebit=str(df['Ebit'].iloc[i]) ,\n", - " ebitda=str(df['EBITDA'].iloc[i]),\n", - " ebit_margin=null(),\n", - " total_balance=null(),\n", - " equity=null(),\n", - " debt=null(),\n", - " return_on_equity=null(),\n", - " capital_turnover_rate=null())\n", - " \n", + " # get data from dataframe\n", + " fin = Finance(\n", + " company_hr=int(df[\"Company_HR\"].iloc[i]),\n", + " company_court=int(df[\"Company_Court\"].iloc[i]),\n", + " date=datetime.strptime(str(df[\"Jahr\"].iloc[i]), \"%Y\"),\n", + " total_volume=str(df[\"Umsatz\"].iloc[i]),\n", + " ebit=str(df[\"Ebit\"].iloc[i]),\n", + " ebitda=str(df[\"EBITDA\"].iloc[i]),\n", + " ebit_margin=null(),\n", + " total_balance=null(),\n", + " equity=null(),\n", + " debt=null(),\n", + " return_on_equity=null(),\n", + " capital_turnover_rate=null(),\n", + " )\n", + "\n", " session.add(fin)\n", " session.commit()" ] @@ -561,7 +597,7 @@ } ], "source": [ - "df=pd.read_csv('EON_Data.csv', sep=';',decimal=',',encoding=\"ISO-8859-1\") \n", + "df = pd.read_csv(\"EON_Data.csv\", sep=\";\", decimal=\",\", encoding=\"ISO-8859-1\")\n", "df.columns" ] }, @@ -575,21 +611,22 @@ "outputs": [], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " fin=Finance( \n", - " company_hr = int(df['Company_HR'].iloc[i]),\n", - " company_court = int(df['Company_Court'].iloc[i]),\n", - " date=datetime.strptime(str(df['Jahr'].iloc[i]), '%Y'),\n", - " total_volume=str(df['Umsatz'].iloc[i]),\n", - " ebit=str(df['Ebit'].iloc[i]) ,\n", - " ebitda=str(df['EBITDA'].iloc[i]),\n", - " ebit_margin=null(),\n", - " total_balance=null(),\n", - " equity=null(),\n", - " debt=null(),\n", - " return_on_equity=null(),\n", - " capital_turnover_rate=null())\n", - " \n", + " # get data from dataframe\n", + " fin 
= Finance(\n", + " company_hr=int(df[\"Company_HR\"].iloc[i]),\n", + " company_court=int(df[\"Company_Court\"].iloc[i]),\n", + " date=datetime.strptime(str(df[\"Jahr\"].iloc[i]), \"%Y\"),\n", + " total_volume=str(df[\"Umsatz\"].iloc[i]),\n", + " ebit=str(df[\"Ebit\"].iloc[i]),\n", + " ebitda=str(df[\"EBITDA\"].iloc[i]),\n", + " ebit_margin=null(),\n", + " total_balance=null(),\n", + " equity=null(),\n", + " debt=null(),\n", + " return_on_equity=null(),\n", + " capital_turnover_rate=null(),\n", + " )\n", + "\n", " session.add(fin)\n", " session.commit()" ] @@ -628,7 +665,7 @@ } ], "source": [ - "df=pd.read_csv('person1000.csv', sep=';',decimal=',',encoding=\"ISO-8859-1\") \n", + "df = pd.read_csv(\"person1000.csv\", sep=\";\", decimal=\",\", encoding=\"ISO-8859-1\")\n", "df.columns" ] }, @@ -640,12 +677,9 @@ "outputs": [], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " per=Person( \n", - " name = str(df['Name'].iloc[i]),\n", - " surname = str(df['Surname'].iloc[i])\n", - ")\n", - " \n", + " # get data from dataframe\n", + " per = Person(name=str(df[\"Name\"].iloc[i]), surname=str(df[\"Surname\"].iloc[i]))\n", + "\n", " session.add(per)\n", " session.commit()" ] @@ -667,53 +701,70 @@ "source": [ "import random\n", "\n", - "relation=['Executive',\n", - "'Auditor',\n", - "'Supervisory_Board',\n", - "'Managing_Director',\n", - "'Authorized_Representive',\n", - "'Final_Auditor'\n", + "relation = [\n", + " \"Executive\",\n", + " \"Auditor\",\n", + " \"Supervisory_Board\",\n", + " \"Managing_Director\",\n", + " \"Authorized_Representive\",\n", + " \"Final_Auditor\",\n", "]\n", "\n", - "hr_court=[\n", - "(12334,2),\n", - "(64566,2),\n", - "(5433,3),\n", - "(12334,4),\n", - "(12336,5),\n", - "(555,6),\n", - "(555,7),\n", - "(12384,8),\n", - "(64345,9),\n", - "(4344,1),\n", - "(866,1),\n", - "(9875,1)\n", + "hr_court = [\n", + " (12334, 2),\n", + " (64566, 2),\n", + " (5433, 3),\n", + " (12334, 4),\n", + " (12336, 5),\n", + " (555, 6),\n", + " (555, 7),\n", + " (12384, 8),\n", + " (64345, 9),\n", + " (4344, 1),\n", + " (866, 1),\n", + " (9875, 1),\n", "]\n", "\n", - "edges=[]\n", + "edges = []\n", "\n", - "#create amount of combinations\n", + "# create amount of combinations\n", "for i in range(2000):\n", - " rand_comp=random.randint(0,11)\n", - " comp_hr=hr_court[rand_comp][0]\n", - " comp_court=hr_court[rand_comp][1]\n", - " \n", - " rand_person=random.randint(1,999)\n", - " rand_relation=random.randint(0,5)\n", - " rand_year_start=random.randint(2005,2023)\n", - " if rand_year_start<2023:\n", - " year_to=rand_year_start+1\n", + " rand_comp = random.randint(0, 11)\n", + " comp_hr = hr_court[rand_comp][0]\n", + " comp_court = hr_court[rand_comp][1]\n", + "\n", + " rand_person = random.randint(1, 999)\n", + " rand_relation = random.randint(0, 5)\n", + " rand_year_start = random.randint(2005, 2023)\n", + " if rand_year_start < 2023:\n", + " year_to = rand_year_start + 1\n", " else:\n", " pass\n", " # year_to=None\n", - " \n", - " #edges.append((rand_company,df['Name'].iloc[rand_person],rand_year_start,year_to,relation[rand_relation])) \n", - " edges.append((comp_hr,comp_court,rand_person,int(rand_year_start),year_to,relation[rand_relation])) \n", - " \n", - "#edges.to_csv('edges.csv')\n", - "col=['Company_HR','Company_Court','Person_ID','Year_From','Year_To','Relation_Type']\n", - "dfEdges=pd.DataFrame(edges,columns=col)\n", - "dfEdges.to_csv('edges.csv')" + "\n", + " # 
edges.append((rand_company,df['Name'].iloc[rand_person],rand_year_start,year_to,relation[rand_relation]))\n", + " edges.append(\n", + " (\n", + " comp_hr,\n", + " comp_court,\n", + " rand_person,\n", + " int(rand_year_start),\n", + " year_to,\n", + " relation[rand_relation],\n", + " )\n", + " )\n", + "\n", + "# edges.to_csv('edges.csv')\n", + "col = [\n", + " \"Company_HR\",\n", + " \"Company_Court\",\n", + " \"Person_ID\",\n", + " \"Year_From\",\n", + " \"Year_To\",\n", + " \"Relation_Type\",\n", + "]\n", + "dfEdges = pd.DataFrame(edges, columns=col)\n", + "dfEdges.to_csv(\"edges.csv\")" ] }, { @@ -912,7 +963,7 @@ } ], "source": [ - "df=pd.read_csv('edges.csv', sep=',') \n", + "df = pd.read_csv(\"edges.csv\", sep=\",\")\n", "df" ] }, @@ -924,16 +975,16 @@ "outputs": [], "source": [ "for i in range(len(df)):\n", - " #get data from dataframe\n", - " edges=Person_Relation( \n", - " company_hr = int(df['Company_HR'].iloc[i]),\n", - " company_court = int(df['Company_Court'].iloc[i]),\n", - " person_id = int(df['Person_ID'].iloc[i]),\n", - " date_from=datetime.strptime(str(df['Year_From'].iloc[i]), '%Y'),\n", - " date_to=datetime.strptime(str(df['Year_To'].iloc[i]), '%Y'),\n", - " relation = str(df['Relation_Type'].iloc[i])\n", - ")\n", - " \n", + " # get data from dataframe\n", + " edges = Person_Relation(\n", + " company_hr=int(df[\"Company_HR\"].iloc[i]),\n", + " company_court=int(df[\"Company_Court\"].iloc[i]),\n", + " person_id=int(df[\"Person_ID\"].iloc[i]),\n", + " date_from=datetime.strptime(str(df[\"Year_From\"].iloc[i]), \"%Y\"),\n", + " date_to=datetime.strptime(str(df[\"Year_To\"].iloc[i]), \"%Y\"),\n", + " relation=str(df[\"Relation_Type\"].iloc[i]),\n", + " )\n", + "\n", " session.add(edges)\n", " session.commit()" ] @@ -1084,7 +1135,7 @@ "\n", "print(\"total_volume|ebit|ebitda\")\n", "for n in Fin:\n", - " print(n.total_volume, n.ebit, n.ebitda)\n" + " print(n.total_volume, n.ebit, n.ebitda)" ] }, { @@ -1094,7 +1145,7 @@ "metadata": {}, "outputs": [], "source": [ - "users = session.query(User).filter(User.name == 'John').all()" + "users = session.query(User).filter(User.name == \"John\").all()" ] }, { diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/01_Connect_to_Database.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/01_Connect_to_Database.ipynb index 6c14e0c..4b6a0e0 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/01_Connect_to_Database.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/01_Connect_to_Database.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "#pip install psycopg2" + "# pip install psycopg2" ] }, { @@ -17,7 +17,7 @@ "metadata": {}, "outputs": [], "source": [ - "#pip install --user ipython-sql" + "# pip install --user ipython-sql" ] }, { @@ -55,10 +55,8 @@ ], "source": [ "conn = psycopg2.connect(\n", - " host=\"localhost\",\n", - " database=\"transparenz\",\n", - " user=\"postgres\",\n", - " password=\"postgres\")\n", + " host=\"localhost\", database=\"transparenz\", user=\"postgres\", password=\"postgres\"\n", + ")\n", "\n", "print(\"Database connected successfully\")" ] @@ -89,11 +87,12 @@ ], "source": [ "conn.autocommit = True\n", - "#create a table\n", + "# create a table\n", "cur = conn.cursor() # creating a cursor\n", - " \n", + "\n", "# executing queries to create table\n", - "cur.execute(\"\"\"\n", + 
"cur.execute(\n", + " \"\"\"\n", "CREATE TABLE company\n", "(\n", " ID SERIAL PRIMARY KEY NOT NULL,\n", @@ -102,12 +101,13 @@ " ZIP INT NOT NULL,\n", " CITY VARCHAR(100) NOT NULL,\n", " SECTOR VARCHAR(200) NOT NULL)\n", - "\"\"\")\n", - " \n", + "\"\"\"\n", + ")\n", + "\n", "# commit the changes\n", - "conn.commit() # <--- makes sure the change is shown in the database\n", - "#conn.close()\n", - "#cur.close()\n", + "conn.commit() # <--- makes sure the change is shown in the database\n", + "# conn.close()\n", + "# cur.close()\n", "print(\"Table Created successfully\")" ] }, @@ -135,11 +135,12 @@ ], "source": [ "conn.autocommit = True\n", - "#create a table\n", + "# create a table\n", "cur = conn.cursor() # creating a cursor\n", - " \n", + "\n", "# executing queries to create table\n", - "cur.execute(\"\"\"\n", + "cur.execute(\n", + " \"\"\"\n", "CREATE TABLE finance\n", "(\n", " FINANCE_ID SERIAL PRIMARY KEY NOT NULL,\n", @@ -147,12 +148,13 @@ " KIND_OF VARCHAR(50) NOT NULL,\n", " DATE DATE NOT NULL,\n", " SUM FLOAT NOT NULL)\n", - "\"\"\")\n", - " \n", + "\"\"\"\n", + ")\n", + "\n", "# commit the changes\n", - "conn.commit() # <--- makes sure the change is shown in the database\n", - "#conn.close()\n", - "#cur.close()\n", + "conn.commit() # <--- makes sure the change is shown in the database\n", + "# conn.close()\n", + "# cur.close()\n", "print(\"Table Created successfully\")" ] }, diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/02_Connect_to_Database_publish_Company_Data.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/02_Connect_to_Database_publish_Company_Data.ipynb index 7a23bcb..a965fd2 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/02_Connect_to_Database_publish_Company_Data.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/02_Connect_to_Database_publish_Company_Data.ipynb @@ -10,6 +10,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -20,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('01_Stammdaten_Unternehmen.csv', sep=';') " + "df = pd.read_csv(\"01_Stammdaten_Unternehmen.csv\", sep=\";\")" ] }, { @@ -240,10 +241,8 @@ ], "source": [ "conn = psycopg2.connect(\n", - " host=\"localhost\",\n", - " database=\"transparenz\",\n", - " user=\"postgres\",\n", - " password=\"postgres\")\n", + " host=\"localhost\", database=\"transparenz\", user=\"postgres\", password=\"postgres\"\n", + ")\n", "\n", "print(\"Database connected successfully\")" ] @@ -267,18 +266,18 @@ "\n", "\n", "for i in range(len(df)):\n", - " #get data from dataframe\n", - " name=str(df['Name'].iloc[i])\n", - " street=str(df['Straße'].iloc[i])\n", - " zipcode=int(df['PLZ'].iloc[i])\n", - " city=str(df['Stadt'].iloc[i])\n", - " sector=str(df['Branche'].iloc[i])\n", - " \n", - " postgres_insert_query = \"\"\" INSERT INTO company (NAME,STREET, ZIP, CITY,SECTOR) VALUES (%s,%s,%s,%s,%s)\"\"\" \n", - " \n", - " record_to_insert = (name,street,zipcode,city,sector)\n", - " cur.execute(postgres_insert_query, record_to_insert) \n", - " \n", + " # get data from dataframe\n", + " name = str(df[\"Name\"].iloc[i])\n", + " street = str(df[\"Straße\"].iloc[i])\n", + " zipcode = int(df[\"PLZ\"].iloc[i])\n", + " city = str(df[\"Stadt\"].iloc[i])\n", + " sector = 
str(df[\"Branche\"].iloc[i])\n", + "\n", + " postgres_insert_query = \"\"\" INSERT INTO company (NAME,STREET, ZIP, CITY,SECTOR) VALUES (%s,%s,%s,%s,%s)\"\"\"\n", + "\n", + " record_to_insert = (name, street, zipcode, city, sector)\n", + " cur.execute(postgres_insert_query, record_to_insert)\n", + "\n", "conn.commit()\n", "conn.close()" ] diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-1_Publish_Finance_Testdata_BASF.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-1_Publish_Finance_Testdata_BASF.ipynb index 57f3586..948939e 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-1_Publish_Finance_Testdata_BASF.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-1_Publish_Finance_Testdata_BASF.ipynb @@ -23,6 +23,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -33,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('BASF_Data_NewOrder.csv', sep=';', decimal=\",\") " + "df = pd.read_csv(\"BASF_Data_NewOrder.csv\", sep=\";\", decimal=\",\")" ] }, { @@ -528,10 +529,8 @@ ], "source": [ "conn = psycopg2.connect(\n", - " host=\"localhost\",\n", - " database=\"transparenz\",\n", - " user=\"postgres\",\n", - " password=\"postgres\")\n", + " host=\"localhost\", database=\"transparenz\", user=\"postgres\", password=\"postgres\"\n", + ")\n", "\n", "print(\"Database connected successfully\")" ] @@ -553,20 +552,22 @@ "source": [ "cur = conn.cursor()\n", "\n", - "PK_ID=8 #BASF hat den PK 8, deshalb wird dieser manuell hinzugefügt\n", + "PK_ID = 8 # BASF hat den PK 8, deshalb wird dieser manuell hinzugefügt\n", "\n", "\n", "for i in range(len(df)):\n", - " #get data from dataframe\n", - " kind_of=str(df['Metrik'].iloc[i])\n", - " date=str(df['Datum'].iloc[i])\n", - " amount=float(df['Summe [Milliarden €]'].iloc[i])\n", - " \n", - " postgres_insert_query = \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\" \n", - " record_to_insert = (PK_ID,kind_of,date,amount)\n", - " cur.execute(postgres_insert_query, record_to_insert) \n", - " #print(postgres_insert_query, record_to_insert)\n", - " \n", + " # get data from dataframe\n", + " kind_of = str(df[\"Metrik\"].iloc[i])\n", + " date = str(df[\"Datum\"].iloc[i])\n", + " amount = float(df[\"Summe [Milliarden €]\"].iloc[i])\n", + "\n", + " postgres_insert_query = (\n", + " \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\"\n", + " )\n", + " record_to_insert = (PK_ID, kind_of, date, amount)\n", + " cur.execute(postgres_insert_query, record_to_insert)\n", + " # print(postgres_insert_query, record_to_insert)\n", + "\n", "conn.commit()\n", "conn.close()" ] diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish-Finance_Testdata_Telekom.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish-Finance_Testdata_Telekom.ipynb index 27330e1..b90865e 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish-Finance_Testdata_Telekom.ipynb +++ 
b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish-Finance_Testdata_Telekom.ipynb @@ -10,6 +10,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -20,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('Telekom_Data_NewOrder.csv', sep=';',decimal=',') " + "df = pd.read_csv(\"Telekom_Data_NewOrder.csv\", sep=\";\", decimal=\",\")" ] }, { @@ -403,10 +404,8 @@ ], "source": [ "conn = psycopg2.connect(\n", - " host=\"localhost\",\n", - " database=\"transparenz\",\n", - " user=\"postgres\",\n", - " password=\"postgres\")\n", + " host=\"localhost\", database=\"transparenz\", user=\"postgres\", password=\"postgres\"\n", + ")\n", "\n", "print(\"Database connected successfully\")" ] @@ -428,20 +427,22 @@ "source": [ "cur = conn.cursor()\n", "\n", - "PK_ID=5 #BASF hat den PK 8, deshalb wird dieser manuell hinzugefügt\n", + "PK_ID = 5 # BASF hat den PK 8, deshalb wird dieser manuell hinzugefügt\n", "\n", "\n", "for i in range(len(df)):\n", - " #get data from dataframe\n", - " kind_of=str(df['Metrik'].iloc[i])\n", - " date=str(df['Datum'].iloc[i])\n", - " amount=float(df['Summe [Milliarden €]'].iloc[i])\n", - " \n", - " postgres_insert_query = \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\" \n", - " record_to_insert = (PK_ID,kind_of,date,amount)\n", - " cur.execute(postgres_insert_query, record_to_insert) \n", - " #print(postgres_insert_query, record_to_insert)\n", - " \n", + " # get data from dataframe\n", + " kind_of = str(df[\"Metrik\"].iloc[i])\n", + " date = str(df[\"Datum\"].iloc[i])\n", + " amount = float(df[\"Summe [Milliarden €]\"].iloc[i])\n", + "\n", + " postgres_insert_query = (\n", + " \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\"\n", + " )\n", + " record_to_insert = (PK_ID, kind_of, date, amount)\n", + " cur.execute(postgres_insert_query, record_to_insert)\n", + " # print(postgres_insert_query, record_to_insert)\n", + "\n", "conn.commit()\n", "conn.close()" ] diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish_Finance_Testdata_EON.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish_Finance_Testdata_EON.ipynb index 96d3223..e4df914 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish_Finance_Testdata_EON.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/03-2_Publish_Finance_Testdata_EON.ipynb @@ -10,6 +10,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -20,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "df=pd.read_csv('EON_Data_NewOrder.csv', sep=';',decimal=',') " + "df = pd.read_csv(\"EON_Data_NewOrder.csv\", sep=\";\", decimal=\",\")" ] }, { @@ -340,10 +341,8 @@ ], "source": [ "conn = psycopg2.connect(\n", - " host=\"localhost\",\n", - " database=\"transparenz\",\n", - " user=\"postgres\",\n", - " password=\"postgres\")\n", + " host=\"localhost\", database=\"transparenz\", user=\"postgres\", password=\"postgres\"\n", + ")\n", "\n", "print(\"Database connected successfully\")" ] @@ -365,20 +364,22 @@ "source": [ "cur = conn.cursor()\n", "\n", - "PK_ID=9 #BASF hat den PK 8, 
deshalb wird dieser manuell hinzugefügt\n", + "PK_ID = 9 # BASF hat den PK 8, deshalb wird dieser manuell hinzugefügt\n", "\n", "\n", "for i in range(len(df)):\n", - " #get data from dataframe\n", - " kind_of=str(df['Metrik'].iloc[i])\n", - " date=str(df['Datum'].iloc[i])\n", - " amount=float(df['Summe [Milliarden €]'].iloc[i])\n", - " \n", - " postgres_insert_query = \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\" \n", - " record_to_insert = (PK_ID,kind_of,date,amount)\n", - " cur.execute(postgres_insert_query, record_to_insert) \n", - " #print(postgres_insert_query, record_to_insert)\n", - " \n", + " # get data from dataframe\n", + " kind_of = str(df[\"Metrik\"].iloc[i])\n", + " date = str(df[\"Datum\"].iloc[i])\n", + " amount = float(df[\"Summe [Milliarden €]\"].iloc[i])\n", + "\n", + " postgres_insert_query = (\n", + " \"\"\" INSERT INTO finance (company_id,kind_of, date, sum) VALUES (%s,%s,%s,%s)\"\"\"\n", + " )\n", + " record_to_insert = (PK_ID, kind_of, date, amount)\n", + " cur.execute(postgres_insert_query, record_to_insert)\n", + " # print(postgres_insert_query, record_to_insert)\n", + "\n", "conn.commit()\n", "conn.close()" ] diff --git a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/04_First_Query.ipynb b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/04_First_Query.ipynb index 0901098..bd5ec50 100644 --- a/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/04_First_Query.ipynb +++ b/documentations/seminararbeiten/Datenspeicherung/Jupyter/Notebooks_with_SQL_and_preliminary_data/04_First_Query.ipynb @@ -17,6 +17,7 @@ "import numpy as np\n", "import pandas as pd\n", "import ipywidgets as widgets\n", + "\n", "pd.options.plotting.backend = \"plotly\"" ] }, @@ -48,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "#load sql extension\n", + "# load sql extension\n", "%load_ext sql" ] }, diff --git a/documentations/seminararbeiten/Datenvisualisierung/Diagramme_plotly/Visualisierung_plotly.ipynb b/documentations/seminararbeiten/Datenvisualisierung/Diagramme_plotly/Visualisierung_plotly.ipynb index 3313ea4..d22adc6 100644 --- a/documentations/seminararbeiten/Datenvisualisierung/Diagramme_plotly/Visualisierung_plotly.ipynb +++ b/documentations/seminararbeiten/Datenvisualisierung/Diagramme_plotly/Visualisierung_plotly.ipynb @@ -79,16 +79,20 @@ "import numpy as np\n", "\n", "# create sample data for one company\n", - "data = {'Jahr': ['2017', '2018', '2019', '2020', '2021', '2022'],\n", - " 'Umsatz': [19, 23, 30, 42, 37, 45]}\n", + "data = {\n", + " \"Jahr\": [\"2017\", \"2018\", \"2019\", \"2020\", \"2021\", \"2022\"],\n", + " \"Umsatz\": [19, 23, 30, 42, 37, 45],\n", + "}\n", "# save as pandas dataframe\n", "df = pd.DataFrame.from_dict(data)\n", "\n", "# create sample data for a second company\n", - "data2 = {'Jahr': ['2017', '2018', '2019', '2020', '2021', '2022'],\n", - " 'Umsatz': [15, 21, 33, 28, 27, 30]}\n", + "data2 = {\n", + " \"Jahr\": [\"2017\", \"2018\", \"2019\", \"2020\", \"2021\", \"2022\"],\n", + " \"Umsatz\": [15, 21, 33, 28, 27, 30],\n", + "}\n", "# save as pandas dataframe\n", - "df2 = pd.DataFrame.from_dict(data2)\n" + "df2 = pd.DataFrame.from_dict(data2)" ] }, { @@ -1006,12 +1010,12 @@ ], "source": [ "# create bar plot\n", - "fig_saeule = px.bar(df, y = 'Umsatz', labels = {'index': '', 'Umsatz': ''})\n", + "fig_saeule = px.bar(df, y=\"Umsatz\", labels={\"index\": \"\", \"Umsatz\": 
\"\"})\n", "# set color\n", - "fig_saeule.update_traces(marker_color = '#00509b')\n", + "fig_saeule.update_traces(marker_color=\"#00509b\")\n", "\n", "# save as image\n", - "fig_saeule.write_image('Saeule.png')\n", + "fig_saeule.write_image(\"Saeule.png\")\n", "# show in notebook\n", "fig_saeule.show()" ] @@ -1924,12 +1928,12 @@ ], "source": [ "# create horizontal bar plot\n", - "fig_balken = px.bar(df, x = 'Umsatz', labels = {'index': '', 'Umsatz': ''}, orientation='h')\n", + "fig_balken = px.bar(df, x=\"Umsatz\", labels={\"index\": \"\", \"Umsatz\": \"\"}, orientation=\"h\")\n", "# set color\n", - "fig_balken.update_traces(marker_color = '#00509b')\n", + "fig_balken.update_traces(marker_color=\"#00509b\")\n", "\n", "# save as image\n", - "fig_balken.write_image('Balken.png')\n", + "fig_balken.write_image(\"Balken.png\")\n", "# show in notebook\n", "fig_balken.show()" ] @@ -2841,14 +2845,19 @@ } ], "source": [ - "\n", "# sreate bar plot with named labels and title\n", - "fig_saeule_titel = px.bar(df, x = 'Jahr', y = 'Umsatz', labels = {'Umsatz': 'Umsatz in Mio.€'}, title = 'Umsatzentwicklung von Unternehmen A')\n", + "fig_saeule_titel = px.bar(\n", + " df,\n", + " x=\"Jahr\",\n", + " y=\"Umsatz\",\n", + " labels={\"Umsatz\": \"Umsatz in Mio.€\"},\n", + " title=\"Umsatzentwicklung von Unternehmen A\",\n", + ")\n", "# set color\n", - "fig_saeule_titel.update_traces(marker_color = '#00509b')\n", + "fig_saeule_titel.update_traces(marker_color=\"#00509b\")\n", "\n", "# save as image\n", - "fig_saeule_titel.write_image('Saeule_Titel.png')\n", + "fig_saeule_titel.write_image(\"Saeule_Titel.png\")\n", "# show in notebook\n", "fig_saeule_titel.show()" ] @@ -3764,27 +3773,31 @@ ], "source": [ "# create figure\n", - "fig_saeule_zwei= go.Figure()\n", + "fig_saeule_zwei = go.Figure()\n", "\n", "# add trace for company 1\n", - "fig_saeule_zwei.add_trace(go.Bar(x = df['Jahr'], y = df['Umsatz'], name = 'A', marker_color=\"#00509b\"))\n", + "fig_saeule_zwei.add_trace(\n", + " go.Bar(x=df[\"Jahr\"], y=df[\"Umsatz\"], name=\"A\", marker_color=\"#00509b\")\n", + ")\n", "\n", "# add trace for company 2\n", - "fig_saeule_zwei.add_trace(go.Bar(x = df2['Jahr'], y = df2['Umsatz'], name = 'B', marker_color = \"#6f7072\"))\n", + "fig_saeule_zwei.add_trace(\n", + " go.Bar(x=df2[\"Jahr\"], y=df2[\"Umsatz\"], name=\"B\", marker_color=\"#6f7072\")\n", + ")\n", "\n", "# update layout to grouped\n", - "fig_saeule_zwei.update_layout(barmode='group')\n", + "fig_saeule_zwei.update_layout(barmode=\"group\")\n", "\n", "# set title and labels\n", "fig_saeule_zwei.update_layout(\n", - " title = \"Vergleich der Umsatzentwicklung\",\n", - " xaxis_title = \"Jahr\",\n", - " yaxis_title = \"Umsatz in Mio.€\",\n", - " legend_title = \"Unternehmen\",\n", + " title=\"Vergleich der Umsatzentwicklung\",\n", + " xaxis_title=\"Jahr\",\n", + " yaxis_title=\"Umsatz in Mio.€\",\n", + " legend_title=\"Unternehmen\",\n", ")\n", "\n", "# save as image\n", - "fig_saeule_zwei.write_image('Saeule_Zwei.png')\n", + "fig_saeule_zwei.write_image(\"Saeule_Zwei.png\")\n", "# show in notebook\n", "fig_saeule_zwei.show()" ] @@ -4697,9 +4710,9 @@ ], "source": [ "# create line plot\n", - "fig_line = px.line(df, y = df['Umsatz'], labels = {'index':'', 'Umsatz': ''})\n", + "fig_line = px.line(df, y=df[\"Umsatz\"], labels={\"index\": \"\", \"Umsatz\": \"\"})\n", "# set color\n", - "fig_line.update_traces(line_color = '#00509b')\n", + "fig_line.update_traces(line_color=\"#00509b\")\n", "# save as image\n", "fig_line.write_image(\"Linie.png\")\n", "# show in 
network\n", @@ -5617,15 +5630,31 @@ "# create figure\n", "fig_line = go.Figure()\n", "# add trace for company 1\n", - "fig_line.add_trace(go.Scatter(x = df['Jahr'], y = df['Umsatz'], name = 'A', line_color = '#00509b', marker_color = '#00509b'))\n", + "fig_line.add_trace(\n", + " go.Scatter(\n", + " x=df[\"Jahr\"],\n", + " y=df[\"Umsatz\"],\n", + " name=\"A\",\n", + " line_color=\"#00509b\",\n", + " marker_color=\"#00509b\",\n", + " )\n", + ")\n", "# add trace for company 2\n", - "fig_line.add_trace(go.Scatter(x = df2['Jahr'], y = df2['Umsatz'], name = 'B', line_color = '#6f7072', marker_color = '#6f7072'))\n", + "fig_line.add_trace(\n", + " go.Scatter(\n", + " x=df2[\"Jahr\"],\n", + " y=df2[\"Umsatz\"],\n", + " name=\"B\",\n", + " line_color=\"#6f7072\",\n", + " marker_color=\"#6f7072\",\n", + " )\n", + ")\n", "# set title and labels\n", "fig_line.update_layout(\n", - " title = \"Vergleich der Umsatzentwicklung\",\n", - " xaxis_title = \"Jahr\",\n", - " yaxis_title = \"Umsatz in Mio.€\",\n", - " legend_title = \"Unternehmen\",\n", + " title=\"Vergleich der Umsatzentwicklung\",\n", + " xaxis_title=\"Jahr\",\n", + " yaxis_title=\"Umsatz in Mio.€\",\n", + " legend_title=\"Unternehmen\",\n", ")\n", "# save as image\n", "fig_line.write_image(\"Linie_Vergleich.png\")\n", @@ -6534,13 +6563,15 @@ ], "source": [ "# create sample data\n", - "x = [1,2,2,3,5,5,6,6,6,7,8,8,8,10,10,10,7,4,3,4,9,6]\n", - "y = [1,2,3,3,3,4,5,5,6,6,7,7,8,8,9,10,2,10,8,6,6,4]\n", + "x = [1, 2, 2, 3, 5, 5, 6, 6, 6, 7, 8, 8, 8, 10, 10, 10, 7, 4, 3, 4, 9, 6]\n", + "y = [1, 2, 3, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 2, 10, 8, 6, 6, 4]\n", "\n", "# create scatter plot\n", - "scatter = go.Figure(data=go.Scatter(x=x, y=y, mode='markers', marker = {'color': '#00509b'}))\n", + "scatter = go.Figure(\n", + " data=go.Scatter(x=x, y=y, mode=\"markers\", marker={\"color\": \"#00509b\"})\n", + ")\n", "# save as image\n", - "scatter.write_image('Streudiagramm.png')\n", + "scatter.write_image(\"Streudiagramm.png\")\n", "# show in netbook\n", "scatter.show()" ] @@ -7445,13 +7476,15 @@ "source": [ "# create sample data\n", "sentiment = [0.1, 0.3, 0.6]\n", - "scores = ['negativ', 'neutral', 'positiv']\n", + "scores = [\"negativ\", \"neutral\", \"positiv\"]\n", "# create pie chart\n", - "fig_kreis = px.pie(values=sentiment, color = scores, color_discrete_map={'negativ':'black',\n", - " 'neutral':'#6f7072',\n", - " 'positiv':'#00509b'})\n", + "fig_kreis = px.pie(\n", + " values=sentiment,\n", + " color=scores,\n", + " color_discrete_map={\"negativ\": \"black\", \"neutral\": \"#6f7072\", \"positiv\": \"#00509b\"},\n", + ")\n", "# save as image\n", - "fig_kreis.write_image('Kreis.png')\n", + "fig_kreis.write_image(\"Kreis.png\")\n", "# show in notebook\n", "fig_kreis.show()" ] @@ -8356,14 +8389,22 @@ } ], "source": [ - "# create figure \n", - "fig_sentiment = px.pie(values=sentiment, names=scores, color = scores, color_discrete_map={'negativ':'lightcoral',\n", - " 'neutral':'moccasin',\n", - " 'positiv':'darkseagreen'}, title = 'Stimmungsanalyse basierend auf Nachrichtenartikel X')\n", + "# create figure\n", + "fig_sentiment = px.pie(\n", + " values=sentiment,\n", + " names=scores,\n", + " color=scores,\n", + " color_discrete_map={\n", + " \"negativ\": \"lightcoral\",\n", + " \"neutral\": \"moccasin\",\n", + " \"positiv\": \"darkseagreen\",\n", + " },\n", + " title=\"Stimmungsanalyse basierend auf Nachrichtenartikel X\",\n", + ")\n", "# change line color\n", - "fig_sentiment.update_traces(marker = dict(line=dict(color='#000000', width=2)))\n", 
+ "fig_sentiment.update_traces(marker=dict(line=dict(color=\"#000000\", width=2)))\n", "# save as image\n", - "fig_sentiment.write_image('Kreis_Sentiment.png')\n", + "fig_sentiment.write_image(\"Kreis_Sentiment.png\")\n", "# show in notebook\n", "fig_sentiment.show()" ] @@ -8408,34 +8449,39 @@ "compound = 0.75\n", "angle = (compound * 100 - 50) * 1.8\n", "\n", - "x_length = (np.sin(np.radians(angle))*1.2)/2\n", - "y_length = (np.cos(np.radians(angle))*1.2)/2\n", + "x_length = (np.sin(np.radians(angle)) * 1.2) / 2\n", + "y_length = (np.cos(np.radians(angle)) * 1.2) / 2\n", "\n", "if compound < 0.5:\n", " y_tail = y_length\n", " y_head = -y_length\n", " x_tail = x_length\n", - " x_head = - x_length\n", + " x_head = -x_length\n", "else:\n", " y_tail = -y_length\n", " y_head = y_length\n", - " x_tail = - x_length\n", + " x_tail = -x_length\n", " x_head = x_length\n", "\n", "\n", "dx = x_head - x_tail\n", "dy = y_head - y_tail\n", "\n", - "fig, ax = plt.subplots() \n", - "arrow = patches.FancyArrowPatch((x_tail, y_tail), (x_head, y_head),\n", - " mutation_scale=100, ec = 'darkseagreen', fc = 'darkseagreen')\n", + "fig, ax = plt.subplots()\n", + "arrow = patches.FancyArrowPatch(\n", + " (x_tail, y_tail),\n", + " (x_head, y_head),\n", + " mutation_scale=100,\n", + " ec=\"darkseagreen\",\n", + " fc=\"darkseagreen\",\n", + ")\n", "ax.add_patch(arrow)\n", - "plt.axis('off')\n", + "plt.axis(\"off\")\n", "ax.set_xlim([-1, 1])\n", "ax.set_ylim([-1, 1])\n", - "ax.set_title('Aktueller Stimmungstrend', fontsize=20)\n", - "fig.suptitle(' ', fontsize=24)\n", - "fig.savefig('Pfeil.png')\n" + "ax.set_title(\"Aktueller Stimmungstrend\", fontsize=20)\n", + "fig.suptitle(\" \", fontsize=24)\n", + "fig.savefig(\"Pfeil.png\")" ] } ], diff --git a/documentations/seminararbeiten/Datenvisualisierung/Diagramme_pyvis/Visualisierung_networkx_pyvis.ipynb b/documentations/seminararbeiten/Datenvisualisierung/Diagramme_pyvis/Visualisierung_networkx_pyvis.ipynb index e432baf..ae141a4 100644 --- a/documentations/seminararbeiten/Datenvisualisierung/Diagramme_pyvis/Visualisierung_networkx_pyvis.ipynb +++ b/documentations/seminararbeiten/Datenvisualisierung/Diagramme_pyvis/Visualisierung_networkx_pyvis.ipynb @@ -92,39 +92,40 @@ "G = nx.MultiGraph()\n", "\n", "# create list of nodes with attributes as a dictionary\n", - "nodes = [(1, {'label': 'Firma 1', 'branche': 'Branche 1', 'land': 'Land 1'}), \n", - " (2, {'label': 'Firma 2', 'branche': 'Branche 1', 'land': 'Land 2'}),\n", - " (3, {'label': 'Firma 3', 'branche': 'Branche 1', 'land': 'Land 3'}),\n", - " (4, {'label': 'Firma 4', 'branche': 'Branche 2', 'land': 'Land 4'}),\n", - " (5, {'label': 'Firma 5', 'branche': 'Branche 2', 'land': 'Land 1'}),\n", - " (6, {'label': 'Firma 6', 'branche': 'Branche 2', 'land': 'Land 3'}),\n", - " (7, {'label': 'Firma 7', 'branche': 'Branche 3', 'land': 'Land 3'}),\n", - " (8, {'label': 'Firma 8', 'branche': 'Branche 3', 'land': 'Land 2'}),\n", - " (9, {'label': 'Firma 9', 'branche': 'Branche 4', 'land': 'Land 1'}),\n", - " (10, {'label': 'Firma 10', 'branche': 'Branche 4', 'land': 'Land 4'}),\n", - " ]\n", + "nodes = [\n", + " (1, {\"label\": \"Firma 1\", \"branche\": \"Branche 1\", \"land\": \"Land 1\"}),\n", + " (2, {\"label\": \"Firma 2\", \"branche\": \"Branche 1\", \"land\": \"Land 2\"}),\n", + " (3, {\"label\": \"Firma 3\", \"branche\": \"Branche 1\", \"land\": \"Land 3\"}),\n", + " (4, {\"label\": \"Firma 4\", \"branche\": \"Branche 2\", \"land\": \"Land 4\"}),\n", + " (5, {\"label\": \"Firma 5\", \"branche\": \"Branche 2\", \"land\": 
\"Land 1\"}),\n", + " (6, {\"label\": \"Firma 6\", \"branche\": \"Branche 2\", \"land\": \"Land 3\"}),\n", + " (7, {\"label\": \"Firma 7\", \"branche\": \"Branche 3\", \"land\": \"Land 3\"}),\n", + " (8, {\"label\": \"Firma 8\", \"branche\": \"Branche 3\", \"land\": \"Land 2\"}),\n", + " (9, {\"label\": \"Firma 9\", \"branche\": \"Branche 4\", \"land\": \"Land 1\"}),\n", + " (10, {\"label\": \"Firma 10\", \"branche\": \"Branche 4\", \"land\": \"Land 4\"}),\n", + "]\n", "\n", "# create list of edges with attributes as a dictionary\n", "edges = [\n", - " (1, 2, {'label': 'beziehung1'}), \n", - " (5, 2, {'label': 'beziehung2'}), \n", - " (1, 3, {'label': 'beziehung3'}), \n", - " (2, 4, {'label': 'beziehung3'}), \n", - " (2, 6, {'label': 'beziehung4'}), \n", - " (2, 5, {'label': 'beziehung4'}),\n", - " (8, 10, {'label': 'beziehung4'}),\n", - " (9, 10, {'label': 'beziehung3'}), \n", - " (3, 7, {'label': 'beziehung2'}), \n", - " (6, 8, {'label': 'beziehung1'}), \n", - " (6, 9, {'label': 'beziehung1'}), \n", - " (1, 6, {'label': 'beziehung2'})\n", - " ]\n", + " (1, 2, {\"label\": \"beziehung1\"}),\n", + " (5, 2, {\"label\": \"beziehung2\"}),\n", + " (1, 3, {\"label\": \"beziehung3\"}),\n", + " (2, 4, {\"label\": \"beziehung3\"}),\n", + " (2, 6, {\"label\": \"beziehung4\"}),\n", + " (2, 5, {\"label\": \"beziehung4\"}),\n", + " (8, 10, {\"label\": \"beziehung4\"}),\n", + " (9, 10, {\"label\": \"beziehung3\"}),\n", + " (3, 7, {\"label\": \"beziehung2\"}),\n", + " (6, 8, {\"label\": \"beziehung1\"}),\n", + " (6, 9, {\"label\": \"beziehung1\"}),\n", + " (1, 6, {\"label\": \"beziehung2\"}),\n", + "]\n", "\n", "# add nodes to the graph\n", "G.add_nodes_from(nodes)\n", "\n", "# add edges to the graph, to hide arrow heads of the edges use option arrows = 'false'\n", - "G.add_edges_from(edges, arrows = 'false')" + "G.add_edges_from(edges, arrows=\"false\")" ] }, { @@ -147,7 +148,9 @@ "outputs": [], "source": [ "for node in G.nodes:\n", - " G.nodes[node]['title'] = G.nodes[node]['label'] + '\\n' + 'Anzahl Verbindungen: ' + str(G.degree[node])" + " G.nodes[node][\"title\"] = (\n", + " G.nodes[node][\"label\"] + \"\\n\" + \"Anzahl Verbindungen: \" + str(G.degree[node])\n", + " )" ] }, { @@ -206,16 +209,16 @@ "outputs": [], "source": [ "# scaling the size of the nodes by 5*degree\n", - "scale = 5 \n", + "scale = 5\n", "\n", "# getting all nodes and their number of connections\n", "d = dict(G.degree)\n", "\n", "# updating dict\n", - "d.update((x, scale*(y+1)) for x, y in d.items())\n", + "d.update((x, scale * (y + 1)) for x, y in d.items())\n", "\n", "# setting size attribute according to created dictionary\n", - "nx.set_node_attributes(G,d,'size')" + "nx.set_node_attributes(G, d, \"size\")" ] }, { @@ -236,10 +239,17 @@ "from pyvis.network import Network\n", "\n", "# create network, 'directed = true' allows multiple edges between nodes\n", - "nt = Network('1000px', '1000px', neighborhood_highlight=True, notebook=True, cdn_resources='in_line', directed=True)\n", + "nt = Network(\n", + " \"1000px\",\n", + " \"1000px\",\n", + " neighborhood_highlight=True,\n", + " notebook=True,\n", + " cdn_resources=\"in_line\",\n", + " directed=True,\n", + ")\n", "\n", "# populates the nodes and edges data structures\n", - "nt.from_nx(G)\n" + "nt.from_nx(G)" ] }, { @@ -275,25 +285,28 @@ "outputs": [], "source": [ "# define new function that sets the color of the nodes\n", - "def color_type (net, type):\n", - " ''' color_type sets the color of a network depending on an attribute of the nodes\n", - " net: network\n", - " type: 
+ "def color_type(net, type):\n",
+ "    \"\"\"color_type sets the color of a network depending on an attribute of the nodes\n",
+ "    net: network\n",
+ "    type: 'branche' or 'land'\"\"\"\n",
 "\n",
- "    colormap = {'Branche 1': '#87CEEB',\n",
- "                'Branche 2': '#0f4c81',\n",
- "                'Branche 3': '#B2FFFF', \n",
- "                'Branche 4': '#191970',\n",
- "                'Land 1': '#F8D568', \n",
- "                'Land 2': '#F58025', \n",
- "                'Land 3': '#CC5500', \n",
- "                'Land 4': '#C0362C'}\n",
+ "    colormap = {\n",
+ "        \"Branche 1\": \"#87CEEB\",\n",
+ "        \"Branche 2\": \"#0f4c81\",\n",
+ "        \"Branche 3\": \"#B2FFFF\",\n",
+ "        \"Branche 4\": \"#191970\",\n",
+ "        \"Land 1\": \"#F8D568\",\n",
+ "        \"Land 2\": \"#F58025\",\n",
+ "        \"Land 3\": \"#CC5500\",\n",
+ "        \"Land 4\": \"#C0362C\",\n",
+ "    }\n",
 "    for node in net.nodes:\n",
- "        node['color'] = colormap[node[type]]\n",
+ "        node[\"color\"] = colormap[node[type]]\n",
 "    return net\n",
 "\n",
+ "\n",
 "# set color based on attribute\n",
- "nt = color_type(nt, 'branche')"
+ "nt = color_type(nt, \"branche\")"
 ]
 },
 {
@@ -310,8 +323,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "# set all edge colors \n",
- "nt.options.edges.color = 'grey'"
+ "# set all edge colors\n",
+ "nt.options.edges.color = \"grey\""
 ]
 },
 {
@@ -360,13 +373,20 @@
 ],
 "source": [
 "# activate physics options to try out different solvers\n",
- "#nt.show_buttons(filter_=['physics'])\n",
+ "# nt.show_buttons(filter_=['physics'])\n",
 "\n",
 "# set physics options\n",
- "nt.barnes_hut(gravity=-8000, central_gravity=0.3, spring_length=200, spring_strength=0.1, damping=0.09, overlap=0)\n",
+ "nt.barnes_hut(\n",
+ "    gravity=-8000,\n",
+ "    central_gravity=0.3,\n",
+ "    spring_length=200,\n",
+ "    spring_strength=0.1,\n",
+ "    damping=0.09,\n",
+ "    overlap=0,\n",
+ ")\n",
 "\n",
 "# create html and save in same folder\n",
- "nt.show('Netzwerk_Verflechtungsanalyse.html')"
+ "nt.show(\"Netzwerk_Verflechtungsanalyse.html\")"
 ]
 },
 {
@@ -416,16 +436,16 @@
 "from pyvis.network import Network\n",
 "\n",
 "sn = nx.Graph()\n",
- "sn_nodes = [1,2,3,4,5,6,7]\n",
- "sn_edges = [(1,4),(2,4),(3,4),(4,5),(5,6),(5,7)]\n",
+ "sn_nodes = [1, 2, 3, 4, 5, 6, 7]\n",
+ "sn_edges = [(1, 4), (2, 4), (3, 4), (4, 5), (5, 6), (5, 7)]\n",
 "\n",
- "sn.add_nodes_from(sn_nodes, color = '#00509b')\n",
+ "sn.add_nodes_from(sn_nodes, color=\"#00509b\")\n",
 "sn.add_edges_from(sn_edges)\n",
 "\n",
- "net = Network('1000px', '1000px', notebook=True, cdn_resources='in_line')\n",
+ "net = Network(\"1000px\", \"1000px\", notebook=True, cdn_resources=\"in_line\")\n",
 "\n",
 "net.from_nx(sn)\n",
- "net.show('Netzwerk.html')\n"
+ "net.show(\"Netzwerk.html\")"
 ]
 }
],
diff --git a/documentations/seminararbeiten/Datenvisualisierung/Erstes_Beispiel_Netzwerk/networkx_pyvis.ipynb b/documentations/seminararbeiten/Datenvisualisierung/Erstes_Beispiel_Netzwerk/networkx_pyvis.ipynb
index 776b62a..7312223 100644
--- a/documentations/seminararbeiten/Datenvisualisierung/Erstes_Beispiel_Netzwerk/networkx_pyvis.ipynb
+++ b/documentations/seminararbeiten/Datenvisualisierung/Erstes_Beispiel_Netzwerk/networkx_pyvis.ipynb
@@ -82,19 +82,25 @@
 "import pandas as pd\n",
 "\n",
 "# create dataframe based on the sample data\n",
- "df_nodes = pd.read_csv('nodes.csv', sep = ';')\n",
+ "df_nodes = pd.read_csv(\"nodes.csv\", sep=\";\")\n",
 "\n",
 "# define shape based on the type\n",
- "node_shape = {'Company': 'dot', 'Person': 'triangle'}\n",
- "df_nodes['shape'] = df_nodes['type'].map(node_shape)\n",
+ "node_shape = {\"Company\": \"dot\", \"Person\": \"triangle\"}\n",
+ "df_nodes[\"shape\"] = df_nodes[\"type\"].map(node_shape)\n",
 "\n",
 "# define color based on branche\n",
- "node_color = {'Branche 1': ' #f3e8eeff', 'Branche 2': '#bacdb0ff', 'Branche 3': '#729b79ff', 'Branche 4': '#475b63ff', 'Branche 5': '#2e2c2fff'}\n",
- "df_nodes['color'] = df_nodes['branche'].map(node_color)\n",
+ "node_color = {\n",
+ "    \"Branche 1\": \" #f3e8eeff\",\n",
+ "    \"Branche 2\": \"#bacdb0ff\",\n",
+ "    \"Branche 3\": \"#729b79ff\",\n",
+ "    \"Branche 4\": \"#475b63ff\",\n",
+ "    \"Branche 5\": \"#2e2c2fff\",\n",
+ "}\n",
+ "df_nodes[\"color\"] = df_nodes[\"branche\"].map(node_color)\n",
 "\n",
 "# add information column that can be used for the mouse over in the graph\n",
- "df_nodes = df_nodes.fillna('')\n",
- "df_nodes['title'] = df_nodes['label'] + '\\n' + df_nodes['branche']\n",
+ "df_nodes = df_nodes.fillna(\"\")\n",
+ "df_nodes[\"title\"] = df_nodes[\"label\"] + \"\\n\" + df_nodes[\"branche\"]\n",
 "\n",
 "# show first five entries of the dataframe\n",
 "print(df_nodes.head())"
@@ -127,7 +133,7 @@
 ],
 "source": [
 "# create dataframe based on the sample data\n",
- "df_edges = pd.read_csv('edges.csv', sep = ';')\n",
+ "df_edges = pd.read_csv(\"edges.csv\", sep=\";\")\n",
 "\n",
 "# show first five entries of the dataframe\n",
 "print(df_edges.head())"
@@ -157,10 +163,10 @@
 "graph = nx.MultiGraph()\n",
 "\n",
 "# create edges from dataframe\n",
- "graph = nx.from_pandas_edgelist(df_edges, source = 'from', target = 'to', edge_attr= 'label')\n",
+ "graph = nx.from_pandas_edgelist(df_edges, source=\"from\", target=\"to\", edge_attr=\"label\")\n",
 "\n",
 "# update node attributes from dataframe\n",
- "nodes_attr = df_nodes.set_index('id').to_dict(orient = 'index')\n",
+ "nodes_attr = df_nodes.set_index(\"id\").to_dict(orient=\"index\")\n",
 "nx.set_node_attributes(graph, nodes_attr)"
 ]
 },
@@ -193,11 +199,11 @@
 "    # create empty dictionary\n",
 "    dict = {}\n",
 "    # get node id\n",
- "    dict['id'] = node\n",
+ "    dict[\"id\"] = node\n",
 "    # get k-neighbours for k=1,2,3, subtract 1 since output of single_source_shortest_path_length contains node itself\n",
- "    dict['k=1'] = len(nx.single_source_shortest_path_length(graph, node, cutoff=1))-1\n",
- "    dict['k=2'] = len(nx.single_source_shortest_path_length(graph, node, cutoff=2))-1\n",
- "    dict['k=3'] = len(nx.single_source_shortest_path_length(graph, node, cutoff=3))-1\n",
+ "    dict[\"k=1\"] = len(nx.single_source_shortest_path_length(graph, node, cutoff=1)) - 1\n",
+ "    dict[\"k=2\"] = len(nx.single_source_shortest_path_length(graph, node, cutoff=2)) - 1\n",
+ "    dict[\"k=3\"] = len(nx.single_source_shortest_path_length(graph, node, cutoff=3)) - 1\n",
 "    # append list for each node\n",
 "    k_neighbours.append(dict)\n",
 "\n",
@@ -225,40 +231,45 @@
 "from pyvis.network import Network\n",
 "\n",
 "# initiate network\n",
- "net = Network(directed=False, neighborhood_highlight=True, bgcolor = \"white\", font_color=\"black\")\n",
+ "net = Network(\n",
+ "    directed=False, neighborhood_highlight=True, bgcolor=\"white\", font_color=\"black\"\n",
+ ")\n",
 "\n",
 "# pass networkx graph to pyvis\n",
 "net.from_nx(graph)\n",
 "\n",
- "# set edge options \n",
+ "# set edge options\n",
 "net.inherit_edge_colors(False)\n",
- "net.set_edge_smooth('dynamic')\n",
+ "net.set_edge_smooth(\"dynamic\")\n",
 "\n",
 "# choose size format\n",
- "size_type = 'edges' # select 'edges' or 'eigen'\n",
+ "size_type = \"edges\"  # select 'edges' or 'eigen'\n",
 "\n",
 "adj_list = net.get_adj_list()\n",
 "\n",
- "if size_type == 'eigen':\n",
+ "if size_type == \"eigen\":\n",
 "    eigenvector = nx.eigenvector_centrality(graph)\n",
 "\n",
 "# calculate and update size of the nodes depending on their number of edges\n",
 "for node_id, neighbors in adj_list.items():\n",
- "    if size_type == 'edges':\n",
- "        size = len(neighbors)*5\n",
- "    if size_type == 'eigen':\n",
- "        size = eigenvector[node_id]*200\n",
- "    next((node.update({'size': size}) for node in net.nodes if node['id'] == node_id), None)\n",
+ "    if size_type == \"edges\":\n",
+ "        size = len(neighbors) * 5\n",
+ "    if size_type == \"eigen\":\n",
+ "        size = eigenvector[node_id] * 200\n",
+ "    next(\n",
+ "        (node.update({\"size\": size}) for node in net.nodes if node[\"id\"] == node_id),\n",
+ "        None,\n",
+ "    )\n",
 "\n",
 "# set the node distance and spring length using repulsion\n",
 "net.repulsion(node_distance=250, spring_length=150)\n",
 "\n",
 "# activate physics buttons to further explore the available solvers:\n",
 "# barnesHut, forceAtlas2Based, repulsion, hierarchicalRepulsion\n",
- "net.show_buttons(filter_=['physics'])\n",
+ "net.show_buttons(filter_=[\"physics\"])\n",
 "\n",
 "# save graph as HTML\n",
- "net.save_graph('networkx_pyvis.html')\n"
+ "net.save_graph(\"networkx_pyvis.html\")"
 ]
 },
 {
diff --git a/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb b/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb
index 7d07996..fd53fa8 100644
--- a/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb
+++ b/documentations/seminararbeiten/Verflechtungsanalyse/mockup_verflechtungsanalyse_with_networkx.ipynb
@@ -169,19 +169,25 @@
 "import pandas as pd\n",
 "\n",
 "# create dataframe based on the sample data\n",
- "df_nodes = pd.read_csv('companies.csv', sep = ';')\n",
+ "df_nodes = pd.read_csv(\"companies.csv\", sep=\";\")\n",
 "\n",
 "# define shape based on the type\n",
- "node_shape = {'Company': 'dot', 'Person': 'triangle'}\n",
- "df_nodes['shape'] = df_nodes['type'].map(node_shape)\n",
+ "node_shape = {\"Company\": \"dot\", \"Person\": \"triangle\"}\n",
+ "df_nodes[\"shape\"] = df_nodes[\"type\"].map(node_shape)\n",
 "\n",
 "# define color based on branche\n",
- "node_color = {'Automobilhersteller': ' #729b79ff', 'Automobilzulieferer': '#475b63ff', 'Branche 3': '#f3e8eeff', 'Branche 4': '#bacdb0ff', 'Branche 5': '#2e2c2fff'}\n",
- "df_nodes['color'] = df_nodes['branche'].map(node_color)\n",
+ "node_color = {\n",
+ "    \"Automobilhersteller\": \" #729b79ff\",\n",
+ "    \"Automobilzulieferer\": \"#475b63ff\",\n",
+ "    \"Branche 3\": \"#f3e8eeff\",\n",
+ "    \"Branche 4\": \"#bacdb0ff\",\n",
+ "    \"Branche 5\": \"#2e2c2fff\",\n",
+ "}\n",
+ "df_nodes[\"color\"] = df_nodes[\"branche\"].map(node_color)\n",
 "\n",
 "# add information column that can be used for the mouse over in the graph\n",
- "df_nodes = df_nodes.fillna('')\n",
- "df_nodes['title'] = df_nodes['label'] + '\\n' + df_nodes['branche']\n",
+ "df_nodes = df_nodes.fillna(\"\")\n",
+ "df_nodes[\"title\"] = df_nodes[\"label\"] + \"\\n\" + df_nodes[\"branche\"]\n",
 "\n",
 "# show first five entries of the dataframe\n",
 "print(df_nodes.head())"
@@ -215,7 +221,7 @@
 ],
 "source": [
 "# create dataframe based on the sample data\n",
- "df_edges = pd.read_csv('relations.csv', sep = ';')\n",
+ "df_edges = pd.read_csv(\"relations.csv\", sep=\";\")\n",
 "\n",
 "# show first five entries of the dataframe\n",
 "print(df_edges.head())"
@@ -263,23 +269,25 @@
 "import networkx as nx\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
- "fig = plt.figure(figsize=(12,12))\n",
+ "fig = plt.figure(figsize=(12, 12))\n",
 "ax = plt.subplot(111)\n",
- "ax.set_title('Graph - Shapes', fontsize=10)\n",
+ "ax.set_title(\"Graph - Shapes\", fontsize=10)\n",
 "\n",
 "# initiate graph\n",
 "graph = nx.MultiGraph()\n",
 "\n",
 "# create edges from dataframe\n",
- "graph = nx.from_pandas_edgelist(df_edges, source = 'from', target = 'to', edge_attr= 'label')\n",
+ "graph = nx.from_pandas_edgelist(df_edges, source=\"from\", target=\"to\", edge_attr=\"label\")\n",
 "\n",
 "# update node attributes from dataframe\n",
- "nodes_attr = df_nodes.set_index('id').to_dict(orient = 'index')\n",
+ "nodes_attr = df_nodes.set_index(\"id\").to_dict(orient=\"index\")\n",
 "nx.set_node_attributes(graph, nodes_attr)\n",
 "\n",
 "\n",
 "pos = nx.spring_layout(graph)\n",
- "nx.draw(graph, pos, node_size=1500, node_color='yellow', font_size=8, font_weight='bold')\n",
+ "nx.draw(\n",
+ "    graph, pos, node_size=1500, node_color=\"yellow\", font_size=8, font_weight=\"bold\"\n",
+ ")\n",
 "\n",
 "plt.tight_layout()\n",
 "plt.show()\n",
@@ -307,16 +315,19 @@
 "# visualize using pyvis\n",
 "from pyvis.network import Network\n",
 "\n",
+ "\n",
 "def create_centrality_graph(df, measure_type, save_path):\n",
 "    # initiate network\n",
- "    net = Network(directed=False, neighborhood_highlight=True, bgcolor = \"white\", font_color=\"black\")\n",
+ "    net = Network(\n",
+ "        directed=False, neighborhood_highlight=True, bgcolor=\"white\", font_color=\"black\"\n",
+ "    )\n",
 "\n",
 "    # pass networkx graph to pyvis\n",
 "    net.from_nx(graph)\n",
 "\n",
- "    # set edge options \n",
+ "    # set edge options\n",
 "    net.inherit_edge_colors(False)\n",
- "    net.set_edge_smooth('dynamic')\n",
+ "    net.set_edge_smooth(\"dynamic\")\n",
 "\n",
 "    adj_list = net.get_adj_list()\n",
 "\n",
@@ -341,28 +352,33 @@
 "        measure_vector = nx.average_degree_connectivity(graph)\n",
 "        # df[\"average_degree\"] = measure_vector.values()\n",
 "        print(measure_vector.values())\n",
- "    \n",
 "\n",
 "    # calculate and update size of the nodes depending on their number of edges\n",
 "    for node_id, neighbors in adj_list.items():\n",
- "        \n",
- "        # df[\"edges\"] = measure_vector.values()\n",
- "        \n",
+ "        # df[\"edges\"] = measure_vector.values()\n",
+ "\n",
 "        if measure_type == \"edges\":\n",
- "            size = 10 #len(neighbors)*5 \n",
+ "            size = 10  # len(neighbors)*5\n",
 "        else:\n",
- "            size = measure_vector[node_id]*50 \n",
- "        next((node.update({'size': size}) for node in net.nodes if node['id'] == node_id), None)\n",
+ "            size = measure_vector[node_id] * 50\n",
+ "        next(\n",
+ "            (\n",
+ "                node.update({\"size\": size})\n",
+ "                for node in net.nodes\n",
+ "                if node[\"id\"] == node_id\n",
+ "            ),\n",
+ "            None,\n",
+ "        )\n",
 "\n",
 "    # set the node distance and spring length using repulsion\n",
 "    net.repulsion(node_distance=150, spring_length=50)\n",
 "\n",
 "    # activate physics buttons to further explore the available solvers:\n",
 "    # barnesHut, forceAtlas2Based, repulsion, hierarchicalRepulsion\n",
- "    net.show_buttons(filter_=['physics'])\n",
+ "    net.show_buttons(filter_=[\"physics\"])\n",
 "\n",
 "    # save graph as HTML\n",
- "    net.save_graph(save_path)\n"
+ "    net.save_graph(save_path)"
 ]
 },
 {
@@ -593,33 +609,36 @@
 }
 ],
 "source": [
- "net = Network(directed=False, neighborhood_highlight=True, bgcolor = \"white\", font_color=\"black\")\n",
+ "net = Network(\n",
+ "    directed=False, neighborhood_highlight=True, bgcolor=\"white\", font_color=\"black\"\n",
+ ")\n",
 "\n",
 "# pass networkx graph to pyvis\n",
 "net.from_nx(s)\n",
 "\n",
- "# set edge options \n",
+ "# set edge options\n",
 "net.inherit_edge_colors(False)\n",
- "net.set_edge_smooth('dynamic')\n",
+ "net.set_edge_smooth(\"dynamic\")\n",
 "\n",
 "adj_list = net.get_adj_list()\n",
 "\n",
 "# calculate and update size of the nodes depending on their number of edges\n",
 "for node_id, neighbors in adj_list.items():\n",
- "    \n",
- "    # df[\"edges\"] = measure_vector.values()\n",
- "    \n",
- "    \n",
- "    size = 10 #len(neighbors)*5 \n",
- "    \n",
- "    next((node.update({'size': size}) for node in net.nodes if node['id'] == node_id), None)\n",
+ "    # df[\"edges\"] = measure_vector.values()\n",
+ "\n",
+ "    size = 10  # len(neighbors)*5\n",
+ "\n",
+ "    next(\n",
+ "        (node.update({\"size\": size}) for node in net.nodes if node[\"id\"] == node_id),\n",
+ "        None,\n",
+ "    )\n",
 "\n",
 "# set the node distance and spring length using repulsion\n",
 "net.repulsion(node_distance=150, spring_length=50)\n",
 "\n",
 "# activate physics buttons to further explore the available solvers:\n",
 "# barnesHut, forceAtlas2Based, repulsion, hierarchicalRepulsion\n",
- "net.show_buttons(filter_=['physics'])\n",
+ "net.show_buttons(filter_=[\"physics\"])\n",
 "\n",
 "# save graph as HTML\n",
 "net.save_graph(\"./metrics/connected_components_networkx.html\")"