{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "f18d8932", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pyarrow in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 1)) (20.0.0)\n", "Requirement already satisfied: pandas in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 2)) (2.3.1)\n", "Requirement already satisfied: scikit-learn in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 3)) (1.7.0)\n", "Requirement already satisfied: mlflow in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 4)) (3.1.1)\n", "Requirement already satisfied: boto3 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 5)) (1.39.3)\n", "Requirement already satisfied: python-dotenv in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from -r requirements.txt (line 6)) (1.1.1)\n", "Requirement already satisfied: numpy>=1.26.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pandas->-r requirements.txt (line 2)) (2.3.1)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pandas->-r requirements.txt (line 2)) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pandas->-r requirements.txt (line 2)) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pandas->-r requirements.txt (line 2)) (2025.2)\n", "Requirement already satisfied: scipy>=1.8.0 in 
/Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from scikit-learn->-r requirements.txt (line 3)) (1.16.0)\n", "Requirement already satisfied: joblib>=1.2.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from scikit-learn->-r requirements.txt (line 3)) (1.5.1)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from scikit-learn->-r requirements.txt (line 3)) (3.6.0)\n", "Requirement already satisfied: mlflow-skinny==3.1.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (3.1.1)\n", "Requirement already satisfied: Flask<4 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (3.1.1)\n", "Requirement already satisfied: alembic!=1.10.0,<2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (1.16.3)\n", "Requirement already satisfied: docker<8,>=4.0.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (7.1.0)\n", "Requirement already satisfied: graphene<4 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (3.4.3)\n", "Requirement already satisfied: gunicorn<24 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (23.0.0)\n", "Requirement already satisfied: matplotlib<4 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (3.10.3)\n", "Requirement already satisfied: sqlalchemy<3,>=1.4.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow->-r requirements.txt (line 4)) (2.0.41)\n", 
"Requirement already satisfied: cachetools<7,>=5.0.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (5.5.2)\n", "Requirement already satisfied: click<9,>=7.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (8.2.1)\n", "Requirement already satisfied: cloudpickle<4 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (3.1.1)\n", "Requirement already satisfied: databricks-sdk<1,>=0.20.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.58.0)\n", "Requirement already satisfied: fastapi<1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.116.0)\n", "Requirement already satisfied: gitpython<4,>=3.1.9 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (3.1.44)\n", "Requirement already satisfied: importlib_metadata!=4.7.0,<9,>=3.7.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (8.7.0)\n", "Requirement already satisfied: opentelemetry-api<3,>=1.9.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (1.34.1)\n", "Requirement already satisfied: opentelemetry-sdk<3,>=1.9.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (1.34.1)\n", "Requirement already satisfied: packaging<26 in 
/Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (25.0)\n", "Requirement already satisfied: protobuf<7,>=3.12.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (6.31.1)\n", "Requirement already satisfied: pydantic<3,>=1.10.8 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (2.11.7)\n", "Requirement already satisfied: pyyaml<7,>=5.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (6.0.2)\n", "Requirement already satisfied: requests<3,>=2.17.3 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (2.32.4)\n", "Requirement already satisfied: sqlparse<1,>=0.4.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.5.3)\n", "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (4.14.1)\n", "Requirement already satisfied: uvicorn<1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.35.0)\n", "Requirement already satisfied: Mako in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from alembic!=1.10.0,<2->mlflow->-r requirements.txt (line 4)) (1.3.10)\n", "Requirement already satisfied: google-auth~=2.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from 
databricks-sdk<1,>=0.20.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (2.40.3)\n", "Requirement already satisfied: urllib3>=1.26.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from docker<8,>=4.0.0->mlflow->-r requirements.txt (line 4)) (2.5.0)\n", "Requirement already satisfied: starlette<0.47.0,>=0.40.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from fastapi<1->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.46.2)\n", "Requirement already satisfied: blinker>=1.9.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from Flask<4->mlflow->-r requirements.txt (line 4)) (1.9.0)\n", "Requirement already satisfied: itsdangerous>=2.2.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from Flask<4->mlflow->-r requirements.txt (line 4)) (2.2.0)\n", "Requirement already satisfied: jinja2>=3.1.2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from Flask<4->mlflow->-r requirements.txt (line 4)) (3.1.6)\n", "Requirement already satisfied: markupsafe>=2.1.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from Flask<4->mlflow->-r requirements.txt (line 4)) (3.0.2)\n", "Requirement already satisfied: werkzeug>=3.1.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from Flask<4->mlflow->-r requirements.txt (line 4)) (3.1.3)\n", "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from gitpython<4,>=3.1.9->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (4.0.12)\n", "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (5.0.2)\n", "Requirement already 
satisfied: pyasn1-modules>=0.2.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.4.2)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (4.9.1)\n", "Requirement already satisfied: graphql-core<3.3,>=3.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from graphene<4->mlflow->-r requirements.txt (line 4)) (3.2.6)\n", "Requirement already satisfied: graphql-relay<3.3,>=3.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from graphene<4->mlflow->-r requirements.txt (line 4)) (3.2.0)\n", "Requirement already satisfied: zipp>=3.20 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (3.23.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (1.3.2)\n", "Requirement already satisfied: cycler>=0.10 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (4.58.5)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (1.4.8)\n", "Requirement already satisfied: pillow>=8 in 
/Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (11.3.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from matplotlib<4->mlflow->-r requirements.txt (line 4)) (3.2.3)\n", "Requirement already satisfied: opentelemetry-semantic-conventions==0.55b1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.55b1)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.4.1)\n", "Requirement already satisfied: six>=1.5 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->-r requirements.txt (line 2)) (1.17.0)\n", "Requirement already satisfied: charset_normalizer<4,>=2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (3.4.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (3.10)\n", "Requirement already 
satisfied: certifi>=2017.4.17 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (2025.7.9)\n", "Requirement already satisfied: pyasn1>=0.1.3 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.6.1)\n", "Requirement already satisfied: anyio<5,>=3.6.2 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from starlette<0.47.0,>=0.40.0->fastapi<1->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (4.9.0)\n", "Requirement already satisfied: sniffio>=1.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from anyio<5,>=3.6.2->starlette<0.47.0,>=0.40.0->fastapi<1->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (1.3.1)\n", "Requirement already satisfied: h11>=0.8 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from uvicorn<1->mlflow-skinny==3.1.1->mlflow->-r requirements.txt (line 4)) (0.16.0)\n", "Requirement already satisfied: botocore<1.40.0,>=1.39.3 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from boto3->-r requirements.txt (line 5)) (1.39.3)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from boto3->-r requirements.txt (line 5)) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.14.0,>=0.13.0 in /Users/martinper/CodeProjects/jupyter_main_venv/lib/python3.12/site-packages (from boto3->-r requirements.txt (line 5)) (0.13.0)\n" ] } ], "source": [ "# run on python 3.12.11\n", "\n", "!pip install -r requirements.txt\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "3db9777a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Timestamp1572_Bd_Magenta_Débit horaire1572_Bd_Magenta_Taux d'occupation1572_Bd_Magenta_Etat trafic1572_Bd_Magenta_Identifiant noeud amont1572_Bd_Magenta_Libelle noeud amont1572_Bd_Magenta_Identifiant noeud aval1572_Bd_Magenta_Libelle noeud aval1572_Bd_Magenta_Etat arc1572_Bd_Magenta_Date debut dispo data...PA18:O3AUT:PM10BASCH:PM10ELYS:PM10PA01H:PM10PA18:PM10AUT:PM25PA18:PM25PA01H:PM25ELYS:PM25
02024-06-01 03:00:00+00:00NaNNaNInconnu832Magenta-Guy_Patin-Ambroise_Pare834Magenta-Barbes-Chapelle-RochechInvalide1996-10-03...65.6NoneNoneNone10.815.4None3.96.8None
12024-06-01 04:00:00+00:00NaNNaNInconnu832Magenta-Guy_Patin-Ambroise_Pare834Magenta-Barbes-Chapelle-RochechInvalide1996-10-03...64.3NoneNoneNone9.28.4None2.65.9None
22024-06-01 05:00:00+00:00NaNNaNInconnu832Magenta-Guy_Patin-Ambroise_Pare834Magenta-Barbes-Chapelle-RochechInvalide1996-10-03...61.5NoneNoneNone6.96.8None1.24.4None
32024-06-01 06:00:00+00:00NaNNaNInconnu832Magenta-Guy_Patin-Ambroise_Pare834Magenta-Barbes-Chapelle-RochechInvalide1996-10-03...61.2NoneNoneNone5.96.7None-0.13.6None
42024-06-01 07:00:00+00:00NaNNaNInconnu832Magenta-Guy_Patin-Ambroise_Pare834Magenta-Barbes-Chapelle-RochechInvalide1996-10-03...60.0NoneNoneNone6.56.6None1.73.7None
\n", "

5 rows × 595 columns

\n", "
" ], "text/plain": [ " Timestamp 1572_Bd_Magenta_Débit horaire \\\n", "0 2024-06-01 03:00:00+00:00 NaN \n", "1 2024-06-01 04:00:00+00:00 NaN \n", "2 2024-06-01 05:00:00+00:00 NaN \n", "3 2024-06-01 06:00:00+00:00 NaN \n", "4 2024-06-01 07:00:00+00:00 NaN \n", "\n", " 1572_Bd_Magenta_Taux d'occupation 1572_Bd_Magenta_Etat trafic \\\n", "0 NaN Inconnu \n", "1 NaN Inconnu \n", "2 NaN Inconnu \n", "3 NaN Inconnu \n", "4 NaN Inconnu \n", "\n", " 1572_Bd_Magenta_Identifiant noeud amont \\\n", "0 832 \n", "1 832 \n", "2 832 \n", "3 832 \n", "4 832 \n", "\n", " 1572_Bd_Magenta_Libelle noeud amont 1572_Bd_Magenta_Identifiant noeud aval \\\n", "0 Magenta-Guy_Patin-Ambroise_Pare 834 \n", "1 Magenta-Guy_Patin-Ambroise_Pare 834 \n", "2 Magenta-Guy_Patin-Ambroise_Pare 834 \n", "3 Magenta-Guy_Patin-Ambroise_Pare 834 \n", "4 Magenta-Guy_Patin-Ambroise_Pare 834 \n", "\n", " 1572_Bd_Magenta_Libelle noeud aval 1572_Bd_Magenta_Etat arc \\\n", "0 Magenta-Barbes-Chapelle-Rochech Invalide \n", "1 Magenta-Barbes-Chapelle-Rochech Invalide \n", "2 Magenta-Barbes-Chapelle-Rochech Invalide \n", "3 Magenta-Barbes-Chapelle-Rochech Invalide \n", "4 Magenta-Barbes-Chapelle-Rochech Invalide \n", "\n", " 1572_Bd_Magenta_Date debut dispo data ... PA18:O3 AUT:PM10 BASCH:PM10 \\\n", "0 1996-10-03 ... 65.6 None None \n", "1 1996-10-03 ... 64.3 None None \n", "2 1996-10-03 ... 61.5 None None \n", "3 1996-10-03 ... 61.2 None None \n", "4 1996-10-03 ... 
# %% Load the merged dataset
import pandas as pd

# `df_save` keeps the frame exactly as read so that the following cells can
# always restart from it.
df_save = pd.read_parquet("../../data/2024_semester2_merged_v2.parquet")
df_save.head()

# %% Normalise unknown values and encode the traffic state
pd.set_option("display.max_columns", None)

# Work on a copy; column by column, the "Inconnu" placeholder becomes a missing value.
df = df_save.copy().apply(lambda column: column.replace("Inconnu", None))

# Numeric encoding of the traffic-state labels. The trailing notes give the
# matching state name in the realtime API.
traffic_status = {
    None: None,
    "Fluide": 0.0,      # freeflow
    "Pré-saturé": 1.0,  # heavy
    "Saturé": 1.0,      # heavy
    "Bloqué": 2.0,      # congested
}

# Encode every column whose name ends with 'Etat trafic'; labels that are not
# keys of the mapping come out as NaN.
for col in [name for name in df.columns if name.endswith("Etat trafic")]:
    df[col] = df[col].map(traffic_status)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PressureTemperatureWind SpeedHumidityTraffic StatusNOXPM10PM25O3
count4104.0000004104.0000004104.0000004104.0000004099.0000004104.0000004088.0000004104.0000004104.000000
mean1008.65229013.3411843.69336877.7624270.14218724.24837016.1009938.94564448.526613
std7.9372724.6946291.89171115.5390760.17254018.0061287.9904574.96656325.497348
min973.0000001.5197840.16666730.6666670.0000003.1000001.600000-0.633333-0.360000
25%1003.7000009.9299702.23333367.3333330.00000012.45500010.5000005.47500030.215000
50%1008.20000013.3342543.43333381.8333330.00000019.08333314.5000007.80000048.600000
75%1013.80000016.6005524.90000090.3333330.28571430.38500020.00000011.18125063.585000
max1028.60000027.65165011.30000098.8333331.000000174.20000063.50000041.200000182.820000
\n", "
" ], "text/plain": [ " Pressure Temperature Wind Speed Humidity Traffic Status \\\n", "count 4104.000000 4104.000000 4104.000000 4104.000000 4099.000000 \n", "mean 1008.652290 13.341184 3.693368 77.762427 0.142187 \n", "std 7.937272 4.694629 1.891711 15.539076 0.172540 \n", "min 973.000000 1.519784 0.166667 30.666667 0.000000 \n", "25% 1003.700000 9.929970 2.233333 67.333333 0.000000 \n", "50% 1008.200000 13.334254 3.433333 81.833333 0.000000 \n", "75% 1013.800000 16.600552 4.900000 90.333333 0.285714 \n", "max 1028.600000 27.651650 11.300000 98.833333 1.000000 \n", "\n", " NOX PM10 PM25 O3 \n", "count 4104.000000 4088.000000 4104.000000 4104.000000 \n", "mean 24.248370 16.100993 8.945644 48.526613 \n", "std 18.006128 7.990457 4.966563 25.497348 \n", "min 3.100000 1.600000 -0.633333 -0.360000 \n", "25% 12.455000 10.500000 5.475000 30.215000 \n", "50% 19.083333 14.500000 7.800000 48.600000 \n", "75% 30.385000 20.000000 11.181250 63.585000 \n", "max 174.200000 63.500000 41.200000 182.820000 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# five columns are needed\n", "# pressure, temperature, wind speed, humidity, traffic status\n", "\n", "# # function where all columns finishing with a suffix are averaged, removing NaN and None values\n", "# def average_columns_with_suffix(df, suffix):\n", "# return df.filter(regex=f\".*{suffix}\").mean(axis=1)\n", "\n", "def average_columns_with_suffix(df, suffix):\n", " # Filter columns matching the suffix\n", " cols = df.filter(regex=f\".*{suffix}\")\n", " \n", " # Convert all values to numeric, setting errors='coerce' to convert non-numeric to NaN\n", " cols = cols.apply(pd.to_numeric, errors='coerce')\n", " \n", " # Return row-wise mean\n", " return cols.mean(axis=1)\n", "\n", "final_df = pd.DataFrame()\n", "\n", "\n", "dict_of_columns = {\n", " \"Timestamp\": df[\"Timestamp\"].copy(),\n", " \"Pressure\": average_columns_with_suffix(df, \"_PSTAT\"),\n", " \"Temperature\": 
# %% Build the modelling table
# Five inputs are needed (pressure, temperature, wind speed, humidity, traffic
# status) plus the four pollutant targets.
# Output column name -> suffix of the source columns averaged into it.
suffix_by_feature = {
    "Pressure": "_PSTAT",
    "Temperature": "_T",
    "Wind Speed": "_FF",
    "Humidity": "_U",
    "Traffic Status": "_Etat trafic",
    "NOX": "NOX",
    "PM10": "PM10",
    "PM25": "PM25",
    "O3": "O3",
}

dict_of_columns = {"Timestamp": df["Timestamp"].copy()}
for feature_name, column_suffix in suffix_by_feature.items():
    dict_of_columns[feature_name] = average_columns_with_suffix(df, column_suffix)

# Assemble the columns side by side; the timestamp is not kept in the final table.
final_df = pd.concat(dict_of_columns, axis=1).drop(columns=["Timestamp"])
final_df.describe()

# %% Drop incomplete rows
num_rows_before_dropna = len(final_df)
final_df = final_df.dropna()
print(f"Number of rows after dropna: {len(final_df)} / {num_rows_before_dropna}")
# %% Model setup
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Model inputs (weather + traffic) and the pollutant targets predicted jointly.
x_columns = ["Pressure", "Temperature", "Wind Speed", "Humidity", "Traffic Status"]
y_columns = ["NOX", "PM10", "PM25", "O3"]

# %% Chronological train / test split
X = final_df[x_columns].copy()
y = final_df[y_columns].copy()

# shuffle=False keeps the last 20% of the rows as the test set, so train and
# test timestamps do not overlap.
# NOTE(review): this assumes the rows of final_df are in chronological order - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
X_train.shape

# %% Grid search, experiment tracking and model export
import datetime
import os
import pickle

import boto3
import mlflow
import mlflow.sklearn
from dotenv import load_dotenv
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Credentials and the tracking URI are read from the environment (.env file).
load_dotenv()

# Bucket receiving the pickled model.
S3_BUCKET = "jedha-quality-air"

# AWS S3 session
session = boto3.Session(
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    region_name=os.getenv("AWS_REGION"),
)
s3 = session.client("s3")

# Point MLflow at the tracking server configured in the environment.
mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI"))
mlflow.set_experiment("air_quality_prediction")

# Enable autologging
mlflow.sklearn.autolog()

with mlflow.start_run():
    # Hyperparameter grid explored by the search.
    param_grid = {
        "n_estimators": [5, 10, 20, 50, 100, 200, 300],
    }

    base_model = RandomForestRegressor(random_state=42)

    # TimeSeriesSplit validates each fold on rows that come AFTER its training
    # rows, consistent with the chronological hold-out split above. Plain
    # `cv=3` (unshuffled K-fold) would train on rows later than the ones it
    # validates on.
    grid_search = GridSearchCV(
        estimator=base_model,
        param_grid=param_grid,
        cv=TimeSeriesSplit(n_splits=3),
        n_jobs=-1,
        scoring="r2",
    )

    model_base_name = "random_forest_grid_search"

    grid_search.fit(X_train, y_train)

    # Best model from grid search
    model = grid_search.best_estimator_

    # Alternative models kept for quick manual comparison:
    # Linear Regression
    # model = LinearRegression()
    # model_base_name = "linear_model"
    # model.fit(X_train, y_train)

    # Random Forest
    # model = RandomForestRegressor(n_estimators=300, random_state=42)
    # model_base_name = "random_forest_model"
    # model.fit(X_train, y_train)

    # The local file name is fixed; the S3 key carries a timestamp so that
    # successive runs do not overwrite each other.
    model_filename = model_base_name + ".pkl"
    model_filename_for_s3 = model_base_name + "_" + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + ".pkl"

    # save the model to a pickle file locally
    with open(model_filename, "wb") as f:
        pickle.dump(model, f)

    # upload the model to s3
    s3.upload_file(model_filename, S3_BUCKET, f"models/{model_filename_for_s3}")
    print(f"Model saved to S3 as {model_filename_for_s3}")

    # Score on the held-out chronological tail; logged explicitly so the value
    # is stored with the run and not only printed.
    score = model.score(X_test, y_test)
    mlflow.log_metric("test_r2", score)
    print(f"\nTest Score: {score:.4f}")

# Smoke test: predict the four targets for one hand-written observation.
random_values = {
    "Pressure": 999,
    "Temperature": 22,
    "Wind Speed": 10,
    "Humidity": 50,
    "Traffic Status": 0,
}

print(model.predict(pd.DataFrame([random_values])))
# get_params() returns the estimator's hyperparameters (not fitted coefficients).
print("\nModel Hyperparameters: ", model.get_params())
"nbformat_minor": 5 }