{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "3ebfe4e7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Libraries imported successfully\n" ] } ], "source": [ "# Import required libraries\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from datalake.config import DataLakeConfig\n", "from datalake.athena import AthenaQuery\n", "from datalake.catalog import DataLakeCatalog\n", "from datalake.query import DataLakeQuery\n", "from datalake.batch import BatchProcessor\n", "\n", "# Set up plotting\n", "%matplotlib inline\n", "plt.style.use('seaborn-v0_8')\n", "sns.set_palette(\"husl\")\n", "\n", "print(\"✓ Libraries imported successfully\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "f03eaae2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "✓ Configuration loaded\n", " Database: dbparquetdatalake05\n", " Workgroup: athenaworkgroup-datalake05\n", " Region: eu-north-1\n" ] } ], "source": [ "# Configure connection with your credentials\n", "config = DataLakeConfig.from_credentials(\n", " database_name=\"dbparquetdatalake05\",\n", " workgroup=\"athenaworkgroup-datalake05\",\n", " s3_output_location=\"s3://canedge-raw-data-parquet/athena-results/\",\n", " region=\"eu-north-1\",\n", " access_key_id=\"AKIARJQJFFVASPMSGNNY\",\n", " secret_access_key=\"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\",\n", ")\n", "\n", "print(f\"✓ Configuration loaded\")\n", "print(f\" Database: {config.database_name}\")\n", "print(f\" Workgroup: {config.workgroup}\")\n", "print(f\" Region: {config.region}\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "9e8ceaf6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 23:23:13,728 - datalake.athena - INFO - Initialized Athena client for database: dbparquetdatalake05\n", "2026-01-26 23:23:13,729 - datalake.catalog - INFO - Initialized catalog for database: dbparquetdatalake05\n", "2026-01-26 23:23:13,729 - datalake.query - INFO - Initialized DataLakeQuery\n", "2026-01-26 23:23:13,730 - datalake.batch - INFO - Initialized BatchProcessor\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ Athena client and catalog initialized\n" ] } ], "source": [ "# Initialize Athena and catalog\n", "athena = AthenaQuery(config)\n", "catalog = DataLakeCatalog(athena, config)\n", "query = DataLakeQuery(athena, catalog)\n", "processor = BatchProcessor(query)\n", "\n", "print(\"✓ Athena client and catalog initialized\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "0e3d813f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 23:23:14,057 - datalake.athena - INFO - Query started with execution ID: beffdb49-e31a-48bf-8dbf-8c06ae7960cc\n", "2026-01-26 23:23:15,190 - datalake.athena - INFO - Query beffdb49-e31a-48bf-8dbf-8c06ae7960cc completed successfully\n", "2026-01-26 23:23:15,490 - datalake.athena - INFO - Retrieved 77 rows from S3 for query beffdb49-e31a-48bf-8dbf-8c06ae7960cc\n" ] } ], "source": [ "test_query = f\"SHOW TABLES IN {config.database_name}\"\n", "df_tables = athena.query_to_dataframe(test_query, timeout=60)" ] }, { "cell_type": "code", "execution_count": 6, "id": "fca55b3b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 23:23:15,601 - datalake.athena - INFO - Query started with execution ID: bd81d8c7-2371-431b-a6ed-0208bc4b4f1c\n", "2026-01-26 23:23:16,798 - datalake.athena - INFO - Query bd81d8c7-2371-431b-a6ed-0208bc4b4f1c completed successfully\n", "2026-01-26 23:23:16,920 - datalake.athena - INFO - Retrieved 78 rows from S3 for query bd81d8c7-2371-431b-a6ed-0208bc4b4f1c\n", "2026-01-26 23:23:16,921 - datalake.catalog - INFO - Found 78 tables in database\n", "2026-01-26 23:23:16,922 - datalake.catalog - INFO - Found 3 device(s)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Found 3 device(s):\n", " - 97a4aaf4\n", " - b8280fd1\n", " - f1da612a\n" ] } ], "source": [ "# Discover devices\n", "devices = catalog.list_devices()\n", "print(f\"Found {len(devices)} device(s):\")\n", "for device in devices:\n", " print(f\" - {device}\")" ] }, { "cell_type": "code", "execution_count": 13, "id": "103ddb07", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{1: 'out of memory',\n", " 2: 'syntax error',\n", " 3: 'no element found',\n", " 4: 'not well-formed (invalid token)',\n", " 5: 'unclosed token',\n", " 6: 'partial character',\n", " 7: 'mismatched tag',\n", " 8: 'duplicate attribute',\n", " 9: 'junk after document element',\n", " 10: 'illegal parameter entity reference',\n", " 11: 'undefined entity',\n", " 12: 'recursive entity reference',\n", " 13: 'asynchronous entity',\n", " 14: 'reference to invalid character number',\n", " 15: 'reference to binary entity',\n", " 16: 'reference to external entity in attribute',\n", " 17: 'XML or text declaration not at start of entity',\n", " 18: 'unknown encoding',\n", " 19: 'encoding specified in XML declaration is incorrect',\n", " 20: 'unclosed CDATA section',\n", " 21: 'error in processing external entity reference',\n", " 22: 'document is not standalone',\n", " 23: 'unexpected parser state - please send a bug report',\n", " 24: 'entity declared in parameter entity',\n", " 25: 'requested feature requires XML_DTD support in Expat',\n", " 26: 'cannot change setting once parsing has begun',\n", " 27: 'unbound prefix',\n", " 28: 'must not undeclare prefix',\n", " 29: 'incomplete markup in parameter entity',\n", " 30: 'XML declaration not well-formed',\n", " 31: 'text declaration not well-formed',\n", " 32: 'illegal character(s) in public id',\n", " 33: 'parser suspended',\n", " 34: 'parser not suspended',\n", " 35: 'parsing aborted',\n", " 36: 'parsing finished',\n", " 37: 'cannot suspend in external parameter entity'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "messages" ] }, { "cell_type": "code", "execution_count": 19, "id": "fbc4938b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['0100', '0103', '0104', '0105', '0106', '0107', '010c', '010d', '010e', '010f', '0110', '0111', '011f', '012e', '012f', '0133', '0134', '0135', '0143', '0144', '0149', '0155', '0156', '015c']\n" ] } ], "source": [ "import re\n", "\n", "pattern = re.compile(r\"s(?P\\d{2})pid.*m(?P[0-9a-fA-F]{2})$\")\n", "\n", "strings = [\n", " \"can1_obd2_s_m41_s01pid_m00\",\n", " \"can1_obd2_s_m41_s01pid_m03\",\n", " \"can1_obd2_s_m41_s01pid_m04\",\n", " \"can1_obd2_s_m41_s01pid_m05\",\n", " \"can1_obd2_s_m41_s01pid_m06\",\n", " \"can1_obd2_s_m41_s01pid_m07\",\n", " \"can1_obd2_s_m41_s01pid_m0c\",\n", "]\n", "\n", "out = []\n", "for x in messages:\n", " if x.startswith('can1') is False:\n", " continue\n", " m = pattern.search(x)\n", " out.append((m.group(\"s\") + m.group(\"m\")))\n", "\n", "print(out)\n", "# [('01', '00'), ('01', '03'), ('01', '04'), ('01', '05'), ('01', '06'), ('01', '07'), ('01', '0c')]\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "41a79e1e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['97a4aaf4', 'b8280fd1', 'f1da612a']" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "devices" ] }, { "cell_type": "code", "execution_count": 16, "id": "ffe04714", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Exploring device: 97a4aaf4\n", "============================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 23:34:44,692 - datalake.athena - INFO - Query started with execution ID: 442a7cfd-68ed-46eb-98ee-b964a3e3cb6d\n", "2026-01-26 23:34:45,808 - datalake.athena - INFO - Query 442a7cfd-68ed-46eb-98ee-b964a3e3cb6d completed successfully\n", "2026-01-26 23:34:46,146 - datalake.athena - INFO - Retrieved 78 rows from S3 for query 442a7cfd-68ed-46eb-98ee-b964a3e3cb6d\n", "2026-01-26 23:34:46,146 - datalake.catalog - INFO - Found 78 tables in database\n", "2026-01-26 23:34:46,146 - datalake.catalog - INFO - Found 35 messages for device 97a4aaf4\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Found 35 message(s):\n", " - can1_obd2_s_m41_s01pid_m00\n", " - can1_obd2_s_m41_s01pid_m03\n", " - can1_obd2_s_m41_s01pid_m04\n", " - can1_obd2_s_m41_s01pid_m05\n", " - can1_obd2_s_m41_s01pid_m06\n", " - can1_obd2_s_m41_s01pid_m07\n", " - can1_obd2_s_m41_s01pid_m0c\n", " - can1_obd2_s_m41_s01pid_m0d\n", " - can1_obd2_s_m41_s01pid_m0e\n", " - can1_obd2_s_m41_s01pid_m0f\n", " - can1_obd2_s_m41_s01pid_m10\n", " - can1_obd2_s_m41_s01pid_m11\n", " - can1_obd2_s_m41_s01pid_m1f\n", " - can1_obd2_s_m41_s01pid_m2e\n", " - can1_obd2_s_m41_s01pid_m2f\n", " - can1_obd2_s_m41_s01pid_m33\n", " - can1_obd2_s_m41_s01pid_m34\n", " - can1_obd2_s_m41_s01pid_m35\n", " - can1_obd2_s_m41_s01pid_m43\n", " - can1_obd2_s_m41_s01pid_m44\n", " - can1_obd2_s_m41_s01pid_m49\n", " - can1_obd2_s_m41_s01pid_m55\n", " - can1_obd2_s_m41_s01pid_m56\n", " - can1_obd2_s_m41_s01pid_m5c\n", " - can9_gnssaltitude\n", " - can9_gnssdistance\n", " - can9_gnsspos\n", " - can9_gnssspeed\n", " - can9_gnssstatus\n", " - can9_gnsstime\n", " - can9_heartbeat\n", " - can9_imudata\n", " - can9_timecalendar\n", " - can9_timeexternal\n", " - messages\n" ] } ], "source": [ "if devices:\n", " device_id = devices[0]\n", " print(f\"\\nExploring device: {device_id}\")\n", " print(\"=\" * 60)\n", " \n", " messages = catalog.list_messages(device_id)\n", " print(f\"Found {len(messages)} message(s):\")\n", " for message in messages:\n", " print(f\" - {message}\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "a7bae557", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 15:14:27,675 - datalake.athena - INFO - Query started with execution ID: 62096cc6-be14-49f4-ae61-80efc006dbc2\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Schema for 97a4aaf4/can1_obd2_s_m41_s01pid_m00:\n", "============================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 15:14:28,793 - datalake.athena - INFO - Query 62096cc6-be14-49f4-ae61-80efc006dbc2 completed successfully\n", "2026-01-26 15:14:28,947 - datalake.athena - INFO - Retrieved 78 rows from S3 for query 62096cc6-be14-49f4-ae61-80efc006dbc2\n", "2026-01-26 15:14:28,948 - datalake.catalog - INFO - Found 78 tables in database\n", "2026-01-26 15:14:29,052 - datalake.athena - INFO - Query started with execution ID: 93497778-7bd4-4976-bf95-7009ab18b6df\n", "2026-01-26 15:14:30,176 - datalake.athena - INFO - Query 93497778-7bd4-4976-bf95-7009ab18b6df completed successfully\n", "2026-01-26 15:14:30,296 - datalake.athena - INFO - Retrieved 3 rows from S3 for query 93497778-7bd4-4976-bf95-7009ab18b6df\n", "2026-01-26 15:14:30,297 - datalake.catalog - INFO - Schema for 97a4aaf4/can1_obd2_s_m41_s01pid_m00: 3 columns\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Column Type\n", " t timestamp(3)\n", "s01pid00_pidssupported_01_20 double\n", " date_created varchar\n", "\n", "Total columns: 3\n" ] } ], "source": [ "# Get schema for first device/message combination\n", "if devices and messages:\n", " device_id = devices[0]\n", " message = messages[0]\n", " \n", " print(f\"\\nSchema for {device_id}/{message}:\")\n", " print(\"=\" * 60)\n", " \n", " schema = catalog.get_schema(device_id, message)\n", " if schema:\n", " schema_df = pd.DataFrame([\n", " {\"Column\": col, \"Type\": dtype}\n", " for col, dtype in schema.items()\n", " ])\n", " print(schema_df.to_string(index=False))\n", " print(f\"\\nTotal columns: {len(schema)}\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "f3b16b2d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 15:14:30,406 - datalake.athena - INFO - Query started with execution ID: 4fb3b506-612a-458e-a0e7-709b60a9f91e\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Partitions (dates) for 97a4aaf4/can1_obd2_s_m41_s01pid_m00:\n", "============================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 15:14:31,530 - datalake.athena - INFO - Query 4fb3b506-612a-458e-a0e7-709b60a9f91e completed successfully\n", "2026-01-26 15:14:31,648 - datalake.athena - INFO - Retrieved 78 rows from S3 for query 4fb3b506-612a-458e-a0e7-709b60a9f91e\n", "2026-01-26 15:14:31,649 - datalake.catalog - INFO - Found 78 tables in database\n", "2026-01-26 15:14:31,755 - datalake.athena - INFO - Query started with execution ID: 4ce87717-db17-4fa5-b33d-ef63fb4a89fe\n", "2026-01-26 15:14:36,039 - datalake.athena - INFO - Query 4ce87717-db17-4fa5-b33d-ef63fb4a89fe completed successfully\n", "2026-01-26 15:14:36,162 - datalake.athena - INFO - Retrieved 13 rows from S3 for query 4ce87717-db17-4fa5-b33d-ef63fb4a89fe\n", "2026-01-26 15:14:36,164 - datalake.catalog - INFO - Found 13 partitions for tbl_97a4aaf4_can1_obd2_s_m41_s01pid_m00\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Found 13 partition(s):\n", " Date range: 2025-10-21 to 2025-11-11\n", "\n", " All dates:\n", " - 2025-10-21\n", " - 2025-10-27\n", " - 2025-10-28\n", " - 2025-10-29\n", " - 2025-10-30\n", " - 2025-10-31\n", " - 2025-11-03\n", " - 2025-11-04\n", " - 2025-11-05\n", " - 2025-11-06\n", " - 2025-11-07\n", " - 2025-11-10\n", " - 2025-11-11\n" ] } ], "source": [ "# Check available partitions (dates)\n", "if devices and messages:\n", " device_id = devices[0]\n", " message = messages[0]\n", " \n", " print(f\"\\nPartitions (dates) for {device_id}/{message}:\")\n", " print(\"=\" * 60)\n", " \n", " try:\n", " partitions = catalog.list_partitions(device_id, message)\n", " if partitions:\n", " print(f\"Found {len(partitions)} partition(s):\")\n", " print(f\" Date range: {partitions[0]} to {partitions[-1]}\")\n", " print(f\"\\n All dates:\")\n", " for date in partitions[:20]: # Show first 20\n", " print(f\" - {date}\")\n", " if len(partitions) > 20:\n", " print(f\" ... and {len(partitions) - 20} more\")\n", " else:\n", " print(\"No partitions found (table may not be partitioned)\")\n", " except Exception as e:\n", " print(f\"Could not list partitions: {e}\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "66579956", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'97a4aaf4'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "device_id" ] }, { "cell_type": "code", "execution_count": 10, "id": "3411bcdb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['can1_obd2_s_m41_s01pid_m00', 'can1_obd2_s_m41_s01pid_m03', 'can1_obd2_s_m41_s01pid_m04', 'can1_obd2_s_m41_s01pid_m05', 'can1_obd2_s_m41_s01pid_m06', 'can1_obd2_s_m41_s01pid_m07', 'can1_obd2_s_m41_s01pid_m0c', 'can1_obd2_s_m41_s01pid_m0d', 'can1_obd2_s_m41_s01pid_m0e', 'can1_obd2_s_m41_s01pid_m0f', 'can1_obd2_s_m41_s01pid_m10', 'can1_obd2_s_m41_s01pid_m11', 'can1_obd2_s_m41_s01pid_m1f', 'can1_obd2_s_m41_s01pid_m2e', 'can1_obd2_s_m41_s01pid_m2f', 'can1_obd2_s_m41_s01pid_m33', 'can1_obd2_s_m41_s01pid_m34', 'can1_obd2_s_m41_s01pid_m35', 'can1_obd2_s_m41_s01pid_m43', 'can1_obd2_s_m41_s01pid_m44', 'can1_obd2_s_m41_s01pid_m49', 'can1_obd2_s_m41_s01pid_m55', 'can1_obd2_s_m41_s01pid_m56', 'can1_obd2_s_m41_s01pid_m5c', 'can9_gnssaltitude', 'can9_gnssdistance', 'can9_gnsspos', 'can9_gnssspeed', 'can9_gnssstatus', 'can9_gnsstime', 'can9_heartbeat', 'can9_imudata', 'can9_timecalendar', 'can9_timeexternal', 'messages']\n" ] } ], "source": [ "print(messages)" ] }, { "cell_type": "code", "execution_count": 20, "id": "b98df0e7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reading sample data from 97a4aaf4/can1_obd2_s_m41_s01pid_m49...\n", "============================================================\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2026-01-26 23:48:28,200 - datalake.athena - INFO - Query started with execution ID: 2501a646-a908-47a8-95df-3870e5696e62\n", "2026-01-26 23:48:29,314 - datalake.athena - INFO - Query 2501a646-a908-47a8-95df-3870e5696e62 completed successfully\n", "2026-01-26 23:48:29,604 - datalake.athena - INFO - Retrieved 78 rows from S3 for query 2501a646-a908-47a8-95df-3870e5696e62\n", "2026-01-26 23:48:29,604 - datalake.catalog - INFO - Found 78 tables in database\n", "2026-01-26 23:48:29,604 - datalake.query - INFO - Executing query for 97a4aaf4/can1_obd2_s_m41_s01pid_m49\n", "2026-01-26 23:48:29,718 - datalake.athena - INFO - Query started with execution ID: b9f04fc6-6408-4054-8d9a-bf77c0bcf28d\n", "2026-01-26 23:48:35,706 - datalake.athena - INFO - Query b9f04fc6-6408-4054-8d9a-bf77c0bcf28d completed successfully\n", "2026-01-26 23:48:41,916 - datalake.athena - INFO - Retrieved 652001 rows from S3 for query b9f04fc6-6408-4054-8d9a-bf77c0bcf28d\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "✓ Loaded 652001 records\n", "\n", "Data shape: (652001, 3)\n", "\n", "Columns: ['t', 's01pid49_absthrottleposd', 'date_created']\n", "\n", "First few rows:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ts01pid49_absthrottleposddate_created
02026-01-02 03:04:02.44115.6862752026/01/02
12025-12-03 03:10:02.21715.6862752025/12/03
22025-12-31 03:00:00.16215.6862752025/12/31
32025-12-19 04:00:00.15730.9803922025/12/19
42025-12-22 04:00:00.66115.6862752025/12/22
52026-01-13 06:00:00.33915.6862752026/01/13
62025-12-19 07:00:00.01038.8235292025/12/19
72025-12-19 04:00:01.15633.3333332025/12/19
82025-12-19 04:00:02.15735.2941182025/12/19
92025-12-19 07:00:01.00934.9019612025/12/19
\n", "
" ], "text/plain": [ " t s01pid49_absthrottleposd date_created\n", "0 2026-01-02 03:04:02.441 15.686275 2026/01/02\n", "1 2025-12-03 03:10:02.217 15.686275 2025/12/03\n", "2 2025-12-31 03:00:00.162 15.686275 2025/12/31\n", "3 2025-12-19 04:00:00.157 30.980392 2025/12/19\n", "4 2025-12-22 04:00:00.661 15.686275 2025/12/22\n", "5 2026-01-13 06:00:00.339 15.686275 2026/01/13\n", "6 2025-12-19 07:00:00.010 38.823529 2025/12/19\n", "7 2025-12-19 04:00:01.156 33.333333 2025/12/19\n", "8 2025-12-19 04:00:02.157 35.294118 2025/12/19\n", "9 2025-12-19 07:00:01.009 34.901961 2025/12/19" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Data types:\n", "t object\n", "s01pid49_absthrottleposd float64\n", "date_created object\n", "dtype: object\n", "\n", "Basic statistics:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
s01pid49_absthrottleposd
count652001.000000
mean21.921143
std8.487119
min15.686275
25%15.686275
50%15.686275
75%29.019608
max58.431373
\n", "
" ], "text/plain": [ " s01pid49_absthrottleposd\n", "count 652001.000000\n", "mean 21.921143\n", "std 8.487119\n", "min 15.686275\n", "25% 15.686275\n", "50% 15.686275\n", "75% 29.019608\n", "max 58.431373" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Read a sample of data\n", "if devices and messages:\n", " device_id = devices[0]\n", " # message = messages[0]\n", " message = 'can1_obd2_s_m41_s01pid_m49'\n", " \n", " print(f\"Reading sample data from {device_id}/{message}...\")\n", " print(\"=\" * 60)\n", " \n", " try:\n", " df = query.read_device_message(\n", " device_id=device_id,\n", " message=message,\n", " )\n", " \n", " print(f\"✓ Loaded {len(df)} records\")\n", " print(f\"\\nData shape: {df.shape}\")\n", " print(f\"\\nColumns: {list(df.columns)}\")\n", " print(f\"\\nFirst few rows:\")\n", " display(df.head(10))\n", " \n", " print(f\"\\nData types:\")\n", " print(df.dtypes)\n", " \n", " print(f\"\\nBasic statistics:\")\n", " display(df.describe())\n", " \n", " except Exception as e:\n", " print(f\"✗ Error reading data: {e}\")\n", " import traceback\n", " traceback.print_exc()" ] }, { "cell_type": "code", "execution_count": 21, "id": "31396a98", "metadata": {}, "outputs": [], "source": [ "df['t'] = pd.to_datetime(df['t']) " ] }, { "cell_type": "code", "execution_count": 25, "id": "8fa88ee6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ts01pid49_absthrottleposd
02025-11-2215.709290
12025-11-23NaN
22025-11-2421.347658
32025-11-2522.176305
42025-11-2622.074130
52025-11-2722.379063
62025-11-2822.611687
72025-11-29NaN
82025-11-30NaN
92025-12-01NaN
102025-12-02NaN
112025-12-0322.212069
122025-12-0421.593356
132025-12-0522.048014
142025-12-06NaN
152025-12-07NaN
162025-12-0821.288014
172025-12-0922.105263
182025-12-1022.144666
192025-12-1121.774071
202025-12-1221.957367
212025-12-13NaN
222025-12-14NaN
232025-12-1520.411036
242025-12-1621.394285
252025-12-1721.644342
262025-12-1822.130631
272025-12-1921.194253
282025-12-20NaN
292025-12-21NaN
302025-12-2221.804700
312025-12-2321.961360
322025-12-2422.252882
332025-12-2521.916508
342025-12-2622.494252
352025-12-27NaN
362025-12-28NaN
372025-12-2921.873543
382025-12-3021.890226
392025-12-3122.529185
402026-01-01NaN
412026-01-0222.761742
422026-01-03NaN
432026-01-04NaN
442026-01-0522.315963
452026-01-0622.110849
462026-01-0721.613014
472026-01-0821.953064
482026-01-0921.585424
492026-01-10NaN
502026-01-11NaN
512026-01-1222.092380
522026-01-1322.664499
532026-01-1422.124919
542026-01-1522.252390
552026-01-1622.551813
\n", "
" ], "text/plain": [ " t s01pid49_absthrottleposd\n", "0 2025-11-22 15.709290\n", "1 2025-11-23 NaN\n", "2 2025-11-24 21.347658\n", "3 2025-11-25 22.176305\n", "4 2025-11-26 22.074130\n", "5 2025-11-27 22.379063\n", "6 2025-11-28 22.611687\n", "7 2025-11-29 NaN\n", "8 2025-11-30 NaN\n", "9 2025-12-01 NaN\n", "10 2025-12-02 NaN\n", "11 2025-12-03 22.212069\n", "12 2025-12-04 21.593356\n", "13 2025-12-05 22.048014\n", "14 2025-12-06 NaN\n", "15 2025-12-07 NaN\n", "16 2025-12-08 21.288014\n", "17 2025-12-09 22.105263\n", "18 2025-12-10 22.144666\n", "19 2025-12-11 21.774071\n", "20 2025-12-12 21.957367\n", "21 2025-12-13 NaN\n", "22 2025-12-14 NaN\n", "23 2025-12-15 20.411036\n", "24 2025-12-16 21.394285\n", "25 2025-12-17 21.644342\n", "26 2025-12-18 22.130631\n", "27 2025-12-19 21.194253\n", "28 2025-12-20 NaN\n", "29 2025-12-21 NaN\n", "30 2025-12-22 21.804700\n", "31 2025-12-23 21.961360\n", "32 2025-12-24 22.252882\n", "33 2025-12-25 21.916508\n", "34 2025-12-26 22.494252\n", "35 2025-12-27 NaN\n", "36 2025-12-28 NaN\n", "37 2025-12-29 21.873543\n", "38 2025-12-30 21.890226\n", "39 2025-12-31 22.529185\n", "40 2026-01-01 NaN\n", "41 2026-01-02 22.761742\n", "42 2026-01-03 NaN\n", "43 2026-01-04 NaN\n", "44 2026-01-05 22.315963\n", "45 2026-01-06 22.110849\n", "46 2026-01-07 21.613014\n", "47 2026-01-08 21.953064\n", "48 2026-01-09 21.585424\n", "49 2026-01-10 NaN\n", "50 2026-01-11 NaN\n", "51 2026-01-12 22.092380\n", "52 2026-01-13 22.664499\n", "53 2026-01-14 22.124919\n", "54 2026-01-15 22.252390\n", "55 2026-01-16 22.551813" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop(columns=['date_created']).groupby(pd.Grouper(key='t', freq='D')).mean().reset_index()" ] }, { "cell_type": "code", "execution_count": 22, "id": "64376fb5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 15.686275\n", "1 15.686275\n", "2 15.686275\n", "3 30.980392\n", "4 15.686275\n", " ... \n", "651996 15.686275\n", "651997 15.686275\n", "651998 15.686275\n", "651999 15.686275\n", "652000 15.686275\n", "Name: s01pid49_absthrottleposd, Length: 652001, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['s01pid49_absthrottleposd']" ] }, { "cell_type": "code", "execution_count": 15, "id": "ec3d240b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
taltitudevalidaltitudealtitudeaccuracydate_created
69232532025-10-21 15:09:49.4541.0-45.443.02025/10/21
69232542025-10-21 15:09:49.6541.0-44.437.02025/10/21
69232552025-10-21 15:09:49.8541.0-43.832.02025/10/21
69232562025-10-21 15:09:50.2631.0-43.329.02025/10/21
69232572025-10-21 15:09:50.4631.0-41.924.02025/10/21
..................
128767662026-01-17 12:59:59.0931.0-12.41.02026/01/17
128767672026-01-17 12:59:59.2931.0-12.41.02026/01/17
128767682026-01-17 12:59:59.4931.0-12.41.02026/01/17
128767692026-01-17 12:59:59.6931.0-12.41.02026/01/17
128767702026-01-17 12:59:59.8931.0-12.41.02026/01/17
\n", "

12981228 rows × 5 columns

\n", "
" ], "text/plain": [ " t altitudevalid altitude altitudeaccuracy \\\n", "6923253 2025-10-21 15:09:49.454 1.0 -45.4 43.0 \n", "6923254 2025-10-21 15:09:49.654 1.0 -44.4 37.0 \n", "6923255 2025-10-21 15:09:49.854 1.0 -43.8 32.0 \n", "6923256 2025-10-21 15:09:50.263 1.0 -43.3 29.0 \n", "6923257 2025-10-21 15:09:50.463 1.0 -41.9 24.0 \n", "... ... ... ... ... \n", "12876766 2026-01-17 12:59:59.093 1.0 -12.4 1.0 \n", "12876767 2026-01-17 12:59:59.293 1.0 -12.4 1.0 \n", "12876768 2026-01-17 12:59:59.493 1.0 -12.4 1.0 \n", "12876769 2026-01-17 12:59:59.693 1.0 -12.4 1.0 \n", "12876770 2026-01-17 12:59:59.893 1.0 -12.4 1.0 \n", "\n", " date_created \n", "6923253 2025/10/21 \n", "6923254 2025/10/21 \n", "6923255 2025/10/21 \n", "6923256 2025/10/21 \n", "6923257 2025/10/21 \n", "... ... \n", "12876766 2026/01/17 \n", "12876767 2026/01/17 \n", "12876768 2026/01/17 \n", "12876769 2026/01/17 \n", "12876770 2026/01/17 \n", "\n", "[12981228 rows x 5 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(by='t')" ] }, { "cell_type": "code", "execution_count": null, "id": "ed959149", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.18" } }, "nbformat": 4, "nbformat_minor": 5 }