Spaces:
Sleeping
Sleeping
Upload 35 files
Browse files- src/__init__.py +1 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/__pycache__/app.cpython-310.pyc +0 -0
- src/examples/batch_example.py +12 -5
- src/examples/explore_example.py +10 -3
- src/examples/query_example.py +11 -4
- src/streamlit_app.py +49 -36
- src/test_connection.py +10 -3
- src/utils/__pycache__/dimension_reduction.cpython-310.pyc +0 -0
- src/utils/dimension_reduction.py +1 -1
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""OXON Technologies / CANedge Data Lake application and utilities."""
|
src/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (213 Bytes). View file
|
|
|
src/__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (25.8 kB). View file
|
|
|
src/examples/batch_example.py
CHANGED
|
@@ -5,11 +5,18 @@ This script demonstrates memory-efficient batch processing across
|
|
| 5 |
the entire data lake using SQL queries.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
import pandas as pd
|
| 14 |
|
| 15 |
|
|
|
|
| 5 |
the entire data lake using SQL queries.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
_project_root = Path(__file__).resolve().parent.parent.parent
|
| 12 |
+
if str(_project_root) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_project_root))
|
| 14 |
+
|
| 15 |
+
from src.datalake.config import DataLakeConfig
|
| 16 |
+
from src.datalake.athena import AthenaQuery
|
| 17 |
+
from src.datalake.catalog import DataLakeCatalog
|
| 18 |
+
from src.datalake.query import DataLakeQuery
|
| 19 |
+
from src.datalake.batch import BatchProcessor
|
| 20 |
import pandas as pd
|
| 21 |
|
| 22 |
|
src/examples/explore_example.py
CHANGED
|
@@ -5,9 +5,16 @@ This script demonstrates how to discover devices, messages, dates,
|
|
| 5 |
and schemas in the CANedge Athena data lake.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def main():
|
|
|
|
| 5 |
and schemas in the CANedge Athena data lake.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
_project_root = Path(__file__).resolve().parent.parent.parent
|
| 12 |
+
if str(_project_root) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_project_root))
|
| 14 |
+
|
| 15 |
+
from src.datalake.config import DataLakeConfig
|
| 16 |
+
from src.datalake.athena import AthenaQuery
|
| 17 |
+
from src.datalake.catalog import DataLakeCatalog
|
| 18 |
|
| 19 |
|
| 20 |
def main():
|
src/examples/query_example.py
CHANGED
|
@@ -5,10 +5,17 @@ This script demonstrates how to read data for specific devices/messages,
|
|
| 5 |
perform time series queries, and filter by date ranges using SQL.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
import pandas as pd
|
| 13 |
|
| 14 |
|
|
|
|
| 5 |
perform time series queries, and filter by date ranges using SQL.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
_project_root = Path(__file__).resolve().parent.parent.parent
|
| 12 |
+
if str(_project_root) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(_project_root))
|
| 14 |
+
|
| 15 |
+
from src.datalake.config import DataLakeConfig
|
| 16 |
+
from src.datalake.athena import AthenaQuery
|
| 17 |
+
from src.datalake.catalog import DataLakeCatalog
|
| 18 |
+
from src.datalake.query import DataLakeQuery
|
| 19 |
import pandas as pd
|
| 20 |
|
| 21 |
|
src/streamlit_app.py
CHANGED
|
@@ -4,10 +4,16 @@ OXON Technologies - Professional Streamlit Dashboard
|
|
| 4 |
A comprehensive dashboard for analyzing device data from AWS Athena data lake.
|
| 5 |
"""
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import streamlit as st
|
| 8 |
from warnings import filterwarnings
|
| 9 |
import base64
|
| 10 |
-
from pathlib import Path
|
| 11 |
from PIL import Image
|
| 12 |
import pandas as pd
|
| 13 |
import numpy as np
|
|
@@ -19,15 +25,18 @@ from typing import Dict, Optional, List, Tuple
|
|
| 19 |
from ydata_profiling import ProfileReport
|
| 20 |
import plotly.express as px
|
| 21 |
|
| 22 |
-
from datalake.config import DataLakeConfig
|
| 23 |
-
from datalake.athena import AthenaQuery
|
| 24 |
-
from datalake.catalog import DataLakeCatalog
|
| 25 |
-
from datalake.query import DataLakeQuery
|
| 26 |
-
from datalake.batch import BatchProcessor
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
from utils.feature_class import DetectFeatureClasses
|
| 31 |
|
| 32 |
# Ignore warnings
|
| 33 |
filterwarnings("ignore")
|
|
@@ -36,20 +45,22 @@ filterwarnings("ignore")
|
|
| 36 |
# Configuration Management
|
| 37 |
# ============================================================================
|
| 38 |
|
| 39 |
-
def load_config(config_path: str =
|
| 40 |
"""
|
| 41 |
Load configuration from YAML file.
|
| 42 |
-
|
| 43 |
Args:
|
| 44 |
-
config_path: Path to the configuration YAML file
|
| 45 |
-
|
| 46 |
Returns:
|
| 47 |
Dictionary containing configuration settings
|
| 48 |
-
|
| 49 |
Raises:
|
| 50 |
FileNotFoundError: If config file doesn't exist
|
| 51 |
yaml.YAMLError: If config file is invalid YAML
|
| 52 |
"""
|
|
|
|
|
|
|
| 53 |
config_file = Path(config_path)
|
| 54 |
if not config_file.exists():
|
| 55 |
raise FileNotFoundError(f"Configuration file not found: {config_path}")
|
|
@@ -245,20 +256,21 @@ def display_header(logo_path: str, title: str):
|
|
| 245 |
<div style="display: flex; align-items: center;">
|
| 246 |
<img src="data:image/png;base64,{logo_base64}" alt="Logo"
|
| 247 |
style="height: 200px; margin-right: 10px;">
|
| 248 |
-
<h1 style="display: inline; margin: 0;">{title}
|
| 249 |
</div>
|
| 250 |
""",
|
| 251 |
unsafe_allow_html=True,
|
| 252 |
)
|
| 253 |
else:
|
| 254 |
-
st.title(f"{title}
|
| 255 |
|
| 256 |
|
| 257 |
def display_sidebar():
|
| 258 |
"""Display sidebar with device selection."""
|
| 259 |
with st.sidebar:
|
| 260 |
# Logo
|
| 261 |
-
|
|
|
|
| 262 |
try:
|
| 263 |
st.image(Image.open(logo_path), width='stretch')
|
| 264 |
except Exception:
|
|
@@ -270,7 +282,7 @@ def display_sidebar():
|
|
| 270 |
|
| 271 |
# Check if AWS services are initialized
|
| 272 |
if not st.session_state['aws_initialized']:
|
| 273 |
-
st.warning("
|
| 274 |
return
|
| 275 |
|
| 276 |
# Load device list if not cached
|
|
@@ -320,7 +332,7 @@ def display_sidebar():
|
|
| 320 |
|
| 321 |
# Show selected device info only after user has confirmed
|
| 322 |
if st.session_state['selected_device']:
|
| 323 |
-
st.success(f"
|
| 324 |
|
| 325 |
|
| 326 |
# ============================================================================
|
|
@@ -394,7 +406,7 @@ def render_message_viewer_tab():
|
|
| 394 |
return
|
| 395 |
|
| 396 |
if not st.session_state['selected_device']:
|
| 397 |
-
st.info("
|
| 398 |
return
|
| 399 |
|
| 400 |
device_id = st.session_state['selected_device']
|
|
@@ -467,7 +479,7 @@ def render_message_viewer_tab():
|
|
| 467 |
st.rerun()
|
| 468 |
|
| 469 |
if st.session_state['selected_message']:
|
| 470 |
-
st.info(f"
|
| 471 |
|
| 472 |
# Date range selection (optional filter)
|
| 473 |
st.divider()
|
|
@@ -527,7 +539,7 @@ def render_message_viewer_tab():
|
|
| 527 |
if apply_filter_clicked:
|
| 528 |
# Validate date range before applying
|
| 529 |
if date_start > date_end:
|
| 530 |
-
st.error("
|
| 531 |
else:
|
| 532 |
st.session_state['applied_date_range_start'] = date_start
|
| 533 |
st.session_state['applied_date_range_end'] = date_end
|
|
@@ -536,14 +548,14 @@ def render_message_viewer_tab():
|
|
| 536 |
# Show current applied filter status
|
| 537 |
if st.session_state.get('applied_date_range_start') and st.session_state.get('applied_date_range_end'):
|
| 538 |
st.success(
|
| 539 |
-
f"
|
| 540 |
f"{st.session_state['applied_date_range_end']}"
|
| 541 |
)
|
| 542 |
elif date_start and date_end:
|
| 543 |
if date_start <= date_end:
|
| 544 |
-
st.info("
|
| 545 |
else:
|
| 546 |
-
st.error("
|
| 547 |
else:
|
| 548 |
# Clear applied date range when disabled
|
| 549 |
if st.session_state.get('date_range_enabled'):
|
|
@@ -612,13 +624,13 @@ def render_message_data(device_id: str, message: str):
|
|
| 612 |
|
| 613 |
if len(df) == 0:
|
| 614 |
st.warning(
|
| 615 |
-
f"
|
| 616 |
f"({st.session_state['applied_date_range_start']} to {st.session_state['applied_date_range_end']})."
|
| 617 |
)
|
| 618 |
st.info("Try selecting a different date range or disable the filter to see all data.")
|
| 619 |
return
|
| 620 |
elif len(df) < original_row_count:
|
| 621 |
-
st.info(f"
|
| 622 |
|
| 623 |
# Display statistics
|
| 624 |
# st.subheader("Statistics")
|
|
@@ -859,7 +871,7 @@ def render_correlations_tab():
|
|
| 859 |
return
|
| 860 |
|
| 861 |
if not st.session_state['selected_device']:
|
| 862 |
-
st.info("
|
| 863 |
return
|
| 864 |
|
| 865 |
device_id = st.session_state['selected_device']
|
|
@@ -899,7 +911,7 @@ def render_correlations_tab():
|
|
| 899 |
st.error("No features available for correlation analysis.")
|
| 900 |
return
|
| 901 |
|
| 902 |
-
st.info(f"
|
| 903 |
|
| 904 |
# Detect feature classes
|
| 905 |
st.subheader("1. Feature Classification")
|
|
@@ -1000,7 +1012,7 @@ def render_correlations_tab():
|
|
| 1000 |
for idx, cluster in enumerate(cluster_list):
|
| 1001 |
with st.expander(f"Cohort {idx + 1}: {len(cluster)} features (all pairs in {band_label})"):
|
| 1002 |
for feature in cluster:
|
| 1003 |
-
st.write(f"
|
| 1004 |
if len(cluster) > 1:
|
| 1005 |
st.markdown("**Pairwise correlations (values lie in " + band_label + "):**")
|
| 1006 |
cluster_corr = corr_matrix.loc[cluster, cluster]
|
|
@@ -1038,7 +1050,7 @@ def render_correlations_tab():
|
|
| 1038 |
|
| 1039 |
def render_placeholder_tab():
|
| 1040 |
"""Render placeholder tab."""
|
| 1041 |
-
st.info("
|
| 1042 |
|
| 1043 |
|
| 1044 |
# ============================================================================
|
|
@@ -1053,8 +1065,8 @@ def main():
|
|
| 1053 |
# Load configuration
|
| 1054 |
if st.session_state['app_config'] is None:
|
| 1055 |
st.error(
|
| 1056 |
-
f"
|
| 1057 |
-
"Please ensure `config.yaml` exists and is properly formatted."
|
| 1058 |
)
|
| 1059 |
st.stop()
|
| 1060 |
|
|
@@ -1062,8 +1074,8 @@ def main():
|
|
| 1062 |
if not initialize_aws_if_needed():
|
| 1063 |
if st.session_state['aws_error']:
|
| 1064 |
st.error(
|
| 1065 |
-
f"
|
| 1066 |
-
"Please check your AWS credentials in `config.yaml`."
|
| 1067 |
)
|
| 1068 |
st.stop()
|
| 1069 |
|
|
@@ -1091,7 +1103,8 @@ def main():
|
|
| 1091 |
)
|
| 1092 |
|
| 1093 |
# Display header
|
| 1094 |
-
|
|
|
|
| 1095 |
header_title = dashboard_config.get('page_title', 'Analytical Dashboard')
|
| 1096 |
display_header(header_logo, header_title)
|
| 1097 |
|
|
|
|
| 4 |
A comprehensive dashboard for analyzing device data from AWS Athena data lake.
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
_project_root = Path(__file__).resolve().parent.parent
|
| 11 |
+
if str(_project_root) not in sys.path:
|
| 12 |
+
sys.path.insert(0, str(_project_root))
|
| 13 |
+
|
| 14 |
import streamlit as st
|
| 15 |
from warnings import filterwarnings
|
| 16 |
import base64
|
|
|
|
| 17 |
from PIL import Image
|
| 18 |
import pandas as pd
|
| 19 |
import numpy as np
|
|
|
|
| 25 |
from ydata_profiling import ProfileReport
|
| 26 |
import plotly.express as px
|
| 27 |
|
| 28 |
+
from src.datalake.config import DataLakeConfig
|
| 29 |
+
from src.datalake.athena import AthenaQuery
|
| 30 |
+
from src.datalake.catalog import DataLakeCatalog
|
| 31 |
+
from src.datalake.query import DataLakeQuery
|
| 32 |
+
from src.datalake.batch import BatchProcessor
|
| 33 |
+
|
| 34 |
+
from src.utils.correlation import CorrelationMatrixGenerator
|
| 35 |
+
from src.utils.dimension_reduction import DimensionReduction
|
| 36 |
+
from src.utils.feature_class import DetectFeatureClasses
|
| 37 |
|
| 38 |
+
# Base directory for config/images (relative to this file)
|
| 39 |
+
_SRC_DIR = Path(__file__).resolve().parent
|
|
|
|
| 40 |
|
| 41 |
# Ignore warnings
|
| 42 |
filterwarnings("ignore")
|
|
|
|
| 45 |
# Configuration Management
|
| 46 |
# ============================================================================
|
| 47 |
|
| 48 |
+
def load_config(config_path: Optional[str] = None) -> Dict:
|
| 49 |
"""
|
| 50 |
Load configuration from YAML file.
|
| 51 |
+
|
| 52 |
Args:
|
| 53 |
+
config_path: Path to the configuration YAML file (default: src/config.yaml)
|
| 54 |
+
|
| 55 |
Returns:
|
| 56 |
Dictionary containing configuration settings
|
| 57 |
+
|
| 58 |
Raises:
|
| 59 |
FileNotFoundError: If config file doesn't exist
|
| 60 |
yaml.YAMLError: If config file is invalid YAML
|
| 61 |
"""
|
| 62 |
+
if config_path is None:
|
| 63 |
+
config_path = _SRC_DIR / "config.yaml"
|
| 64 |
config_file = Path(config_path)
|
| 65 |
if not config_file.exists():
|
| 66 |
raise FileNotFoundError(f"Configuration file not found: {config_path}")
|
|
|
|
| 256 |
<div style="display: flex; align-items: center;">
|
| 257 |
<img src="data:image/png;base64,{logo_base64}" alt="Logo"
|
| 258 |
style="height: 200px; margin-right: 10px;">
|
| 259 |
+
<h1 style="display: inline; margin: 0;">{title} ??</h1>
|
| 260 |
</div>
|
| 261 |
""",
|
| 262 |
unsafe_allow_html=True,
|
| 263 |
)
|
| 264 |
else:
|
| 265 |
+
st.title(f"{title} ??")
|
| 266 |
|
| 267 |
|
| 268 |
def display_sidebar():
|
| 269 |
"""Display sidebar with device selection."""
|
| 270 |
with st.sidebar:
|
| 271 |
# Logo
|
| 272 |
+
logo_rel = st.session_state['app_config'].get('dashboard', {}).get('logo_path', 'images/logo.png')
|
| 273 |
+
logo_path = _SRC_DIR / logo_rel
|
| 274 |
try:
|
| 275 |
st.image(Image.open(logo_path), width='stretch')
|
| 276 |
except Exception:
|
|
|
|
| 282 |
|
| 283 |
# Check if AWS services are initialized
|
| 284 |
if not st.session_state['aws_initialized']:
|
| 285 |
+
st.warning("?? AWS services not initialized. Please check configuration.")
|
| 286 |
return
|
| 287 |
|
| 288 |
# Load device list if not cached
|
|
|
|
| 332 |
|
| 333 |
# Show selected device info only after user has confirmed
|
| 334 |
if st.session_state['selected_device']:
|
| 335 |
+
st.success(f"? Selected: {st.session_state['selected_device']}")
|
| 336 |
|
| 337 |
|
| 338 |
# ============================================================================
|
|
|
|
| 406 |
return
|
| 407 |
|
| 408 |
if not st.session_state['selected_device']:
|
| 409 |
+
st.info("?? Please select a device from the sidebar and click **Go!** to begin.")
|
| 410 |
return
|
| 411 |
|
| 412 |
device_id = st.session_state['selected_device']
|
|
|
|
| 479 |
st.rerun()
|
| 480 |
|
| 481 |
if st.session_state['selected_message']:
|
| 482 |
+
st.info(f"?? Selected message: `{st.session_state['selected_message']}` ({selected_message_name})")
|
| 483 |
|
| 484 |
# Date range selection (optional filter)
|
| 485 |
st.divider()
|
|
|
|
| 539 |
if apply_filter_clicked:
|
| 540 |
# Validate date range before applying
|
| 541 |
if date_start > date_end:
|
| 542 |
+
st.error("?? Start date must be before or equal to end date.")
|
| 543 |
else:
|
| 544 |
st.session_state['applied_date_range_start'] = date_start
|
| 545 |
st.session_state['applied_date_range_end'] = date_end
|
|
|
|
| 548 |
# Show current applied filter status
|
| 549 |
if st.session_state.get('applied_date_range_start') and st.session_state.get('applied_date_range_end'):
|
| 550 |
st.success(
|
| 551 |
+
f"?? **Applied filter:** {st.session_state['applied_date_range_start']} to "
|
| 552 |
f"{st.session_state['applied_date_range_end']}"
|
| 553 |
)
|
| 554 |
elif date_start and date_end:
|
| 555 |
if date_start <= date_end:
|
| 556 |
+
st.info("?? Select dates and click **Apply Filter** to filter the data.")
|
| 557 |
else:
|
| 558 |
+
st.error("?? Start date must be before or equal to end date.")
|
| 559 |
else:
|
| 560 |
# Clear applied date range when disabled
|
| 561 |
if st.session_state.get('date_range_enabled'):
|
|
|
|
| 624 |
|
| 625 |
if len(df) == 0:
|
| 626 |
st.warning(
|
| 627 |
+
f"?? No data found in the selected date range "
|
| 628 |
f"({st.session_state['applied_date_range_start']} to {st.session_state['applied_date_range_end']})."
|
| 629 |
)
|
| 630 |
st.info("Try selecting a different date range or disable the filter to see all data.")
|
| 631 |
return
|
| 632 |
elif len(df) < original_row_count:
|
| 633 |
+
st.info(f"?? Showing {len(df):,} of {original_row_count:,} records (filtered by date range).")
|
| 634 |
|
| 635 |
# Display statistics
|
| 636 |
# st.subheader("Statistics")
|
|
|
|
| 871 |
return
|
| 872 |
|
| 873 |
if not st.session_state['selected_device']:
|
| 874 |
+
st.info("?? Please select a device from the sidebar and click **Go!** to begin.")
|
| 875 |
return
|
| 876 |
|
| 877 |
device_id = st.session_state['selected_device']
|
|
|
|
| 911 |
st.error("No features available for correlation analysis.")
|
| 912 |
return
|
| 913 |
|
| 914 |
+
st.info(f"?? Analyzing {len(df_features.columns)} features from {len(df)} days of data.")
|
| 915 |
|
| 916 |
# Detect feature classes
|
| 917 |
st.subheader("1. Feature Classification")
|
|
|
|
| 1012 |
for idx, cluster in enumerate(cluster_list):
|
| 1013 |
with st.expander(f"Cohort {idx + 1}: {len(cluster)} features (all pairs in {band_label})"):
|
| 1014 |
for feature in cluster:
|
| 1015 |
+
st.write(f" � {feature}")
|
| 1016 |
if len(cluster) > 1:
|
| 1017 |
st.markdown("**Pairwise correlations (values lie in " + band_label + "):**")
|
| 1018 |
cluster_corr = corr_matrix.loc[cluster, cluster]
|
|
|
|
| 1050 |
|
| 1051 |
def render_placeholder_tab():
|
| 1052 |
"""Render placeholder tab."""
|
| 1053 |
+
st.info("?? This feature is under development.")
|
| 1054 |
|
| 1055 |
|
| 1056 |
# ============================================================================
|
|
|
|
| 1065 |
# Load configuration
|
| 1066 |
if st.session_state['app_config'] is None:
|
| 1067 |
st.error(
|
| 1068 |
+
f"? Configuration Error: {st.session_state.get('config_error', 'Unknown error')}\n\n"
|
| 1069 |
+
"Please ensure `src/config.yaml` exists and is properly formatted."
|
| 1070 |
)
|
| 1071 |
st.stop()
|
| 1072 |
|
|
|
|
| 1074 |
if not initialize_aws_if_needed():
|
| 1075 |
if st.session_state['aws_error']:
|
| 1076 |
st.error(
|
| 1077 |
+
f"? AWS Initialization Error: {st.session_state['aws_error']}\n\n"
|
| 1078 |
+
"Please check your AWS credentials in `src/config.yaml`."
|
| 1079 |
)
|
| 1080 |
st.stop()
|
| 1081 |
|
|
|
|
| 1103 |
)
|
| 1104 |
|
| 1105 |
# Display header
|
| 1106 |
+
header_logo_rel = dashboard_config.get('header_logo_path', 'images/analysis.png')
|
| 1107 |
+
header_logo = str(_SRC_DIR / header_logo_rel)
|
| 1108 |
header_title = dashboard_config.get('page_title', 'Analytical Dashboard')
|
| 1109 |
display_header(header_logo, header_title)
|
| 1110 |
|
src/test_connection.py
CHANGED
|
@@ -2,9 +2,16 @@
|
|
| 2 |
Quick test script to verify Athena connection and basic functionality.
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
|
| 6 |
-
from
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def main():
|
| 10 |
"""Test basic connection and functionality."""
|
|
|
|
| 2 |
Quick test script to verify Athena connection and basic functionality.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
_project_root = Path(__file__).resolve().parent.parent
|
| 9 |
+
if str(_project_root) not in sys.path:
|
| 10 |
+
sys.path.insert(0, str(_project_root))
|
| 11 |
+
|
| 12 |
+
from src.datalake.config import DataLakeConfig
|
| 13 |
+
from src.datalake.athena import AthenaQuery
|
| 14 |
+
from src.datalake.catalog import DataLakeCatalog
|
| 15 |
|
| 16 |
def main():
|
| 17 |
"""Test basic connection and functionality."""
|
src/utils/__pycache__/dimension_reduction.cpython-310.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/dimension_reduction.cpython-310.pyc and b/src/utils/__pycache__/dimension_reduction.cpython-310.pyc differ
|
|
|
src/utils/dimension_reduction.py
CHANGED
|
@@ -5,7 +5,7 @@ import pandas as pd
|
|
| 5 |
from sklearn.decomposition import PCA
|
| 6 |
from sklearn.preprocessing import MinMaxScaler
|
| 7 |
|
| 8 |
-
from utils.correlation import CorrelationMatrixGenerator
|
| 9 |
|
| 10 |
|
| 11 |
class DimensionReduction:
|
|
|
|
| 5 |
from sklearn.decomposition import PCA
|
| 6 |
from sklearn.preprocessing import MinMaxScaler
|
| 7 |
|
| 8 |
+
from src.utils.correlation import CorrelationMatrixGenerator
|
| 9 |
|
| 10 |
|
| 11 |
class DimensionReduction:
|