arash7920 commited on
Commit
29854ee
·
verified ·
1 Parent(s): 6c66df9

Upload 35 files

Browse files
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """OXON Technologies / CANedge Data Lake application and utilities."""
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (213 Bytes). View file
 
src/__pycache__/app.cpython-310.pyc ADDED
Binary file (25.8 kB). View file
 
src/examples/batch_example.py CHANGED
@@ -5,11 +5,18 @@ This script demonstrates memory-efficient batch processing across
5
  the entire data lake using SQL queries.
6
  """
7
 
8
- from datalake.config import DataLakeConfig
9
- from datalake.athena import AthenaQuery
10
- from datalake.catalog import DataLakeCatalog
11
- from datalake.query import DataLakeQuery
12
- from datalake.batch import BatchProcessor
 
 
 
 
 
 
 
13
  import pandas as pd
14
 
15
 
 
5
  the entire data lake using SQL queries.
6
  """
7
 
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ _project_root = Path(__file__).resolve().parent.parent.parent
12
+ if str(_project_root) not in sys.path:
13
+ sys.path.insert(0, str(_project_root))
14
+
15
+ from src.datalake.config import DataLakeConfig
16
+ from src.datalake.athena import AthenaQuery
17
+ from src.datalake.catalog import DataLakeCatalog
18
+ from src.datalake.query import DataLakeQuery
19
+ from src.datalake.batch import BatchProcessor
20
  import pandas as pd
21
 
22
 
src/examples/explore_example.py CHANGED
@@ -5,9 +5,16 @@ This script demonstrates how to discover devices, messages, dates,
5
  and schemas in the CANedge Athena data lake.
6
  """
7
 
8
- from datalake.config import DataLakeConfig
9
- from datalake.athena import AthenaQuery
10
- from datalake.catalog import DataLakeCatalog
 
 
 
 
 
 
 
11
 
12
 
13
  def main():
 
5
  and schemas in the CANedge Athena data lake.
6
  """
7
 
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ _project_root = Path(__file__).resolve().parent.parent.parent
12
+ if str(_project_root) not in sys.path:
13
+ sys.path.insert(0, str(_project_root))
14
+
15
+ from src.datalake.config import DataLakeConfig
16
+ from src.datalake.athena import AthenaQuery
17
+ from src.datalake.catalog import DataLakeCatalog
18
 
19
 
20
  def main():
src/examples/query_example.py CHANGED
@@ -5,10 +5,17 @@ This script demonstrates how to read data for specific devices/messages,
5
  perform time series queries, and filter by date ranges using SQL.
6
  """
7
 
8
- from datalake.config import DataLakeConfig
9
- from datalake.athena import AthenaQuery
10
- from datalake.catalog import DataLakeCatalog
11
- from datalake.query import DataLakeQuery
 
 
 
 
 
 
 
12
  import pandas as pd
13
 
14
 
 
5
  perform time series queries, and filter by date ranges using SQL.
6
  """
7
 
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ _project_root = Path(__file__).resolve().parent.parent.parent
12
+ if str(_project_root) not in sys.path:
13
+ sys.path.insert(0, str(_project_root))
14
+
15
+ from src.datalake.config import DataLakeConfig
16
+ from src.datalake.athena import AthenaQuery
17
+ from src.datalake.catalog import DataLakeCatalog
18
+ from src.datalake.query import DataLakeQuery
19
  import pandas as pd
20
 
21
 
src/streamlit_app.py CHANGED
@@ -4,10 +4,16 @@ OXON Technologies - Professional Streamlit Dashboard
4
  A comprehensive dashboard for analyzing device data from AWS Athena data lake.
5
  """
6
 
 
 
 
 
 
 
 
7
  import streamlit as st
8
  from warnings import filterwarnings
9
  import base64
10
- from pathlib import Path
11
  from PIL import Image
12
  import pandas as pd
13
  import numpy as np
@@ -19,15 +25,18 @@ from typing import Dict, Optional, List, Tuple
19
  from ydata_profiling import ProfileReport
20
  import plotly.express as px
21
 
22
- from datalake.config import DataLakeConfig
23
- from datalake.athena import AthenaQuery
24
- from datalake.catalog import DataLakeCatalog
25
- from datalake.query import DataLakeQuery
26
- from datalake.batch import BatchProcessor
 
 
 
 
27
 
28
- from utils.correlation import CorrelationMatrixGenerator
29
- from utils.dimension_reduction import DimensionReduction
30
- from utils.feature_class import DetectFeatureClasses
31
 
32
  # Ignore warnings
33
  filterwarnings("ignore")
@@ -36,20 +45,22 @@ filterwarnings("ignore")
36
  # Configuration Management
37
  # ============================================================================
38
 
39
- def load_config(config_path: str = "config.yaml") -> Dict:
40
  """
41
  Load configuration from YAML file.
42
-
43
  Args:
44
- config_path: Path to the configuration YAML file
45
-
46
  Returns:
47
  Dictionary containing configuration settings
48
-
49
  Raises:
50
  FileNotFoundError: If config file doesn't exist
51
  yaml.YAMLError: If config file is invalid YAML
52
  """
 
 
53
  config_file = Path(config_path)
54
  if not config_file.exists():
55
  raise FileNotFoundError(f"Configuration file not found: {config_path}")
@@ -245,20 +256,21 @@ def display_header(logo_path: str, title: str):
245
  <div style="display: flex; align-items: center;">
246
  <img src="data:image/png;base64,{logo_base64}" alt="Logo"
247
  style="height: 200px; margin-right: 10px;">
248
- <h1 style="display: inline; margin: 0;">{title} 🔍</h1>
249
  </div>
250
  """,
251
  unsafe_allow_html=True,
252
  )
253
  else:
254
- st.title(f"{title} 🔍")
255
 
256
 
257
  def display_sidebar():
258
  """Display sidebar with device selection."""
259
  with st.sidebar:
260
  # Logo
261
- logo_path = st.session_state['app_config'].get('dashboard', {}).get('logo_path', 'images/logo.png')
 
262
  try:
263
  st.image(Image.open(logo_path), width='stretch')
264
  except Exception:
@@ -270,7 +282,7 @@ def display_sidebar():
270
 
271
  # Check if AWS services are initialized
272
  if not st.session_state['aws_initialized']:
273
- st.warning("⚠️ AWS services not initialized. Please check configuration.")
274
  return
275
 
276
  # Load device list if not cached
@@ -320,7 +332,7 @@ def display_sidebar():
320
 
321
  # Show selected device info only after user has confirmed
322
  if st.session_state['selected_device']:
323
- st.success(f" Selected: {st.session_state['selected_device']}")
324
 
325
 
326
  # ============================================================================
@@ -394,7 +406,7 @@ def render_message_viewer_tab():
394
  return
395
 
396
  if not st.session_state['selected_device']:
397
- st.info("👈 Please select a device from the sidebar and click **Go!** to begin.")
398
  return
399
 
400
  device_id = st.session_state['selected_device']
@@ -467,7 +479,7 @@ def render_message_viewer_tab():
467
  st.rerun()
468
 
469
  if st.session_state['selected_message']:
470
- st.info(f"📊 Selected message: `{st.session_state['selected_message']}` ({selected_message_name})")
471
 
472
  # Date range selection (optional filter)
473
  st.divider()
@@ -527,7 +539,7 @@ def render_message_viewer_tab():
527
  if apply_filter_clicked:
528
  # Validate date range before applying
529
  if date_start > date_end:
530
- st.error("⚠️ Start date must be before or equal to end date.")
531
  else:
532
  st.session_state['applied_date_range_start'] = date_start
533
  st.session_state['applied_date_range_end'] = date_end
@@ -536,14 +548,14 @@ def render_message_viewer_tab():
536
  # Show current applied filter status
537
  if st.session_state.get('applied_date_range_start') and st.session_state.get('applied_date_range_end'):
538
  st.success(
539
- f"📅 **Applied filter:** {st.session_state['applied_date_range_start']} to "
540
  f"{st.session_state['applied_date_range_end']}"
541
  )
542
  elif date_start and date_end:
543
  if date_start <= date_end:
544
- st.info("ℹ️ Select dates and click **Apply Filter** to filter the data.")
545
  else:
546
- st.error("⚠️ Start date must be before or equal to end date.")
547
  else:
548
  # Clear applied date range when disabled
549
  if st.session_state.get('date_range_enabled'):
@@ -612,13 +624,13 @@ def render_message_data(device_id: str, message: str):
612
 
613
  if len(df) == 0:
614
  st.warning(
615
- f"⚠️ No data found in the selected date range "
616
  f"({st.session_state['applied_date_range_start']} to {st.session_state['applied_date_range_end']})."
617
  )
618
  st.info("Try selecting a different date range or disable the filter to see all data.")
619
  return
620
  elif len(df) < original_row_count:
621
- st.info(f"📊 Showing {len(df):,} of {original_row_count:,} records (filtered by date range).")
622
 
623
  # Display statistics
624
  # st.subheader("Statistics")
@@ -859,7 +871,7 @@ def render_correlations_tab():
859
  return
860
 
861
  if not st.session_state['selected_device']:
862
- st.info("👈 Please select a device from the sidebar and click **Go!** to begin.")
863
  return
864
 
865
  device_id = st.session_state['selected_device']
@@ -899,7 +911,7 @@ def render_correlations_tab():
899
  st.error("No features available for correlation analysis.")
900
  return
901
 
902
- st.info(f"📊 Analyzing {len(df_features.columns)} features from {len(df)} days of data.")
903
 
904
  # Detect feature classes
905
  st.subheader("1. Feature Classification")
@@ -1000,7 +1012,7 @@ def render_correlations_tab():
1000
  for idx, cluster in enumerate(cluster_list):
1001
  with st.expander(f"Cohort {idx + 1}: {len(cluster)} features (all pairs in {band_label})"):
1002
  for feature in cluster:
1003
- st.write(f" {feature}")
1004
  if len(cluster) > 1:
1005
  st.markdown("**Pairwise correlations (values lie in " + band_label + "):**")
1006
  cluster_corr = corr_matrix.loc[cluster, cluster]
@@ -1038,7 +1050,7 @@ def render_correlations_tab():
1038
 
1039
  def render_placeholder_tab():
1040
  """Render placeholder tab."""
1041
- st.info("🚧 This feature is under development.")
1042
 
1043
 
1044
  # ============================================================================
@@ -1053,8 +1065,8 @@ def main():
1053
  # Load configuration
1054
  if st.session_state['app_config'] is None:
1055
  st.error(
1056
- f" Configuration Error: {st.session_state.get('config_error', 'Unknown error')}\n\n"
1057
- "Please ensure `config.yaml` exists and is properly formatted."
1058
  )
1059
  st.stop()
1060
 
@@ -1062,8 +1074,8 @@ def main():
1062
  if not initialize_aws_if_needed():
1063
  if st.session_state['aws_error']:
1064
  st.error(
1065
- f" AWS Initialization Error: {st.session_state['aws_error']}\n\n"
1066
- "Please check your AWS credentials in `config.yaml`."
1067
  )
1068
  st.stop()
1069
 
@@ -1091,7 +1103,8 @@ def main():
1091
  )
1092
 
1093
  # Display header
1094
- header_logo = dashboard_config.get('header_logo_path', 'images/analysis.png')
 
1095
  header_title = dashboard_config.get('page_title', 'Analytical Dashboard')
1096
  display_header(header_logo, header_title)
1097
 
 
4
  A comprehensive dashboard for analyzing device data from AWS Athena data lake.
5
  """
6
 
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ _project_root = Path(__file__).resolve().parent.parent
11
+ if str(_project_root) not in sys.path:
12
+ sys.path.insert(0, str(_project_root))
13
+
14
  import streamlit as st
15
  from warnings import filterwarnings
16
  import base64
 
17
  from PIL import Image
18
  import pandas as pd
19
  import numpy as np
 
25
  from ydata_profiling import ProfileReport
26
  import plotly.express as px
27
 
28
+ from src.datalake.config import DataLakeConfig
29
+ from src.datalake.athena import AthenaQuery
30
+ from src.datalake.catalog import DataLakeCatalog
31
+ from src.datalake.query import DataLakeQuery
32
+ from src.datalake.batch import BatchProcessor
33
+
34
+ from src.utils.correlation import CorrelationMatrixGenerator
35
+ from src.utils.dimension_reduction import DimensionReduction
36
+ from src.utils.feature_class import DetectFeatureClasses
37
 
38
+ # Base directory for config/images (relative to this file)
39
+ _SRC_DIR = Path(__file__).resolve().parent
 
40
 
41
  # Ignore warnings
42
  filterwarnings("ignore")
 
45
  # Configuration Management
46
  # ============================================================================
47
 
48
+ def load_config(config_path: Optional[str] = None) -> Dict:
49
  """
50
  Load configuration from YAML file.
51
+
52
  Args:
53
+ config_path: Path to the configuration YAML file (default: src/config.yaml)
54
+
55
  Returns:
56
  Dictionary containing configuration settings
57
+
58
  Raises:
59
  FileNotFoundError: If config file doesn't exist
60
  yaml.YAMLError: If config file is invalid YAML
61
  """
62
+ if config_path is None:
63
+ config_path = _SRC_DIR / "config.yaml"
64
  config_file = Path(config_path)
65
  if not config_file.exists():
66
  raise FileNotFoundError(f"Configuration file not found: {config_path}")
 
256
  <div style="display: flex; align-items: center;">
257
  <img src="data:image/png;base64,{logo_base64}" alt="Logo"
258
  style="height: 200px; margin-right: 10px;">
259
+ <h1 style="display: inline; margin: 0;">{title} ??</h1>
260
  </div>
261
  """,
262
  unsafe_allow_html=True,
263
  )
264
  else:
265
+ st.title(f"{title} ??")
266
 
267
 
268
  def display_sidebar():
269
  """Display sidebar with device selection."""
270
  with st.sidebar:
271
  # Logo
272
+ logo_rel = st.session_state['app_config'].get('dashboard', {}).get('logo_path', 'images/logo.png')
273
+ logo_path = _SRC_DIR / logo_rel
274
  try:
275
  st.image(Image.open(logo_path), width='stretch')
276
  except Exception:
 
282
 
283
  # Check if AWS services are initialized
284
  if not st.session_state['aws_initialized']:
285
+ st.warning("?? AWS services not initialized. Please check configuration.")
286
  return
287
 
288
  # Load device list if not cached
 
332
 
333
  # Show selected device info only after user has confirmed
334
  if st.session_state['selected_device']:
335
+ st.success(f"? Selected: {st.session_state['selected_device']}")
336
 
337
 
338
  # ============================================================================
 
406
  return
407
 
408
  if not st.session_state['selected_device']:
409
+ st.info("?? Please select a device from the sidebar and click **Go!** to begin.")
410
  return
411
 
412
  device_id = st.session_state['selected_device']
 
479
  st.rerun()
480
 
481
  if st.session_state['selected_message']:
482
+ st.info(f"?? Selected message: `{st.session_state['selected_message']}` ({selected_message_name})")
483
 
484
  # Date range selection (optional filter)
485
  st.divider()
 
539
  if apply_filter_clicked:
540
  # Validate date range before applying
541
  if date_start > date_end:
542
+ st.error("?? Start date must be before or equal to end date.")
543
  else:
544
  st.session_state['applied_date_range_start'] = date_start
545
  st.session_state['applied_date_range_end'] = date_end
 
548
  # Show current applied filter status
549
  if st.session_state.get('applied_date_range_start') and st.session_state.get('applied_date_range_end'):
550
  st.success(
551
+ f"?? **Applied filter:** {st.session_state['applied_date_range_start']} to "
552
  f"{st.session_state['applied_date_range_end']}"
553
  )
554
  elif date_start and date_end:
555
  if date_start <= date_end:
556
+ st.info("?? Select dates and click **Apply Filter** to filter the data.")
557
  else:
558
+ st.error("?? Start date must be before or equal to end date.")
559
  else:
560
  # Clear applied date range when disabled
561
  if st.session_state.get('date_range_enabled'):
 
624
 
625
  if len(df) == 0:
626
  st.warning(
627
+ f"?? No data found in the selected date range "
628
  f"({st.session_state['applied_date_range_start']} to {st.session_state['applied_date_range_end']})."
629
  )
630
  st.info("Try selecting a different date range or disable the filter to see all data.")
631
  return
632
  elif len(df) < original_row_count:
633
+ st.info(f"?? Showing {len(df):,} of {original_row_count:,} records (filtered by date range).")
634
 
635
  # Display statistics
636
  # st.subheader("Statistics")
 
871
  return
872
 
873
  if not st.session_state['selected_device']:
874
+ st.info("?? Please select a device from the sidebar and click **Go!** to begin.")
875
  return
876
 
877
  device_id = st.session_state['selected_device']
 
911
  st.error("No features available for correlation analysis.")
912
  return
913
 
914
+ st.info(f"?? Analyzing {len(df_features.columns)} features from {len(df)} days of data.")
915
 
916
  # Detect feature classes
917
  st.subheader("1. Feature Classification")
 
1012
  for idx, cluster in enumerate(cluster_list):
1013
  with st.expander(f"Cohort {idx + 1}: {len(cluster)} features (all pairs in {band_label})"):
1014
  for feature in cluster:
1015
+ st.write(f" {feature}")
1016
  if len(cluster) > 1:
1017
  st.markdown("**Pairwise correlations (values lie in " + band_label + "):**")
1018
  cluster_corr = corr_matrix.loc[cluster, cluster]
 
1050
 
1051
  def render_placeholder_tab():
1052
  """Render placeholder tab."""
1053
+ st.info("?? This feature is under development.")
1054
 
1055
 
1056
  # ============================================================================
 
1065
  # Load configuration
1066
  if st.session_state['app_config'] is None:
1067
  st.error(
1068
+ f"? Configuration Error: {st.session_state.get('config_error', 'Unknown error')}\n\n"
1069
+ "Please ensure `src/config.yaml` exists and is properly formatted."
1070
  )
1071
  st.stop()
1072
 
 
1074
  if not initialize_aws_if_needed():
1075
  if st.session_state['aws_error']:
1076
  st.error(
1077
+ f"? AWS Initialization Error: {st.session_state['aws_error']}\n\n"
1078
+ "Please check your AWS credentials in `src/config.yaml`."
1079
  )
1080
  st.stop()
1081
 
 
1103
  )
1104
 
1105
  # Display header
1106
+ header_logo_rel = dashboard_config.get('header_logo_path', 'images/analysis.png')
1107
+ header_logo = str(_SRC_DIR / header_logo_rel)
1108
  header_title = dashboard_config.get('page_title', 'Analytical Dashboard')
1109
  display_header(header_logo, header_title)
1110
 
src/test_connection.py CHANGED
@@ -2,9 +2,16 @@
2
  Quick test script to verify Athena connection and basic functionality.
3
  """
4
 
5
- from datalake.config import DataLakeConfig
6
- from datalake.athena import AthenaQuery
7
- from datalake.catalog import DataLakeCatalog
 
 
 
 
 
 
 
8
 
9
  def main():
10
  """Test basic connection and functionality."""
 
2
  Quick test script to verify Athena connection and basic functionality.
3
  """
4
 
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ _project_root = Path(__file__).resolve().parent.parent
9
+ if str(_project_root) not in sys.path:
10
+ sys.path.insert(0, str(_project_root))
11
+
12
+ from src.datalake.config import DataLakeConfig
13
+ from src.datalake.athena import AthenaQuery
14
+ from src.datalake.catalog import DataLakeCatalog
15
 
16
  def main():
17
  """Test basic connection and functionality."""
src/utils/__pycache__/dimension_reduction.cpython-310.pyc CHANGED
Binary files a/src/utils/__pycache__/dimension_reduction.cpython-310.pyc and b/src/utils/__pycache__/dimension_reduction.cpython-310.pyc differ
 
src/utils/dimension_reduction.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
5
  from sklearn.decomposition import PCA
6
  from sklearn.preprocessing import MinMaxScaler
7
 
8
- from utils.correlation import CorrelationMatrixGenerator
9
 
10
 
11
  class DimensionReduction:
 
5
  from sklearn.decomposition import PCA
6
  from sklearn.preprocessing import MinMaxScaler
7
 
8
+ from src.utils.correlation import CorrelationMatrixGenerator
9
 
10
 
11
  class DimensionReduction: