LiamKhoaLe commited on
Commit
bc3c386
·
1 Parent(s): 5503637

Update fuel efficiency model fusion

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ diagram
2
+ OBD
3
+ *.md
4
+ data.json
5
+ organize.py
6
+ bulk_mongo_upload.py
Dockerfile CHANGED
@@ -28,11 +28,14 @@ RUN mkdir -p $HOME/app/logs \
28
  $HOME/app/cache \
29
  $HOME/app/cache/obd_data \
30
  $HOME/app/cache/obd_data/plots \
31
- $HOME/app/models/ul
 
32
 
33
- # ── Environment variables for HuggingFace model ──
34
  ENV MODEL_DIR=$HOME/app/models/ul
35
  ENV HF_MODEL_REPO=BinKhoaLe1812/Driver_Behavior_OBD
 
 
36
 
37
  # ── Models will be downloaded at runtime when app starts ──
38
 
 
28
  $HOME/app/cache \
29
  $HOME/app/cache/obd_data \
30
  $HOME/app/cache/obd_data/plots \
31
+ $HOME/app/models/ul \
32
+ $HOME/app/models/efficiency
33
 
34
+ # ── Environment variables for HuggingFace models ──
35
  ENV MODEL_DIR=$HOME/app/models/ul
36
  ENV HF_MODEL_REPO=BinKhoaLe1812/Driver_Behavior_OBD
37
+ ENV EFFICIENCY_MODEL_DIR=$HOME/app/models/efficiency
38
+ ENV HF_EFFICIENCY_MODEL_REPO=BinKhoaLe1812/Fuel_Efficiency_OBD
39
 
40
  # ── Models will be downloaded at runtime when app starts ──
41
 
OBD/DrivingAggressivenessScorer.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, List, Tuple
6
+ from datetime import datetime
7
+
8
+
9
class DrivingAggressivenessScorer:
    """Scores driving aggressiveness (0-100) from OBD-II telemetry.

    Each telemetry parameter is min-max normalized against persisted
    per-parameter bounds, combined into a weighted per-row score, and the
    per-row scores are aggregated into one drive-level score with
    penalties for frequent or intense high-score spikes.
    """

    # Fallback weights used when the caller supplies none.  The original
    # code read `self.weights` before it was ever assigned (AttributeError
    # on every no-argument construction); these defaults mirror the
    # parameters used elsewhere in the project.
    # NOTE(review): the exact default split is an assumption — confirm the
    # intended values with the model owner.
    DEFAULT_WEIGHTS: Dict[str, float] = {
        'RPM': 0.25,
        'THROTTLE_POS': 0.30,
        'ENGINE_LOAD': 0.20,
        'MAF': 0.10,
        'SPEED': 0.10,
        'INTAKE_PRESSURE': 0.05,
    }

    def __init__(self, bounds_file: str = 'obd_bounds.json', weights: Dict = None):
        """Create a scorer.

        Args:
            bounds_file: JSON file where per-parameter min/max bounds are
                persisted between runs.
            weights: optional {parameter: weight} mapping; defaults to
                DEFAULT_WEIGHTS.  Weights are renormalized to sum to 1.
        """
        self.bounds_file = Path(bounds_file)
        # Bug fix: fall back to the class-level defaults instead of reading
        # the not-yet-assigned instance attribute.  Copy so callers' dicts
        # are never mutated by the normalization below.
        self.weights: Dict[str, float] = dict(weights) if weights else dict(self.DEFAULT_WEIGHTS)
        self.bounds = self._load_bounds()

        # Keep the weighted sum on a 0-1 scale even for ad-hoc weights.
        weight_sum = sum(self.weights.values())
        if not np.isclose(weight_sum, 1.0):
            print(f"Warning: Weights sum to {weight_sum:.3f}, normalizing to 1.0")
            self.weights = {k: v / weight_sum for k, v in self.weights.items()}

    def _load_bounds(self) -> Dict:
        """Load persisted bounds; empty dict when nothing has been saved yet.

        Bug fix: the original fell through and returned None when the file
        was missing, which crashed every later ``self.bounds[...]`` access.
        """
        if self.bounds_file.exists():
            with open(self.bounds_file, 'r') as f:
                return json.load(f)
        return {}

    def _save_bounds(self):
        """Persist the current bounds dictionary to JSON."""
        with open(self.bounds_file, 'w') as f:
            json.dump(self.bounds, f, indent=2)
        print(f"✓ Bounds updated and saved to {self.bounds_file}")

    def update_bounds(self, df: pd.DataFrame) -> bool:
        """Widen stored bounds with extremes observed in `df`.

        Returns True when any bound changed (and was persisted).
        """
        updated = False
        for param in self.weights.keys():
            if param not in df.columns:
                continue
            # Bug fix: cast to plain float — numpy scalars returned by
            # Series.min()/max() are not JSON serializable by json.dump.
            data_min = float(df[param].min())
            data_max = float(df[param].max())

            if param not in self.bounds:
                # First observation of this parameter defines its bounds
                # (previously a KeyError).
                self.bounds[param] = {'min': data_min, 'max': data_max}
                updated = True
                print(f"  New bounds for {param}: {data_min:.2f} to {data_max:.2f}")
                continue

            if data_min < self.bounds[param]['min']:
                self.bounds[param]['min'] = data_min
                updated = True
                print(f"  New MIN for {param}: {data_min:.2f}")

            if data_max > self.bounds[param]['max']:
                self.bounds[param]['max'] = data_max
                updated = True
                print(f"  New MAX for {param}: {data_max:.2f}")

        if updated:
            self._save_bounds()
        return updated

    def normalize_value(self, value: float, param: str) -> float:
        """Min-max normalize `value` into [0, 1] using the bounds for `param`.

        Unknown parameters or degenerate bounds (max <= min) score 0.0.
        """
        if param not in self.bounds:
            return 0.0

        min_val = self.bounds[param]['min']
        max_val = self.bounds[param]['max']

        if max_val <= min_val:
            # A single observed value carries no spread information.
            return 0.0

        normalized = (value - min_val) / (max_val - min_val)
        return float(np.clip(normalized, 0.0, 1.0))

    def calculate_row_score(self, row: pd.Series) -> float:
        """Weighted aggressiveness score (0-100) for a single telemetry row.

        Parameters missing from the row (or NaN) contribute nothing.
        """
        weighted_score = 0.0
        for param, weight in self.weights.items():
            if param in row and pd.notna(row[param]):
                weighted_score += self.normalize_value(row[param], param) * weight
        # Convert the 0-1 weighted sum to a 0-100 scale.
        return weighted_score * 100

    def calculate_drive_scores(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of `df` with an 'aggressiveness_score' column added."""
        df = df.copy()
        df['aggressiveness_score'] = df.apply(self.calculate_row_score, axis=1)
        return df

    def calculate_aggregate_score(self, scores: np.ndarray) -> Dict:
        """Summarize per-row scores into a single drive-level score.

        Blends the mean with the 75th percentile, then adds penalties when
        high-score spikes are intense (p95/p99) or frequent.

        Raises:
            ValueError: if `scores` is empty (previously produced NaN /
                division-by-zero warnings).
        """
        scores = np.asarray(scores, dtype=float)
        if scores.size == 0:
            raise ValueError("Cannot aggregate an empty score array")

        mean_score = np.mean(scores)
        median_score = np.median(scores)
        std_score = np.std(scores)

        # Percentile analysis for spike detection.
        p75 = np.percentile(scores, 75)
        p90 = np.percentile(scores, 90)
        p95 = np.percentile(scores, 95)
        p99 = np.percentile(scores, 99)
        max_score = np.max(scores)

        # Aggressive spikes (scores >= 70).
        spike_threshold = 70
        spike_count = np.sum(scores >= spike_threshold)
        spike_percentage = (spike_count / len(scores)) * 100

        # Extreme spikes (scores >= 85).
        extreme_threshold = 85
        extreme_count = np.sum(scores >= extreme_threshold)
        extreme_percentage = (extreme_count / len(scores)) * 100

        # Penalty grows with both spike intensity and spike frequency.
        spike_penalty = 0.0
        if p95 > 70:
            spike_penalty += (p95 - 70) * 0.3
        if p99 > 80:
            spike_penalty += (p99 - 80) * 0.5
        if spike_percentage > 5:
            spike_penalty += (spike_percentage - 5) * 2.0
        if extreme_percentage > 2:
            spike_penalty += (extreme_percentage - 2) * 3.0

        # Blend central tendency with upper-quartile behaviour, then penalize.
        base_score = (mean_score * 0.7) + (p75 * 0.3)
        final_score = np.clip(base_score + spike_penalty, 0, 100)

        # Cast to plain floats so the result is JSON-friendly.
        return {
            'final_score': round(float(final_score), 2),
            'mean_score': round(float(mean_score), 2),
            'median_score': round(float(median_score), 2),
            'std_score': round(float(std_score), 2),
            'p75_score': round(float(p75), 2),
            'p90_score': round(float(p90), 2),
            'p95_score': round(float(p95), 2),
            'p99_score': round(float(p99), 2),
            'max_score': round(float(max_score), 2),
            'spike_percentage': round(float(spike_percentage), 2),
            'extreme_percentage': round(float(extreme_percentage), 2),
            'spike_penalty': round(float(spike_penalty), 2),
        }

    def analyze_drive(self, csv_path: str, update_bounds: bool = True) -> Tuple[pd.DataFrame, Dict]:
        """Load a drive-log CSV, optionally widen bounds, and score every row.

        Returns:
            (scored DataFrame, aggregate summary dict).
        """
        print(f"\n{'='*60}")
        print(f"ANALYZING DRIVE: {csv_path}")
        print(f"{'='*60}")

        df = pd.read_csv(csv_path)
        print(f"✓ Loaded {len(df)} data points")

        if update_bounds:
            print("\nUpdating bounds...")
            self.update_bounds(df)

        print("\nCalculating aggressiveness scores...")
        df_scored = self.calculate_drive_scores(df)

        aggregate = self.calculate_aggregate_score(df_scored['aggressiveness_score'].values)
        return df_scored, aggregate

    def get_current_bounds(self) -> Dict:
        """Return the live bounds dictionary (not a copy)."""
        return self.bounds

    def print_bounds(self):
        """Pretty-print the stored min/max bounds for every weighted parameter."""
        print("\nCurrent Parameter Bounds:")
        print("-" * 50)
        for param in self.weights.keys():
            if param not in self.bounds:
                # Parameter not yet observed (previously a KeyError).
                print(f"{param:20s}: (no bounds recorded)")
                continue
            min_val = self.bounds[param]['min']
            max_val = self.bounds[param]['max']
            print(f"{param:20s}: {min_val:8.2f} to {max_val:8.2f}")
174
+
175
+
176
if __name__ == "__main__":
    # Demo entry point: score one sample drive log and persist the results.
    aggressiveness_scorer = DrivingAggressivenessScorer()

    # Run the full analysis pipeline on the sample log.
    scored_frame, summary = aggressiveness_scorer.analyze_drive('obd_data_log_20251012_121810.csv')

    # Persist the per-row scores alongside the original telemetry.
    output_path = 'obd_data_scored.csv'
    scored_frame.to_csv(output_path, index=False)
    print(f"✓ Scored data saved to {output_path}")

    # Show the bounds learned so far.
    aggressiveness_scorer.print_bounds()
OBD/configScorer.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from driving_aggressiveness_scorer import DrivingAggressivenessScorer
3
+ from driving_analyzer import visualize_drive, compare_drives
4
+
5
+
6
def load_config(config_path: str = 'config.yaml') -> dict:
    """Load configuration from YAML file."""
    try:
        with open(config_path, 'r') as cfg_handle:
            parsed = yaml.safe_load(cfg_handle)
    except FileNotFoundError:
        print(f"Config file not found: {config_path}")
        print("Using default configuration.")
        return None
    return parsed
15
+
16
+
17
def create_scorer_from_config(config_path: str = 'config.yaml') -> DrivingAggressivenessScorer:
    """Create scorer instance from configuration file.

    Falls back to the scorer's built-in defaults when the config file is
    missing or empty.
    """
    # NOTE(review): this module imports the scorer from
    # `driving_aggressiveness_scorer` — confirm that module name matches
    # the scorer's actual filename.
    config = load_config(config_path)

    if not config:
        scorer = DrivingAggressivenessScorer()
        print("✓ Scorer initialized with default settings")
        return scorer

    weights = config.get('weights', None)
    bounds_file = config.get('bounds', {}).get('file', 'obd_bounds.json')
    scorer = DrivingAggressivenessScorer(bounds_file=bounds_file, weights=weights)
    print(f"✓ Scorer initialized with config from {config_path}")
    return scorer
31
+
32
+
33
+ # Quick start examples
34
+ if __name__ == "__main__":
35
+
36
+ # METHOD 1: Use with config file (recommended)
37
+ print("\n" + "="*60)
38
+ print("METHOD 1: Config-based scoring")
39
+ print("="*60)
40
+ scorer = create_scorer_from_config('config.yaml')
41
+ df_scored, results = scorer.analyze_drive('obd_data_log_20251012_121810.csv')
42
+ visualize_drive(df_scored, results, save_path='drive_analysis_config.png')
43
+
44
+
45
+ # METHOD 2: Use with custom weights (no config file)
46
+ print("\n" + "="*60)
47
+ print("METHOD 2: Custom weights")
48
+ print("="*60)
49
+ custom_weights = {
50
+ 'RPM': 0.20,
51
+ 'THROTTLE_POS': 0.35, # More emphasis on throttle
52
+ 'ENGINE_LOAD': 0.25,
53
+ 'MAF': 0.10,
54
+ 'SPEED': 0.05,
55
+ 'INTAKE_PRESSURE': 0.05
56
+ }
57
+ scorer_custom = DrivingAggressivenessScorer(weights=custom_weights)
58
+ df_scored2, results2 = scorer_custom.analyze_drive('obd_data_log_20251012_121810.csv')
59
+
60
+
61
+ # METHOD 3: Analyze without updating bounds (testing)
62
+ print("\n" + "="*60)
63
+ print("METHOD 3: Analysis without updating bounds")
64
+ print("="*60)
65
+ scorer_test = DrivingAggressivenessScorer()
66
+ df_test, results_test = scorer_test.analyze_drive(
67
+ 'obd_data_log_20251012_121810.csv',
68
+ update_bounds=False # Don't update global bounds
69
+ )
70
+
71
+
72
+ # METHOD 4: Quick comparison script
73
+ print("\n" + "="*60)
74
+ print("METHOD 4: Compare multiple drives")
75
+ print("="*60)
76
+ """
77
+ # Uncomment when you have multiple CSV files:
78
+ comparison = compare_drives(scorer, [
79
+ 'obd_data_log_20251012_121810.csv',
80
+ 'obd_data_log_20251013_101234.csv',
81
+ 'obd_data_log_20251014_155030.csv'
82
+ ])
83
+ """
84
+
85
+
86
+ print("\n" + "="*60)
87
+ print("SETUP COMPLETE!")
88
+ print("="*60)
89
+ print("\nYour system is ready to:")
90
+ print(" 1. Analyze individual drives")
91
+ print(" 2. Compare multiple drives")
92
+ print(" 3. Batch process folders")
93
+ print(" 4. Dynamically update bounds")
94
+ print(" 5. Generate visualizations")
95
+ print("\nBounds file: obd_bounds.json")
96
+ print("Config file: config.yaml")
97
+ print("="*60 + "\n")
OBD/obd_analyzer.py CHANGED
@@ -24,18 +24,22 @@ KPH_TO_MPS = 1 / 3.6
24
  G_ACCELERATION = 9.80665
25
  MIN_MOVING_SPEED_KPH = 2 # have to be moving
26
 
27
- AGGRESSIVE_RPM_ENTRY_THRESHOLD = 2700
 
28
  AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD = 40
29
- AGGRESSIVE_RPM_HOLD_THRESHOLD = 2300
30
- HARSH_BRAKING_THRESHOLD_G = -0.25
31
 
32
- # roc
33
- AGGRESSIVE_RPM_ROC_THRESHOLD = 500
34
- AGGRESSIVE_THROTTLE_ROC_THRESHOLD = 45
35
- POSITIVE_ACCEL_FOR_ROC_CHECK_G = 0.1
36
 
37
- MODERATE_RPM_THRESHOLD = 2100
38
- MODERATE_THROTTLE_THRESHOLD = 25
 
 
 
39
 
40
  MIN_DATA_POINTS_FOR_ROC = 2
41
 
@@ -67,15 +71,26 @@ def load_and_preprocess_data(csv_filepath):
67
  # Handle empty DataFrame after potential filtering or if it was empty to begin with
68
  return df # Or handle error appropriately
69
 
70
- numeric_cols = ['SPEED', 'RPM', 'THROTTLE_POS']
71
- for col in numeric_cols:
72
- if col in df.columns:
73
- df[col] = pd.to_numeric(df[col], errors='coerce')
74
- else:
75
- print(f"Warning: Column {col} not found. It will be filled with NaN.")
 
 
 
 
 
 
76
  df[col] = np.nan
77
-
78
- df[numeric_cols] = df[numeric_cols].fillna(method='ffill').fillna(0)
 
 
 
 
 
79
 
80
  if 'SPEED' in df.columns:
81
  df['SPEED_mps'] = df['SPEED'] * KPH_TO_MPS
@@ -115,8 +130,8 @@ def load_and_preprocess_data(csv_filepath):
115
  return df
116
 
117
  def classify_driving_style_stateful(df):
118
- if df.empty or not all(col in df.columns for col in ['RPM', 'THROTTLE_POS', 'SPEED', 'acceleration_g']):
119
- print("Warning: Missing one or more required columns for stateful classification (RPM, THROTTLE_POS, SPEED, acceleration_g).")
120
  return pd.Series([DRIVING_STYLE_UNKNOWN] * len(df), index=df.index, dtype=str)
121
 
122
  driving_styles = [DRIVING_STYLE_UNKNOWN] * len(df)
@@ -130,45 +145,63 @@ def classify_driving_style_stateful(df):
130
  rpm_roc = df.loc[i, 'RPM_roc']
131
  throttle_roc = df.loc[i, 'THROTTLE_roc']
132
 
133
- row_style = DRIVING_STYLE_PASSIVE
134
  is_moving = speed_kph > MIN_MOVING_SPEED_KPH
135
 
136
- is_hard_braking_trigger = accel_g < HARSH_BRAKING_THRESHOLD_G and is_moving
137
-
138
- is_high_abs_rpm_throttle_trigger = (rpm > AGGRESSIVE_RPM_ENTRY_THRESHOLD and
139
- throttle > AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD and
140
- is_moving)
141
-
142
- is_actively_accelerating = accel_g > POSITIVE_ACCEL_FOR_ROC_CHECK_G
143
-
144
- is_high_roc_trigger = (is_moving and
145
- is_actively_accelerating and
146
- (rpm_roc > AGGRESSIVE_RPM_ROC_THRESHOLD or
147
- throttle_roc > AGGRESSIVE_THROTTLE_ROC_THRESHOLD))
148
 
149
- is_currently_aggressive_event = is_hard_braking_trigger or is_high_abs_rpm_throttle_trigger or is_high_roc_trigger
 
 
 
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  if current_style == DRIVING_STYLE_AGGRESSIVE:
152
- if is_currently_aggressive_event:
153
  row_style = DRIVING_STYLE_AGGRESSIVE
154
- elif rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and is_moving:
155
  row_style = DRIVING_STYLE_AGGRESSIVE
156
- else:
157
  if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
158
  row_style = DRIVING_STYLE_MODERATE
159
  else:
160
  row_style = DRIVING_STYLE_PASSIVE
161
- else:
162
  if is_currently_aggressive_event:
163
- row_style = DRIVING_STYLE_AGGRESSIVE
164
- else:
165
  if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
166
  row_style = DRIVING_STYLE_MODERATE
167
  else:
168
  row_style = DRIVING_STYLE_PASSIVE
169
 
170
  driving_styles[i] = row_style
171
- current_style = row_style
172
 
173
  print("Stateful driving style classification complete.")
174
  return pd.Series(driving_styles, index=df.index)
@@ -206,7 +239,7 @@ def main():
206
  print(f"Error saving output CSV to {args.output_csv}: {e}")
207
  else:
208
  print("\n--- First 20 Rows of Analyzed Data (showing key fields) ---")
209
- display_cols = ['timestamp', 'SPEED', 'RPM', 'THROTTLE_POS', 'acceleration_g', 'driving_style_analyzed']
210
  display_cols = [col for col in display_cols if col in df.columns]
211
  if display_cols: print(df[display_cols].head(20))
212
  else: print("Key display columns not found in DataFrame.")
 
24
  G_ACCELERATION = 9.80665
25
  MIN_MOVING_SPEED_KPH = 2 # have to be moving
26
 
27
+ VERY_HIGH_RPM_AGGRESSIVE_THRESHOLD = 3500
28
+ AGGRESSIVE_RPM_ENTRY_THRESHOLD = 2900
29
  AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD = 40
30
+ AGGRESSIVE_RPM_HOLD_THRESHOLD = 2400
31
+ HARSH_BRAKING_THRESHOLD_G = -0.25
32
 
33
+ HIGH_RPM_FOR_ROC_AGGRESSIVE_THRESHOLD = 2300
34
+ AGGRESSIVE_RPM_ROC_THRESHOLD = 500
35
+ AGGRESSIVE_THROTTLE_ROC_THRESHOLD = 45
36
+ POSITIVE_ACCEL_FOR_ROC_CHECK_G = 0.1
37
 
38
+ MIN_SPEED_FOR_HOLDING_GEAR_CHECK_KPH = 15
39
+ LOW_G_FOR_HOLDING_GEAR = 0.1
40
+
41
+ MODERATE_RPM_THRESHOLD = 2100
42
+ MODERATE_THROTTLE_THRESHOLD = 25
43
 
44
  MIN_DATA_POINTS_FOR_ROC = 2
45
 
 
71
  # Handle empty DataFrame after potential filtering or if it was empty to begin with
72
  return df # Or handle error appropriately
73
 
74
+ # Define all possible numeric columns from current fuel efficiency logging
75
+ all_numeric_cols = ['SPEED', 'RPM', 'THROTTLE_POS', 'MAF', 'ENGINE_LOAD', 'INTAKE_PRESSURE',
76
+ 'SHORT_FUEL_TRIM_1', 'SHORT_FUEL_TRIM_2', 'LONG_FUEL_TRIM_1', 'LONG_FUEL_TRIM_2']
77
+
78
+ # Only process columns that exist in the dataframe
79
+ numeric_cols = [col for col in all_numeric_cols if col in df.columns]
80
+ required_cols = ['SPEED', 'RPM', 'THROTTLE_POS'] # Essential for driving style analysis
81
+
82
+ # Ensure required columns exist
83
+ for col in required_cols:
84
+ if col not in df.columns:
85
+ print(f"Warning: Required column {col} not found. It will be filled with NaN.")
86
  df[col] = np.nan
87
+
88
+ # Convert all numeric columns to numeric type
89
+ for col in numeric_cols:
90
+ df[col] = pd.to_numeric(df[col], errors='coerce')
91
+
92
+ # Fill missing values for all numeric columns
93
+ df[numeric_cols] = df[numeric_cols].ffill().fillna(0)
94
 
95
  if 'SPEED' in df.columns:
96
  df['SPEED_mps'] = df['SPEED'] * KPH_TO_MPS
 
130
  return df
131
 
132
  def classify_driving_style_stateful(df):
133
+ if df.empty or not all(col in df.columns for col in ['RPM', 'THROTTLE_POS', 'SPEED', 'acceleration_g', 'RPM_roc', 'THROTTLE_roc']):
134
+ print("Warning: Missing required columns for stateful classification.")
135
  return pd.Series([DRIVING_STYLE_UNKNOWN] * len(df), index=df.index, dtype=str)
136
 
137
  driving_styles = [DRIVING_STYLE_UNKNOWN] * len(df)
 
145
  rpm_roc = df.loc[i, 'RPM_roc']
146
  throttle_roc = df.loc[i, 'THROTTLE_roc']
147
 
148
+ row_style = DRIVING_STYLE_PASSIVE # Default for this row
149
  is_moving = speed_kph > MIN_MOVING_SPEED_KPH
150
 
151
+ # --- Define Aggressive Triggers for this specific row ---
152
+ # 1. Absolute very high RPM
153
+ trigger_very_high_rpm = (rpm > VERY_HIGH_RPM_AGGRESSIVE_THRESHOLD and is_moving)
 
 
 
 
 
 
 
 
 
154
 
155
+ # 2. High RPM + High Throttle (user's primary combo)
156
+ trigger_high_rpm_throttle = (rpm > AGGRESSIVE_RPM_ENTRY_THRESHOLD and
157
+ throttle > AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD and
158
+ is_moving)
159
 
160
+ # 3. RoC-based (RPM or Throttle) during active acceleration, with RPM already elevated
161
+ is_actively_accelerating = accel_g > POSITIVE_ACCEL_FOR_ROC_CHECK_G
162
+ trigger_high_roc = (is_moving and is_actively_accelerating and
163
+ rpm > HIGH_RPM_FOR_ROC_AGGRESSIVE_THRESHOLD and
164
+ (rpm_roc > AGGRESSIVE_RPM_ROC_THRESHOLD or
165
+ throttle_roc > AGGRESSIVE_THROTTLE_ROC_THRESHOLD))
166
+
167
+ # 4. Holding gear aggressively (high RPM, moving, but low change in speed)
168
+ trigger_holding_gear = (rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and # Using hold RPM as base for this check
169
+ is_moving and
170
+ speed_kph > MIN_SPEED_FOR_HOLDING_GEAR_CHECK_KPH and
171
+ abs(accel_g) < LOW_G_FOR_HOLDING_GEAR)
172
+
173
+ # 5. Hard braking
174
+ trigger_hard_braking = (accel_g < HARSH_BRAKING_THRESHOLD_G and is_moving)
175
+
176
+ # Combine all triggers for the current row
177
+ is_currently_aggressive_event = (trigger_very_high_rpm or
178
+ trigger_high_rpm_throttle or
179
+ trigger_high_roc or
180
+ trigger_holding_gear or
181
+ trigger_hard_braking)
182
+
183
+ # --- Stateful Logic ---
184
  if current_style == DRIVING_STYLE_AGGRESSIVE:
185
+ if is_currently_aggressive_event: # Re-triggered by a new event this row
186
  row_style = DRIVING_STYLE_AGGRESSIVE
187
+ elif rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and is_moving: # Maintain based on RPM hold
188
  row_style = DRIVING_STYLE_AGGRESSIVE
189
+ else: # Conditions to stay aggressive not met, transition out
190
  if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
191
  row_style = DRIVING_STYLE_MODERATE
192
  else:
193
  row_style = DRIVING_STYLE_PASSIVE
194
+ else: # current_style is Passive or Moderate
195
  if is_currently_aggressive_event:
196
+ row_style = DRIVING_STYLE_AGGRESSIVE # Enter aggressive state
197
+ else: # Not an aggressive event, classify as Moderate or Passive
198
  if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
199
  row_style = DRIVING_STYLE_MODERATE
200
  else:
201
  row_style = DRIVING_STYLE_PASSIVE
202
 
203
  driving_styles[i] = row_style
204
+ current_style = row_style # Update the overall state for the next iteration
205
 
206
  print("Stateful driving style classification complete.")
207
  return pd.Series(driving_styles, index=df.index)
 
239
  print(f"Error saving output CSV to {args.output_csv}: {e}")
240
  else:
241
  print("\n--- First 20 Rows of Analyzed Data (showing key fields) ---")
242
+ display_cols = ['timestamp', 'SPEED', 'RPM', 'THROTTLE_POS', 'acceleration_g', 'RPM_roc', 'THROTTLE_roc', 'driving_style_analyzed']
243
  display_cols = [col for col in display_cols if col in df.columns]
244
  if display_cols: print(df[display_cols].head(20))
245
  else: print("Key display columns not found in DataFrame.")
OBD/obd_logger.py CHANGED
@@ -3,80 +3,54 @@ import time
3
  import datetime
4
  import csv
5
  import os
6
- from collections import deque
7
- import numpy as np
8
  import shutil
9
  import subprocess
10
-
11
- DRIVING_STYLE_PASSIVE = "Passive"
12
- DRIVING_STYLE_MODERATE = "Moderate"
13
- DRIVING_STYLE_AGGRESSIVE = "Aggressive"
14
- DRIVING_STYLE_UNKNOWN = "UNKNOWN_STYLE"
15
-
16
- ROAD_TYPE_LOCAL = "Local"
17
- ROAD_TYPE_MAIN = "Main"
18
- ROAD_TYPE_HIGHWAY = "Highway"
19
- ROAD_TYPE_UNKNOWN = "UNKNOWN_ROAD"
20
-
21
- TRAFFIC_CONDITION_LIGHT = "Light"
22
- TRAFFIC_CONDITION_MODERATE = "Moderate"
23
- TRAFFIC_CONDITION_HEAVY = "Heavy"
24
- TRAFFIC_CONDITION_UNKNOWN = "UNKNOWN_TRAFFIC"
25
-
26
- # Rolling Average Configuration
27
- ROLLING_WINDOW_SIZE = 20 # 6 seconds
28
- MIN_SAMPLES_FOR_CLASSIFICATION = 10
29
-
30
- # ROC needs tuning
31
- SHORT_ROC_WINDOW_SIZE = 3
32
- MIN_SAMPLES_FOR_ROC_CHECK = SHORT_ROC_WINDOW_SIZE
33
- ROC_THROTTLE_AGGRESSIVE_THRESHOLD = 25.0
34
- ROC_RPM_AGGRESSIVE_THRESHOLD = 700.0
35
- ROC_SPEED_AGGRESSIVE_THRESHOLD = 8.0
36
- MIN_RPM_FOR_AGGRESSIVE_TRIGGER = 1000.0
37
- AGGRESSIVE_EVENT_COOLDOWN_SAMPLES = 15
38
-
39
- HIGH_FREQUENCY_PIDS = [
40
- obd.commands.RPM,
41
- obd.commands.THROTTLE_POS,
42
- obd.commands.SPEED,
43
  ]
44
 
45
- LOW_FREQUENCY_PIDS_POOL = [
46
- obd.commands.FUEL_PRESSURE,
47
- obd.commands.ENGINE_LOAD,
48
- obd.commands.COOLANT_TEMP,
49
- obd.commands.INTAKE_TEMP,
50
- obd.commands.TIMING_ADVANCE,
51
- obd.commands.MAF,
52
- obd.commands.INTAKE_PRESSURE,
53
- obd.commands.SHORT_FUEL_TRIM_1,
54
- obd.commands.LONG_FUEL_TRIM_1,
55
- obd.commands.SHORT_FUEL_TRIM_2,
56
  obd.commands.LONG_FUEL_TRIM_2,
57
- obd.commands.COMMANDED_EQUIV_RATIO,
58
- obd.commands.O2_B1S2,
59
- obd.commands.O2_B2S2,
60
- obd.commands.O2_S1_WR_VOLTAGE,
61
- obd.commands.COMMANDED_EGR,
62
  ]
63
 
 
 
 
64
  ALL_PIDS_TO_LOG = HIGH_FREQUENCY_PIDS + LOW_FREQUENCY_PIDS_POOL
65
 
66
  CSV_FILENAME_BASE = "obd_data_log"
67
- # Define new structured log directories relative to the OBD_Logger/OBD directory
68
- LOGS_BASE_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "logs") # Corrected: Up two levels to Base, then into logs
69
- ORIGINAL_CSV_DIR = os.path.join(LOGS_BASE_DIR, "OriginalCSV")
70
- DUPLICATE_CSV_DIR = os.path.join(LOGS_BASE_DIR, "DuplicateCSV")
71
 
72
- WIFI_ADAPTER_HOST = "192.168.0.10"
73
- WIFI_ADAPTER_PORT = 35000
74
-
75
- WIFI_PROTOCOL = "6"
76
- USE_WIFI_SETTINGS = False # using socat to mimic serial connection
77
 
78
  def get_pid_value(connection, pid_command):
79
- """Queries a PID and returns its value, or None if not available or error."""
80
  try:
81
  response = connection.query(pid_command, force=True)
82
  if response.is_null() or response.value is None:
@@ -87,47 +61,98 @@ def get_pid_value(connection, pid_command):
87
  except Exception as e:
88
  print(f"Error querying {pid_command.name}: {e}")
89
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- def perform_logging_session():
92
- connection = None
93
- print("Starting OBD-II Data Logger...")
94
- print("Classifications (Style, Road, Traffic) will be determined automatically.")
 
 
95
 
96
-
97
- initial_driving_style = ""
98
- initial_road_type = ""
99
- initial_traffic_condition = ""
100
 
101
- BASE_LOG_INTERVAL = .3 # for high frequency data
102
- LOW_FREQUENCY_GROUP_POLL_INTERVAL = 90.0 # Interval in seconds to poll one group of LF PIDs
103
- NUM_LOW_FREQUENCY_GROUPS = 3
104
-
105
- # Prepare Low-Frequency PID groups
106
- low_frequency_pid_groups = []
107
- if LOW_FREQUENCY_PIDS_POOL:
108
- chunk_size = (len(LOW_FREQUENCY_PIDS_POOL) + NUM_LOW_FREQUENCY_GROUPS - 1) // NUM_LOW_FREQUENCY_GROUPS
109
- for i in range(0, len(LOW_FREQUENCY_PIDS_POOL), chunk_size):
110
- low_frequency_pid_groups.append(LOW_FREQUENCY_PIDS_POOL[i:i + chunk_size])
111
 
112
- if not low_frequency_pid_groups: # Handle case with no LF PIDs
113
- low_frequency_pid_groups.append([])
114
- NUM_LOW_FREQUENCY_GROUPS = 1
115
-
116
- last_low_frequency_group_poll_time = time.monotonic()
117
- current_low_frequency_group_index = 0
118
 
119
  current_pid_values = {pid.name: '' for pid in ALL_PIDS_TO_LOG}
120
 
121
- # Create log directories
122
- for dir_path in [ORIGINAL_CSV_DIR, DUPLICATE_CSV_DIR]: # Add ANALYZED_OUTPUT_DIR if used
123
  try:
124
  os.makedirs(dir_path, exist_ok=True)
125
  print(f"Ensured directory exists: {dir_path}")
126
  except OSError as e:
127
  print(f"Error creating directory {dir_path}: {e}. Attempting to use current directory.")
128
- # Fallback logic may be needed if creation fails critically
129
- if dir_path == ORIGINAL_CSV_DIR: # Critical for saving original log
130
- print("Cannot create original log directory. Exiting.")
131
  return None
132
 
133
  current_session_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -135,81 +160,72 @@ def perform_logging_session():
135
  original_csv_filepath = os.path.join(ORIGINAL_CSV_DIR, csv_file_name_only)
136
 
137
  try:
138
- if USE_WIFI_SETTINGS:
139
- print(f"Attempting to connect to WiFi adapter at {WIFI_ADAPTER_HOST}:{WIFI_ADAPTER_PORT} using protocol {WIFI_PROTOCOL}...")
140
- connection = obd.OBD(protocol=WIFI_PROTOCOL,
141
- host=WIFI_ADAPTER_HOST,
142
- port=WIFI_ADAPTER_PORT,
143
- fast=False,
144
- timeout=30)
145
- else:
146
- print("Attempting to connect via socat PTY /dev/ttys011...")
147
- connection = obd.OBD("/dev/ttys086", fast=True, timeout=30) # Auto-scan for USB/Bluetooth
148
-
149
- if not connection.is_connected():
150
- print("Failed to connect to OBD-II adapter.")
151
- print(f"Connection status: {connection.status()}")
152
- return None
153
-
154
- print(f"Successfully connected to OBD-II adapter: {connection.port_name()}")
155
- print(f"Adapter status: {connection.status()}")
156
- print(f"Supported PIDs (sample):")
157
- supported_commands = connection.supported_commands
158
- for i, cmd in enumerate(supported_commands):
159
- print(f" - {cmd.name}")
160
- if not supported_commands:
161
- print("No commands")
162
 
163
  # Creating initial full PID sample to have fully populated rows from beginning
164
  print("\nPerforming initial full PID sample...")
165
  initial_log_entry = {
166
- 'timestamp': datetime.datetime.now().isoformat(),
167
- 'driving_style': initial_driving_style,
168
- 'road_type': initial_road_type,
169
- 'traffic_condition': initial_traffic_condition
170
  }
171
 
172
- print("Polling initial High-Frequency PIDs...")
173
- for pid_command in HIGH_FREQUENCY_PIDS:
174
- value = get_pid_value(connection, pid_command)
175
- current_pid_values[pid_command.name] = value if value is not None else ''
176
- initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
177
-
178
- print("Polling initial Low-Frequency PIDs (all groups)...")
179
- if low_frequency_pid_groups and low_frequency_pid_groups[0]: # Check if there are any LF PIDs
180
- for group in low_frequency_pid_groups:
181
- for pid_command in group:
182
- value = get_pid_value(connection, pid_command)
183
- current_pid_values[pid_command.name] = value if value is not None else ''
184
- initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
185
- else:
186
- print("No Low-Frequency PIDs to poll for initial sample.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  for pid_obj in ALL_PIDS_TO_LOG:
189
  if pid_obj.name not in initial_log_entry:
190
- initial_log_entry[pid_obj.name] = '' # Default to empty if somehow missed
 
 
 
 
 
 
 
191
 
192
  except Exception as e:
193
  print(f"An error occurred during connection or initial PID sample: {e}")
194
  if connection and connection.is_connected():
195
  connection.close()
196
- return None
197
 
198
  file_exists = os.path.isfile(original_csv_filepath)
199
  try:
200
  with open(original_csv_filepath, 'a', newline='') as csvfile:
201
- # Add new columns for analyzer output, they will be empty initially from logger
202
- header_names = ['timestamp',
203
- 'driving_style', 'road_type', 'traffic_condition', # Original placeholder columns
204
- 'driving_style_analyzed', 'road_type_analyzed', 'traffic_condition_analyzed' # For analyzer
205
- ] + [pid.name for pid in ALL_PIDS_TO_LOG]
206
-
207
- # Remove duplicates if any PID name is already in the first part
208
- processed_headers = []
209
- for item in header_names:
210
- if item not in processed_headers:
211
- processed_headers.append(item)
212
- header_names = processed_headers
213
 
214
  writer = csv.DictWriter(csvfile, fieldnames=header_names)
215
 
@@ -218,74 +234,106 @@ def perform_logging_session():
218
  print(f"Created new CSV file: {original_csv_filepath} with headers: {header_names}")
219
 
220
  if initial_log_entry:
221
- # Add placeholder columns for analyzer to the initial entry
222
- initial_log_entry['driving_style_analyzed'] = ''
223
- initial_log_entry['road_type_analyzed'] = ''
224
- initial_log_entry['traffic_condition_analyzed'] = ''
225
  writer.writerow(initial_log_entry)
226
  csvfile.flush()
227
- print(f"Logged initial full sample. Style: {initial_driving_style}, Road: {initial_road_type}, Traffic: {initial_traffic_condition}.")
228
 
229
- last_low_frequency_group_poll_time = time.monotonic()
230
- current_low_frequency_group_index = 0
231
 
232
- print(f"\nLogging high-frequency data every {BASE_LOG_INTERVAL} second(s).")
233
- print(f"Polling one group of low-frequency PIDs every {LOW_FREQUENCY_GROUP_POLL_INTERVAL} second(s).")
234
- print(f"Low-frequency PIDs divided into {len(low_frequency_pid_groups)} groups.")
235
 
236
- log_count = 0
237
- while True:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  loop_start_time = time.monotonic()
239
  current_datetime = datetime.datetime.now()
240
  timestamp_iso = current_datetime.isoformat()
241
 
242
- hf_reads = 0
243
- for pid_command in HIGH_FREQUENCY_PIDS:
244
- value = get_pid_value(connection, pid_command)
245
- current_pid_values[pid_command.name] = value if value is not None else ''
246
- if value is not None:
247
- hf_reads += 1
248
 
249
- lf_reads_this_cycle = 0
250
- lf_group_polled_this_cycle = "None"
251
- if low_frequency_pid_groups and (time.monotonic() - last_low_frequency_group_poll_time) >= LOW_FREQUENCY_GROUP_POLL_INTERVAL:
252
- group_to_poll = low_frequency_pid_groups[current_low_frequency_group_index]
253
- lf_group_polled_this_cycle = f"Group {current_low_frequency_group_index + 1}/{len(low_frequency_pid_groups)}"
254
-
255
- for pid_command in group_to_poll:
 
256
  value = get_pid_value(connection, pid_command)
257
  current_pid_values[pid_command.name] = value if value is not None else ''
258
  if value is not None:
259
- lf_reads_this_cycle +=1
260
- else:
261
- print(f"Warning: Could not read LF PID {pid_command.name}")
 
 
 
 
 
 
262
 
263
- last_low_frequency_group_poll_time = time.monotonic()
264
- current_low_frequency_group_index = (current_low_frequency_group_index + 1) % len(low_frequency_pid_groups)
265
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
  final_log_entry = {
268
- 'timestamp': timestamp_iso,
269
- 'driving_style': initial_driving_style,
270
- 'road_type': initial_road_type,
271
- 'traffic_condition': initial_traffic_condition,
272
- 'driving_style_analyzed': '',
273
- 'road_type_analyzed': '',
274
- 'traffic_condition_analyzed': ''
275
  }
276
- # Add all PID values for this cycle from current_pid_values
277
  for pid_obj in ALL_PIDS_TO_LOG:
278
  final_log_entry[pid_obj.name] = current_pid_values.get(pid_obj.name, '')
279
 
 
 
 
 
 
 
280
  writer.writerow(final_log_entry)
281
  csvfile.flush()
282
 
283
  log_count += 1
284
  if log_count % 10 == 0:
285
- status_msg = f"Logged entry {log_count} - HF PIDs Read: {hf_reads}/{len(HIGH_FREQUENCY_PIDS)}"
286
- if lf_reads_this_cycle > 0 or lf_group_polled_this_cycle != "None":
287
- status_msg += f" - LF PIDs ({lf_group_polled_this_cycle}) Read: {lf_reads_this_cycle}/unknown_total_for_group_easily"
288
- print(status_msg)
 
 
289
 
290
  elapsed_time_in_loop = time.monotonic() - loop_start_time
291
  sleep_duration = max(0, BASE_LOG_INTERVAL - elapsed_time_in_loop)
@@ -296,79 +344,188 @@ def perform_logging_session():
296
  except Exception as e:
297
  print(f"An error occurred during logging: {e}")
298
  finally:
299
- if connection and connection.is_connected():
300
- print("Closing OBD-II connection.")
301
- connection.close()
302
- print(f"Data logging stopped. Original CSV file '{original_csv_filepath}' saved.")
303
 
304
- return original_csv_filepath
305
 
306
- def duplicate_csv(original_filepath):
307
- if not original_filepath or not os.path.exists(original_filepath):
308
- print(f"Error: Original CSV not found for duplication: {original_filepath}")
309
  return None
310
 
311
- # Ensure DUPLICATE_CSV_DIR exists (it should have been created by perform_logging_session)
312
- os.makedirs(DUPLICATE_CSV_DIR, exist_ok=True)
 
313
 
314
- # Get just the filename from the original path
315
- original_filename = os.path.basename(original_filepath)
316
- base, ext = os.path.splitext(original_filename)
317
 
318
- # Construct new filename for the duplicate
319
- duplicate_filename = f"{base}_to_analyze{ext}" # Suffix to distinguish
320
- duplicate_filepath = os.path.join(DUPLICATE_CSV_DIR, duplicate_filename)
321
 
322
  try:
323
- shutil.copy2(original_filepath, duplicate_filepath)
324
- print(f"Successfully duplicated CSV to: {duplicate_filepath}")
325
- return duplicate_filepath
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  except Exception as e:
327
- print(f"Error duplicating CSV {original_filepath} to {duplicate_filepath}: {e}")
 
 
328
  return None
329
 
330
- def run_analyzer_on_csv(csv_to_analyze_path):
331
- if not csv_to_analyze_path or not os.path.exists(csv_to_analyze_path):
332
- print(f"Error: Analyzer input CSV not found: {csv_to_analyze_path}")
333
- return
334
 
335
- # Analyzer script is in the same directory as this logger script
336
- analyzer_script_path = os.path.join(os.path.dirname(__file__), "obd_analyzer.py")
 
337
 
338
- if not os.path.exists(analyzer_script_path):
339
- print(f"CRITICAL Error: Analyzer script not found at {analyzer_script_path}")
340
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
- analyzed_file_basename = os.path.basename(csv_to_analyze_path).replace("_to_analyze.csv", "_final_analyzed.csv")
343
- final_output_path = os.path.join(DUPLICATE_CSV_DIR, analyzed_file_basename)
344
 
345
- command = [
346
- "python",
347
- analyzer_script_path,
348
- csv_to_analyze_path,
349
- "--output_csv",
350
- final_output_path
351
- ]
 
 
 
 
 
 
 
352
 
353
- print(f"Running analyzer: {' '.join(command)}")
354
  try:
355
- process = subprocess.run(command, check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
356
- print("Analyzer Output:\n", process.stdout)
357
- if process.stderr: print("Analyzer Errors:\n", process.stderr)
358
- print(f"Analyzer finished. Output saved to {final_output_path}")
359
- except subprocess.CalledProcessError as e:
360
- print(f"Error running analyzer: {e}\nStdout: {e.stdout}\nStderr: {e.stderr}")
361
- except FileNotFoundError:
362
- print(f"Error: 'python' or analyzer script not found ({analyzer_script_path}).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
- if __name__ == "__main__":
365
- original_log_file = perform_logging_session()
366
 
367
- if original_log_file and os.path.exists(original_log_file):
368
- duplicated_log_file = duplicate_csv(original_log_file)
369
-
370
- if duplicated_log_file:
371
- run_analyzer_on_csv(duplicated_log_file)
372
- print(f"Process complete. Original log: {original_log_file}, Analyzed log copy: {duplicated_log_file}")
373
- else:
374
- print("OBD logging did not produce a valid CSV file. Skipping analysis.")
 
3
  import datetime
4
  import csv
5
  import os
 
 
6
  import shutil
7
  import subprocess
8
+ import sys
9
+ import select
10
+
11
+ try:
12
+ from logging_wrapper import auto_score_on_completion
13
+ SCORING_AVAILABLE = True
14
+ print("Auto-scoring module loaded")
15
+ except ImportError:
16
+ SCORING_AVAILABLE = False
17
+ print("Auto-scoring module not found - scoring will be skipped")
18
+
19
+
20
+ CRITICAL_FUEL_PIDS = [
21
+ obd.commands.RPM,
22
+ obd.commands.SPEED,
23
+ obd.commands.THROTTLE_POS,
24
+ obd.commands.MAF,
25
+ ]
26
+
27
+ SECONDARY_FUEL_PIDS = [
28
+ obd.commands.ENGINE_LOAD,
29
+ obd.commands.INTAKE_PRESSURE,
 
 
 
 
 
 
 
 
 
 
 
30
  ]
31
 
32
+ TERTIARY_FUEL_PIDS = [
33
+ obd.commands.SHORT_FUEL_TRIM_1,
34
+ obd.commands.SHORT_FUEL_TRIM_2,
35
+ obd.commands.LONG_FUEL_TRIM_1,
 
 
 
 
 
 
 
36
  obd.commands.LONG_FUEL_TRIM_2,
 
 
 
 
 
37
  ]
38
 
39
+ HIGH_FREQUENCY_PIDS = CRITICAL_FUEL_PIDS
40
+ LOW_FREQUENCY_PIDS_POOL = SECONDARY_FUEL_PIDS + TERTIARY_FUEL_PIDS
41
+
42
  ALL_PIDS_TO_LOG = HIGH_FREQUENCY_PIDS + LOW_FREQUENCY_PIDS_POOL
43
 
44
  CSV_FILENAME_BASE = "obd_data_log"
45
+ LOGS_BASE_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "logs")
46
+ FUEL_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "FuelLogs")
47
+ ANALYSED_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "analysedLogsAutomated")
 
48
 
49
+ SCORED_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "ScoredLogs")
50
+ ORIGINAL_CSV_DIR = FUEL_LOGS_DIR
 
 
 
51
 
52
  def get_pid_value(connection, pid_command):
53
+ """Queries a PID and returns its value"""
54
  try:
55
  response = connection.query(pid_command, force=True)
56
  if response.is_null() or response.value is None:
 
61
  except Exception as e:
62
  print(f"Error querying {pid_command.name}: {e}")
63
  return None
64
+
65
+ def calculate_fuel_metrics(csv_path):
66
+ """Calculate fuel consumption and efficiency from MAF and SPEED data."""
67
+ try:
68
+ df = pd.read_csv(csv_path)
69
+
70
+ # Constants
71
+ AFR = 14.7 # Air-Fuel Ratio for petrol
72
+ FUEL_DENSITY = 737 # g/L for petrol
73
+
74
+ # Calculate time delta between rows (in seconds)
75
+ df['timestamp'] = pd.to_datetime(df['timestamp'])
76
+ df['time_delta'] = df['timestamp'].diff().dt.total_seconds()
77
+ df.loc[0, 'time_delta'] = 0 # First row has no previous row
78
+
79
+ # Calculate instantaneous fuel rate (L/hr) from MAF
80
+ df['fuel_rate_L_per_hr'] = (df['MAF'] * 3600) / (AFR * FUEL_DENSITY)
81
+
82
+ # Calculate fuel used in this time interval (L)
83
+ df['fuel_used_interval'] = (df['fuel_rate_L_per_hr'] / 3600) * df['time_delta']
84
+
85
+ # Calculate distance traveled in this interval (km)
86
+ df['distance_interval'] = (df['SPEED'] / 3600) * df['time_delta']
87
+
88
+ # Calculate cumulative values
89
+ df['Fuel_Used'] = df['fuel_used_interval'].cumsum()
90
+ df['Distance'] = df['distance_interval'].cumsum()
91
+
92
+ # Calculate fuel efficiency (L/100km)
93
+ df['Fuel_efficiency (L/100km)'] = np.where(
94
+ df['Distance'] > 0,
95
+ (df['Fuel_Used'] / df['Distance']) * 100,
96
+ 0
97
+ )
98
+
99
+ df['Fuel_Used'] = df['Fuel_Used'].round(3)
100
+ df['Distance'] = df['Distance'].round(2)
101
+ df['Fuel_efficiency (L/100km)'] = df['Fuel_efficiency (L/100km)'].round(2)
102
+
103
+ # Drop intermediate calculation columns
104
+ df = df.drop(columns=['time_delta', 'fuel_rate_L_per_hr',
105
+ 'fuel_used_interval', 'distance_interval'])
106
+
107
+ # Save back to CSV
108
+ df.to_csv(csv_path, index=False)
109
+
110
+ # Print summary
111
+ total_fuel = df['Fuel_Used'].iloc[-1]
112
+ total_distance = df['Distance'].iloc[-1]
113
+ avg_efficiency = df['Fuel_efficiency (L/100km)'].iloc[-1]
114
+
115
+ print(f"Total Fuel Used: {total_fuel:.3f} L")
116
+ print(f"Total Distance: {total_distance:.2f} km")
117
+ print(f"Average Efficiency: {avg_efficiency:.2f} L/100km")
118
+
119
+ return csv_path
120
+
121
+ except Exception as e:
122
+ print(f"Error calculating fuel metrics: {e}")
123
+ import traceback
124
+ traceback.print_exc()
125
+ return None
126
+
127
 
128
+ def perform_logging_session(connection):
129
+ """Perform a single logging session with an existing OBD connection."""
130
+ print(f"\nStarting new fuel efficiency logging session")
131
+ print("Commands:")
132
+ print(" - Type 'next' and press Enter to finish this drive and start a new one")
133
+ print(" - Type 'quit' and press Enter to stop all logging")
134
 
 
 
 
 
135
 
136
+ CRITICAL_PID_INTERVAL = 0.65
137
+ SECONDARY_PID_INTERVAL = 2.0
138
+ TERTIARY_PID_INTERVAL = 5.0
 
 
 
 
 
 
 
139
 
140
+ last_critical_poll_time = time.monotonic() - CRITICAL_PID_INTERVAL
141
+ last_secondary_poll_time = time.monotonic() - SECONDARY_PID_INTERVAL
142
+ last_tertiary_poll_time = time.monotonic() - TERTIARY_PID_INTERVAL
143
+
144
+ BASE_LOG_INTERVAL = CRITICAL_PID_INTERVAL
 
145
 
146
  current_pid_values = {pid.name: '' for pid in ALL_PIDS_TO_LOG}
147
 
148
+ for dir_path in [FUEL_LOGS_DIR, ANALYSED_LOGS_DIR, SCORED_LOGS_DIR]:
 
149
  try:
150
  os.makedirs(dir_path, exist_ok=True)
151
  print(f"Ensured directory exists: {dir_path}")
152
  except OSError as e:
153
  print(f"Error creating directory {dir_path}: {e}. Attempting to use current directory.")
154
+ if dir_path == FUEL_LOGS_DIR:
155
+ print("Cannot create fuel log directory. Exiting.")
 
156
  return None
157
 
158
  current_session_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
 
160
  original_csv_filepath = os.path.join(ORIGINAL_CSV_DIR, csv_file_name_only)
161
 
162
  try:
163
+ if not connection or not connection.is_connected():
164
+ print("OBD connection not available")
165
+ return None, "quit"
166
+
167
+ print(f"Using existing OBD connection: {connection.port_name()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  # Creating initial full PID sample to have fully populated rows from beginning
170
  print("\nPerforming initial full PID sample...")
171
  initial_log_entry = {
172
+ 'timestamp': datetime.datetime.now().isoformat()
 
 
 
173
  }
174
 
175
+ print("Polling initial Critical Fuel PIDs...")
176
+ for pid_command in CRITICAL_FUEL_PIDS:
177
+ try:
178
+ value = get_pid_value(connection, pid_command)
179
+ current_pid_values[pid_command.name] = value if value is not None else ''
180
+ initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
181
+ except Exception as e:
182
+ print(f"Warning: Failed to get {pid_command.name}: {e}")
183
+ current_pid_values[pid_command.name] = ''
184
+ initial_log_entry[pid_command.name] = ''
185
+
186
+ print("Polling initial Secondary Fuel PIDs...")
187
+ for pid_command in SECONDARY_FUEL_PIDS:
188
+ try:
189
+ value = get_pid_value(connection, pid_command)
190
+ current_pid_values[pid_command.name] = value if value is not None else ''
191
+ initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
192
+ except Exception as e:
193
+ print(f"Warning: Failed to get {pid_command.name}: {e}")
194
+ current_pid_values[pid_command.name] = ''
195
+ initial_log_entry[pid_command.name] = ''
196
+
197
+ print("Polling initial Tertiary Fuel PIDs...")
198
+ for pid_command in TERTIARY_FUEL_PIDS:
199
+ try:
200
+ value = get_pid_value(connection, pid_command)
201
+ current_pid_values[pid_command.name] = value if value is not None else ''
202
+ initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
203
+ except Exception as e:
204
+ print(f"Warning: Failed to get {pid_command.name}: {e}")
205
+ current_pid_values[pid_command.name] = ''
206
+ initial_log_entry[pid_command.name] = ''
207
 
208
  for pid_obj in ALL_PIDS_TO_LOG:
209
  if pid_obj.name not in initial_log_entry:
210
+ initial_log_entry[pid_obj.name] = ''
211
+
212
+ # Empty driving style and fuel columns
213
+ initial_log_entry['Driving_style'] = ''
214
+ initial_log_entry['Fuel_efficiency (L/100km)'] = ''
215
+ initial_log_entry['Distance'] = ''
216
+ initial_log_entry['Fuel_Used'] = ''
217
+ initial_log_entry['Route'] = ''
218
 
219
  except Exception as e:
220
  print(f"An error occurred during connection or initial PID sample: {e}")
221
  if connection and connection.is_connected():
222
  connection.close()
223
+ return None, "quit"
224
 
225
  file_exists = os.path.isfile(original_csv_filepath)
226
  try:
227
  with open(original_csv_filepath, 'a', newline='') as csvfile:
228
+ header_names = ['timestamp'] + [pid.name for pid in ALL_PIDS_TO_LOG] + ['Driving_style', 'Fuel_efficiency (L/100km)', 'Distance', 'Fuel_Used', 'Route']
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  writer = csv.DictWriter(csvfile, fieldnames=header_names)
231
 
 
234
  print(f"Created new CSV file: {original_csv_filepath} with headers: {header_names}")
235
 
236
  if initial_log_entry:
 
 
 
 
237
  writer.writerow(initial_log_entry)
238
  csvfile.flush()
239
+ print(f"Logged initial full sample with all fuel efficiency PIDs.")
240
 
241
+ log_count = 0
242
+ user_stop_requested = False
243
 
244
+ print(f"Started logging")
 
 
245
 
246
+ while not user_stop_requested:
247
+ if log_count % 100 == 0 and log_count > 0:
248
+ print(f"Debug: Main loop running, iteration {log_count}")
249
+
250
+ # Check for non-blocking input
251
+ if select.select([sys.stdin], [], [], 0.0)[0]:
252
+ user_command = sys.stdin.readline().strip().lower()
253
+ if user_command == "next":
254
+ print("\nUser typed 'next'. Finishing current drive...")
255
+ user_stop_requested = True
256
+ break
257
+ elif user_command == "quit":
258
+ print("\nUser typed 'quit'. Stopping all logging...")
259
+ user_stop_requested = True
260
+ return original_csv_filepath, "quit"
261
+ else:
262
+ print(f"Input detected: '{user_command}'. Type 'next' or 'quit'.", end='\r')
263
+
264
  loop_start_time = time.monotonic()
265
  current_datetime = datetime.datetime.now()
266
  timestamp_iso = current_datetime.isoformat()
267
 
268
+ critical_reads = 0
269
+ secondary_reads = 0
270
+ tertiary_reads = 0
 
 
 
271
 
272
+ # Always poll critical PIDs (highest frequency)
273
+ if (time.monotonic() - last_critical_poll_time) >= CRITICAL_PID_INTERVAL:
274
+ if not connection or not connection.is_connected():
275
+ print("\nOBD connection lost during logging. Ending session.")
276
+ user_stop_requested = True
277
+ break
278
+
279
+ for pid_command in CRITICAL_FUEL_PIDS:
280
  value = get_pid_value(connection, pid_command)
281
  current_pid_values[pid_command.name] = value if value is not None else ''
282
  if value is not None:
283
+ critical_reads += 1
284
+ last_critical_poll_time = time.monotonic()
285
+
286
+ # Poll secondary PIDs at medium frequency
287
+ if (time.monotonic() - last_secondary_poll_time) >= SECONDARY_PID_INTERVAL:
288
+ if not connection or not connection.is_connected():
289
+ print("\nOBD connection lost during logging. Ending session.")
290
+ user_stop_requested = True
291
+ break
292
 
293
+ for pid_command in SECONDARY_FUEL_PIDS:
294
+ value = get_pid_value(connection, pid_command)
295
+ current_pid_values[pid_command.name] = value if value is not None else ''
296
+ if value is not None:
297
+ secondary_reads += 1
298
+ last_secondary_poll_time = time.monotonic()
299
+
300
+ # Poll tertiary PIDs at low frequency
301
+ if (time.monotonic() - last_tertiary_poll_time) >= TERTIARY_PID_INTERVAL:
302
+ if not connection or not connection.is_connected():
303
+ print("\nOBD connection lost during logging. Ending session.")
304
+ user_stop_requested = True
305
+ break
306
+
307
+ for pid_command in TERTIARY_FUEL_PIDS:
308
+ value = get_pid_value(connection, pid_command)
309
+ current_pid_values[pid_command.name] = value if value is not None else ''
310
+ if value is not None:
311
+ tertiary_reads += 1
312
+ last_tertiary_poll_time = time.monotonic()
313
 
314
  final_log_entry = {
315
+ 'timestamp': timestamp_iso
 
 
 
 
 
 
316
  }
 
317
  for pid_obj in ALL_PIDS_TO_LOG:
318
  final_log_entry[pid_obj.name] = current_pid_values.get(pid_obj.name, '')
319
 
320
+ final_log_entry['Driving_style'] = ''
321
+ final_log_entry['Fuel_efficiency (L/100km)'] = ''
322
+ final_log_entry['Distance'] = ''
323
+ final_log_entry['Fuel_Used'] = ''
324
+ final_log_entry['Route'] = ''
325
+
326
  writer.writerow(final_log_entry)
327
  csvfile.flush()
328
 
329
  log_count += 1
330
  if log_count % 10 == 0:
331
+ status_msg = f"Entry {log_count} - Critical: {critical_reads}/{len(CRITICAL_FUEL_PIDS)}"
332
+ if secondary_reads > 0:
333
+ status_msg += f" Secondary: {secondary_reads}/{len(SECONDARY_FUEL_PIDS)}"
334
+ if tertiary_reads > 0:
335
+ status_msg += f" Tertiary: {tertiary_reads}/{len(TERTIARY_FUEL_PIDS)}"
336
+ print(status_msg + " " * 20, end='\r')
337
 
338
  elapsed_time_in_loop = time.monotonic() - loop_start_time
339
  sleep_duration = max(0, BASE_LOG_INTERVAL - elapsed_time_in_loop)
 
344
  except Exception as e:
345
  print(f"An error occurred during logging: {e}")
346
  finally:
347
+ print(" " * 100, end='\r')
348
+ print(f"Drive completed - data saved to: {os.path.basename(original_csv_filepath)}")
 
 
349
 
350
+ return original_csv_filepath, "next"
351
 
352
+ def run_scorer_on_csv(original_csv_path):
353
+ if not SCORING_AVAILABLE:
354
+ print("Scoring module not available, skipping aggressiveness scoring")
355
  return None
356
 
357
+ if not original_csv_path or not os.path.exists(original_csv_path):
358
+ print(f"Error: Original CSV not found for scoring: {original_csv_path}")
359
+ return None
360
 
361
+ print(f"\nRunning aggressiveness scorer...")
 
 
362
 
363
+ original_filename = os.path.basename(original_csv_path)
364
+ base, ext = os.path.splitext(original_filename)
 
365
 
366
  try:
367
+ # Import and configure the scorer
368
+ from driving_aggressiveness_scorer import DrivingAggressivenessScorer
369
+ import json
370
+
371
+ # Initialize scorer with bounds file in logs directory
372
+ bounds_file = os.path.join(LOGS_BASE_DIR, 'obd_bounds.json')
373
+ scorer = DrivingAggressivenessScorer(bounds_file=bounds_file)
374
+
375
+ # Run analysis
376
+ df_scored, results = scorer.analyze_drive(str(original_csv_path), update_bounds=True)
377
+
378
+ df_scored['drive_score'] = results['final_score']
379
+
380
+ # Save scored CSV to ScoredLogs directory
381
+ scored_csv_path = os.path.join(SCORED_LOGS_DIR, f"{base}_scored{ext}")
382
+ df_scored.to_csv(scored_csv_path, index=False)
383
+ print(f"Scored CSV saved: {os.path.basename(scored_csv_path)}")
384
+
385
+ # Save summary JSON to ScoredLogs directory
386
+ summary_json_path = os.path.join(SCORED_LOGS_DIR, f"{base}_score_summary.json")
387
+ summary = {
388
+ 'timestamp': datetime.datetime.now().isoformat(),
389
+ 'original_file': str(original_csv_path),
390
+ 'scored_file': str(scored_csv_path),
391
+ 'results': results
392
+ }
393
+
394
+ with open(summary_json_path, 'w') as f:
395
+ json.dump(summary, f, indent=2)
396
+ print(f"Score summary saved: {os.path.basename(summary_json_path)}")
397
+
398
+ try:
399
+ from visualiseScorer import visualize_drive
400
+ visualization_path = os.path.join(SCORED_LOGS_DIR, f"{base}_visualization.png")
401
+ visualize_drive(df_scored, results, save_path=visualization_path)
402
+ print(f"Visualization saved: {os.path.basename(visualization_path)}")
403
+ except Exception as viz_error:
404
+ print(f"Warning: Could not generate visualization: {viz_error}")
405
+
406
+ # Print quick summary
407
+ print(f"Drive Score: {results['final_score']:.1f}/100")
408
+
409
+ return scored_csv_path
410
+
411
  except Exception as e:
412
+ print(f"Error running scorer: {e}")
413
+ import traceback
414
+ traceback.print_exc()
415
  return None
416
 
 
 
 
 
417
 
418
+ def initialize_obd_connection():
419
+ """Initialize OBD connection once for multiple sessions."""
420
+ connection = None
421
 
422
+ try:
423
+
424
+ print("Attempting to connect via socat PTY /dev/ttys006...")
425
+ connection = obd.OBD("/dev/ttys002", fast=True, timeout=30)
426
+
427
+ if not connection.is_connected():
428
+ print("Failed to connect to OBD-II adapter.")
429
+ print(f"Connection status: {connection.status()}")
430
+ return None
431
+
432
+ print(f"Successfully connected to OBD-II adapter: {connection.port_name()}")
433
+ print(f"Adapter status: {connection.status()}")
434
+ return connection
435
+
436
+ except Exception as e:
437
+ print(f"An error occurred during OBD connection: {e}")
438
+ return None
439
 
 
 
440
 
441
+ def main():
442
+ print("Fuel Efficiency OBD Logger - Multi-Session Mode")
443
+ if SCORING_AVAILABLE:
444
+ print("Aggressiveness scoring enabled")
445
+ print("=" * 50)
446
+
447
+ # Initialize OBD connection once
448
+ connection = initialize_obd_connection()
449
+ if not connection:
450
+ print("Could not establish OBD connection. Exiting.")
451
+ return
452
+
453
+ session_count = 0
454
+ logged_files = []
455
 
 
456
  try:
457
+ while True:
458
+ session_count += 1
459
+ print(f"\n📊 Session {session_count} ready to start")
460
+
461
+ # Check if connection is still available before starting new session
462
+ if not connection or not connection.is_connected():
463
+ print("OBD connection not available. Attempting to reconnect...")
464
+ connection = initialize_obd_connection()
465
+ if not connection:
466
+ print("Could not re-establish OBD connection. Exiting.")
467
+ break
468
+
469
+ result = perform_logging_session(connection)
470
+
471
+ if isinstance(result, tuple):
472
+ csv_file, command = result
473
+ else:
474
+ csv_file, command = result, "quit"
475
+
476
+ # Handle the result
477
+ if csv_file and os.path.exists(csv_file):
478
+ try:
479
+ with open(csv_file, 'r') as f:
480
+ lines = f.readlines()
481
+ if len(lines) > 1: # More than just the header
482
+ logged_files.append(csv_file)
483
+ print(f"Drive {session_count} saved: {os.path.basename(csv_file)}")
484
+
485
+ calculate_fuel_metrics(csv_file)
486
+
487
+ print(f"\nStarting aggressiveness scoring for drive {session_count}...")
488
+ scored_file = run_scorer_on_csv(csv_file)
489
+ if scored_file:
490
+ print(f"Aggressiveness scoring complete for drive {session_count}")
491
+ else:
492
+ print(f"Aggressiveness scoring failed for drive {session_count}, but drive data is still saved")
493
+
494
+ else:
495
+ print(f"⚠️ Drive {session_count} had no data, skipping analysis")
496
+ os.remove(csv_file)
497
+ except Exception as e:
498
+ print(f"Error checking file {csv_file}: {e}")
499
+
500
+ # Check if user wants to quit
501
+ if command == "quit":
502
+ print("\nStopping all logging as requested")
503
+ break
504
+
505
+ # Otherwise continue to next session
506
+ print(f"\n Ready for next drive (Session {session_count + 1})")
507
+
508
+ except KeyboardInterrupt:
509
+ print("\n Logging stopped by user (Ctrl+C)")
510
+
511
+ finally:
512
+ if connection and connection.is_connected():
513
+ print("Closing OBD-II connection...")
514
+ connection.close()
515
+
516
+ print("\n" + "=" * 50)
517
+ print(f"📈 LOGGING SUMMARY")
518
+ print(f"Total drives logged: {len(logged_files)}")
519
+ if logged_files:
520
+ print("📁 Files saved to:")
521
+ print(" - Raw logs: logs/FuelLogs/")
522
+ if SCORING_AVAILABLE:
523
+ print(" - Scored logs: logs/ScoredLogs/")
524
+ print("\n📝 Files created:")
525
+ for file in logged_files:
526
+ print(f" - {os.path.basename(file)}")
527
+ print("=" * 50)
528
 
 
 
529
 
530
+ if __name__ == "__main__":
531
+ main()
 
 
 
 
 
 
OBD/scorerConfig/scorerConfig.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ weights:
3
+ RPM: 0.25
4
+ THROTTLE_POS: 0.25
5
+ ENGINE_LOAD: 0.25
6
+ MAF: 0.25
7
+
8
+
9
+ # Spike Detection Thresholds
10
+ spike_thresholds:
11
+ moderate_spike: 65
12
+ extreme_spike: 85
13
+ spike_percentage_threshold: 3
14
+ extreme_percentage_threshold: 1
15
+
16
+ # Penalty Multipliers
17
+ penalty_multipliers:
18
+ p95_multiplier: 0.3
19
+ p99_multiplier: 0.5
20
+ spike_freq_multiplier: 2.0
21
+ extreme_freq_multiplier: 3.0
22
+
23
+ # Aggregate Score Calculation
24
+ aggregate_weights:
25
+ mean_weight: 0.7
26
+ p75_weight: 0.3
27
+
28
+ style_categories:
29
+ very_calm: [0, 20]
30
+ calm: [20, 40]
31
+ moderate: [40, 55]
32
+ aggressive: [55, 70]
33
+ very_aggressive: [70, 100]
34
+
35
+ bounds:
36
+ file: "obd_bounds.json"
37
+ auto_update: true # Automatically update bounds with new data
38
+
39
+
40
+ theoretical_maxes:
41
+ RPM: 6000
42
+ THROTTLE_POS: 100
43
+ ENGINE_LOAD: 100
44
+ MAF: 300
45
+ SPEED: 250
46
+ INTAKE_PRESSURE: 250
47
+
48
+ theoretical_mins:
49
+ RPM: 0
50
+ THROTTLE_POS: 0
51
+ ENGINE_LOAD: 0
52
+ MAF: 0
53
+ SPEED: 0
54
+ INTAKE_PRESSURE: 0
55
+
56
+ output:
57
+ save_scored_csv: true
58
+ visualization: true
59
+ verbose: true
OBD/visualiseScorer.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from driving_aggressiveness_scorer import DrivingAggressivenessScorer
5
+
6
+
7
+ def visualize_drive(df_scored: pd.DataFrame, results: dict, save_path: str = None):
8
+ """
9
+ Create comprehensive visualization of drive analysis.
10
+
11
+ Args:
12
+ df_scored: DataFrame with aggressiveness scores
13
+ results: Aggregate results dictionary
14
+ save_path: Optional path to save figure
15
+ """
16
+ fig, axes = plt.subplots(3, 2, figsize=(15, 12))
17
+ fig.suptitle(f"Drive Analysis - Score: {results['final_score']:.1f}/100",
18
+ fontsize=16, fontweight='bold')
19
+
20
+ # 1. Aggressiveness Score Over Time
21
+ ax = axes[0, 0]
22
+ ax.plot(df_scored['aggressiveness_score'], linewidth=1, color='#2E86AB')
23
+ ax.axhline(y=results['mean_score'], color='green', linestyle='--',
24
+ label=f"Mean: {results['mean_score']:.1f}")
25
+ ax.axhline(y=70, color='orange', linestyle='--', alpha=0.5, label='Spike Threshold')
26
+ ax.axhline(y=85, color='red', linestyle='--', alpha=0.5, label='Extreme Threshold')
27
+ ax.set_title('Aggressiveness Score Timeline')
28
+ ax.set_ylabel('Score (0-100)')
29
+ ax.set_xlabel('Sample Number')
30
+ ax.legend()
31
+ ax.grid(True, alpha=0.3)
32
+
33
+ # 2. Score Distribution
34
+ ax = axes[0, 1]
35
+ ax.hist(df_scored['aggressiveness_score'], bins=50, color='#A23B72', alpha=0.7, edgecolor='black')
36
+ ax.axvline(x=results['mean_score'], color='green', linestyle='--', linewidth=2, label='Mean')
37
+ ax.axvline(x=results['median_score'], color='blue', linestyle='--', linewidth=2, label='Median')
38
+ ax.set_title('Score Distribution')
39
+ ax.set_xlabel('Aggressiveness Score')
40
+ ax.set_ylabel('Frequency')
41
+ ax.legend()
42
+ ax.grid(True, alpha=0.3)
43
+
44
+ # 3. RPM vs Throttle Position (colored by score)
45
+ ax = axes[1, 0]
46
+ scatter = ax.scatter(df_scored['THROTTLE_POS'], df_scored['RPM'],
47
+ c=df_scored['aggressiveness_score'], cmap='RdYlGn_r',
48
+ s=10, alpha=0.6)
49
+ ax.set_title('RPM vs Throttle Position')
50
+ ax.set_xlabel('Throttle Position (%)')
51
+ ax.set_ylabel('RPM')
52
+ plt.colorbar(scatter, ax=ax, label='Aggressiveness')
53
+ ax.grid(True, alpha=0.3)
54
+
55
+ # 4. Speed vs Engine Load (colored by score)
56
+ ax = axes[1, 1]
57
+ scatter = ax.scatter(df_scored['SPEED'], df_scored['ENGINE_LOAD'],
58
+ c=df_scored['aggressiveness_score'], cmap='RdYlGn_r',
59
+ s=10, alpha=0.6)
60
+ ax.set_title('Speed vs Engine Load')
61
+ ax.set_xlabel('Speed (km/h)')
62
+ ax.set_ylabel('Engine Load (%)')
63
+ plt.colorbar(scatter, ax=ax, label='Aggressiveness')
64
+ ax.grid(True, alpha=0.3)
65
+
66
+ # 5. Key Metrics Over Time
67
+ ax = axes[2, 0]
68
+ ax2 = ax.twinx()
69
+
70
+ ln1 = ax.plot(df_scored['RPM'] / 100, label='RPM/100', color='#E63946', linewidth=0.8)
71
+ ln2 = ax.plot(df_scored['THROTTLE_POS'], label='Throttle %', color='#F77F00', linewidth=0.8)
72
+ ln3 = ax2.plot(df_scored['SPEED'], label='Speed', color='#06FFA5', linewidth=0.8)
73
+
74
+ ax.set_title('Key Metrics Timeline')
75
+ ax.set_xlabel('Sample Number')
76
+ ax.set_ylabel('RPM/100 & Throttle %')
77
+ ax2.set_ylabel('Speed (km/h)')
78
+
79
+ # Combine legends
80
+ lns = ln1 + ln2 + ln3
81
+ labs = [l.get_label() for l in lns]
82
+ ax.legend(lns, labs, loc='upper left')
83
+ ax.grid(True, alpha=0.3)
84
+
85
+ # 6. Score Statistics Summary
86
+ ax = axes[2, 1]
87
+ ax.axis('off')
88
+
89
+ stats_text = f"""
90
+ AGGREGATE SCORE BREAKDOWN
91
+ {'─' * 40}
92
+
93
+ Final Score: {results['final_score']:.1f} / 100
94
+
95
+ SCORE STATISTICS
96
+ Mean: {results['mean_score']:.1f}
97
+ Median: {results['median_score']:.1f}
98
+ Std Dev: {results['std_score']:.1f}
99
+
100
+ PERCENTILES
101
+ 75th: {results['p75_score']:.1f}
102
+ 90th: {results['p90_score']:.1f}
103
+ 95th: {results['p95_score']:.1f}
104
+ 99th: {results['p99_score']:.1f}
105
+ Max: {results['max_score']:.1f}
106
+
107
+ SPIKE ANALYSIS
108
+ Spikes (>70): {results['spike_percentage']:.1f}%
109
+ Extreme (>85): {results['extreme_percentage']:.1f}%
110
+ Spike Penalty: +{results['spike_penalty']:.1f}
111
+ """
112
+
113
+ ax.text(0.1, 0.95, stats_text, transform=ax.transAxes,
114
+ fontfamily='monospace', fontsize=10, verticalalignment='top',
115
+ bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
116
+
117
+ plt.tight_layout()
118
+
119
+ if save_path:
120
+ plt.savefig(save_path, dpi=150, bbox_inches='tight')
121
+ print(f"✓ Visualization saved to {save_path}")
122
+ plt.close()
123
+ else:
124
+ plt.show()
125
+
126
+
127
+ def compare_drives(scorer: DrivingAggressivenessScorer, csv_paths: list):
128
+ """
129
+ Compare multiple drives side-by-side.
130
+
131
+ Args:
132
+ scorer: DrivingAggressivenessScorer instance
133
+ csv_paths: List of CSV file paths to compare
134
+ """
135
+ results_list = []
136
+
137
+ for csv_path in csv_paths:
138
+ _, results = scorer.analyze_drive(csv_path, update_bounds=True)
139
+ results['file'] = csv_path
140
+ results_list.append(results)
141
+
142
+ # Create comparison DataFrame
143
+ comparison_df = pd.DataFrame(results_list)
144
+
145
+ print("\n" + "="*80)
146
+ print("DRIVE COMPARISON")
147
+ print("="*80)
148
+ print(comparison_df[['file', 'final_score', 'mean_score',
149
+ 'spike_percentage', 'spike_penalty']].to_string(index=False))
150
+ print("="*80 + "\n")
151
+
152
+ return comparison_df
153
+
154
+
155
+ def batch_analyze_folder(folder_path: str, pattern: str = "*.csv"):
156
+
157
+ from pathlib import Path
158
+
159
+ scorer = DrivingAggressivenessScorer()
160
+ csv_files = list(Path(folder_path).glob(pattern))
161
+
162
+ if not csv_files:
163
+ print(f"No CSV files found in {folder_path}")
164
+ return
165
+
166
+ print(f"Found {len(csv_files)} CSV files")
167
+
168
+ all_results = []
169
+ for csv_file in csv_files:
170
+ try:
171
+ df_scored, results = scorer.analyze_drive(str(csv_file), update_bounds=True)
172
+ results['filename'] = csv_file.name
173
+ all_results.append(results)
174
+
175
+ # Save individual scored file
176
+ output_path = csv_file.parent / f"{csv_file.stem}_scored.csv"
177
+ df_scored.to_csv(output_path, index=False)
178
+
179
+ except Exception as e:
180
+ print(f"Error processing {csv_file}: {e}")
181
+ continue
182
+
183
+ summary_df = pd.DataFrame(all_results)
184
+ summary_path = Path(folder_path) / "drive_summary_report.csv"
185
+ summary_df.to_csv(summary_path, index=False)
186
+ print(f"\n✓ Summary report saved to {summary_path}")
187
+
188
+ return summary_df
189
+
190
+
191
+ def export_bounds_report(scorer: DrivingAggressivenessScorer, output_path: str = "bounds_report.txt"):
192
+ bounds = scorer.get_current_bounds()
193
+
194
+ report = []
195
+ report.append("="*60)
196
+ report.append("DRIVING AGGRESSIVENESS SCORER - BOUNDS REPORT")
197
+ report.append("="*60)
198
+ report.append(f"\nGenerated: {pd.Timestamp.now()}\n")
199
+
200
+ report.append("PARAMETER WEIGHTS:")
201
+ report.append("-"*60)
202
+ for param, weight in scorer.weights.items():
203
+ report.append(f"{param:20s}: {weight:.3f} ({weight*100:.1f}%)")
204
+
205
+ report.append("\n\nCURRENT BOUNDS:")
206
+ report.append("-"*60)
207
+ report.append(f"{'Parameter':<20s} {'Min':>12s} {'Max':>12s} {'Range':>12s}")
208
+ report.append("-"*60)
209
+
210
+ for param in scorer.weights.keys():
211
+ min_val = bounds[param]['min']
212
+ max_val = bounds[param]['max']
213
+ range_val = max_val - min_val
214
+ report.append(f"{param:<20s} {min_val:>12.2f} {max_val:>12.2f} {range_val:>12.2f}")
215
+
216
+ report.append("="*60)
217
+
218
+ report_text = "\n".join(report)
219
+
220
+ with open(output_path, 'w') as f:
221
+ f.write(report_text)
222
+
223
+ print(report_text)
224
+ print(f"\n✓ Report saved to {output_path}")
225
+
226
+
227
+ # Example usage
228
+ if __name__ == "__main__":
229
+ scorer = DrivingAggressivenessScorer()
230
+
231
+ csv_path = 'obd_data_log_20251012_121810.csv'
232
+ df_scored, results = scorer.analyze_drive(csv_path)
233
+ visualize_drive(df_scored, results, save_path='drive_analysis.png')
234
+
235
+
236
+ # Export bounds report
237
+ export_bounds_report(scorer)
README.md CHANGED
@@ -11,7 +11,7 @@ short_description: OBD-logging FastAPI server with data processing pipelines
11
 
12
  # OBD Logger
13
 
14
- A comprehensive OBD-II data logging and processing system built with FastAPI, featuring advanced data cleaning, Google Drive integration, MongoDB storage capabilities, and **Reinforcement Learning from Human Feedback (RLHF)** for driver behavior classification.
15
 
16
  ![System Architecture](diagram/diagram.svg)
17
 
@@ -24,6 +24,7 @@ A comprehensive OBD-II data logging and processing system built with FastAPI, fe
24
  - Firebase for structured data storage and querying
25
  - MongoDB Atlas for structured data storage and querying
26
  - **Driver Behavior Classification**: XGBoost-based ML model for driving style prediction
 
27
  - **RLHF Training System**: Continuous model improvement through human feedback
28
  - **Data Visualization**: Automatic generation of correlation heatmaps and trend plots
29
  - **RESTful API**: Comprehensive endpoints for data management and retrieval
@@ -45,6 +46,9 @@ The application is structured into modular components:
45
  - **`rlhf.py`**: Main RLHF training pipeline for continuous model improvement
46
  - **`OBD/`**: OBD-specific modules for data analysis and logging
47
  - **`utils/`**: Utility modules for model management and data processing
 
 
 
48
 
49
  ## Quick Start
50
 
@@ -58,8 +62,10 @@ The application is structured into modular components:
58
  - `FIREBASE_SERVICE_ACCOUNT_JSON`: Firebase connection string
59
  - `FIREBASE_ADMIN_JSON`: Firebase Admin SDK credentials
60
  - `HF_TOKEN`: Hugging Face authentication token
61
- - `HF_MODEL_REPO`: Hugging Face model repository (default: `BinKhoaLe1812/Driver_Behavior_OBD`)
62
- - `MODEL_DIR`: Local model directory (default: `/app/models/ul`)
 
 
63
 
64
  3. **Run the Application**:
65
  ```bash
 
11
 
12
  # OBD Logger
13
 
14
+ A comprehensive OBD-II data logging and processing system built with FastAPI, featuring advanced data cleaning, Google Drive integration, MongoDB storage capabilities, **Reinforcement Learning from Human Feedback (RLHF)** for driver behavior classification, and **fuel efficiency scoring** using machine learning models.
15
 
16
  ![System Architecture](diagram/diagram.svg)
17
 
 
24
  - Firebase for structured data storage and querying
25
  - MongoDB Atlas for structured data storage and querying
26
  - **Driver Behavior Classification**: XGBoost-based ML model for driving style prediction
27
+ - **Fuel Efficiency Scoring**: ML model for drive-level fuel efficiency prediction (0-100%)
28
  - **RLHF Training System**: Continuous model improvement through human feedback
29
  - **Data Visualization**: Automatic generation of correlation heatmaps and trend plots
30
  - **RESTful API**: Comprehensive endpoints for data management and retrieval
 
46
  - **`rlhf.py`**: Main RLHF training pipeline for continuous model improvement
47
  - **`OBD/`**: OBD-specific modules for data analysis and logging
48
  - **`utils/`**: Utility modules for model management and data processing
49
+ - **`efficiency/`**: Fuel efficiency model training and evaluation
50
+ - **`retrain.py`**: Train and upload fuel efficiency models to Hugging Face
51
+ - **`eval.py`**: Evaluate fuel efficiency on OBD data
52
 
53
  ## Quick Start
54
 
 
62
  - `FIREBASE_SERVICE_ACCOUNT_JSON`: Firebase connection string
63
  - `FIREBASE_ADMIN_JSON`: Firebase Admin SDK credentials
64
  - `HF_TOKEN`: Hugging Face authentication token
65
+ - `HF_MODEL_REPO`: Driver behavior model repository (default: `BinKhoaLe1812/Driver_Behavior_OBD`)
66
+ - `HF_EFFICIENCY_MODEL_REPO`: Fuel efficiency model repository (default: `BinKhoaLe1812/Fuel_Efficiency_OBD`)
67
+ - `MODEL_DIR`: Driver behavior model directory (default: `/app/models/ul`)
68
+ - `EFFICIENCY_MODEL_DIR`: Fuel efficiency model directory (default: `/app/models/efficiency`)
69
 
70
  3. **Run the Application**:
71
  ```bash
app.py CHANGED
@@ -15,8 +15,8 @@ import numpy as np
15
  import matplotlib.pyplot as plt
16
  import seaborn as sns
17
  from sklearn.preprocessing import MinMaxScaler, StandardScaler
18
- from sklearn.impute import KNNImputer
19
  # Utils
 
20
  import os, datetime, json, logging, re
21
  from datetime import timedelta
22
  import pathlib
@@ -29,7 +29,10 @@ from data.mongo_saver import MongoSaver, save_csv_to_mongo, save_dataframe_to_mo
29
  from data.firebase_saver import FirebaseSaver, save_csv_increment, save_dataframe_increment
30
 
31
  # UL Model
32
- from utils.ul_label import ULLabeler
 
 
 
33
 
34
  # RLHF Training
35
  from train import RLHFTrainer
@@ -58,6 +61,7 @@ os.makedirs(CLEANED_DIR, exist_ok=True)
58
  os.makedirs(PLOT_DIR, exist_ok=True)
59
 
60
  DRIVE_STYLE = [] # latest UL predictions (string labels) — overwritten each run
 
61
 
62
  # Init temp empty file
63
  if not os.path.exists(RAW_CSV):
@@ -78,7 +82,7 @@ async def startup_event():
78
  """Download models on app startup"""
79
  try:
80
  logger.info("🚀 Starting model download...")
81
- from utils.download import download_latest_models
82
 
83
  # Load .env file if it exists
84
  env_path = pathlib.Path(".env")
@@ -96,7 +100,20 @@ async def startup_event():
96
  if success:
97
  logger.info("✅ Models downloaded successfully on startup")
98
  else:
99
- logger.warning("⚠️ Model download failed on startup - some features may not work")
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  except Exception as e:
102
  logger.error(f"❌ Startup model download failed: {e}")
@@ -457,6 +474,26 @@ def _process_and_save(df, norm_ts):
457
  logger.info(f"✅ UL labels generated ({len(DRIVE_STYLE)}) → {labeled_path}")
458
  except Exception as e:
459
  logger.error(f"❌ UL labeling failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  # 10) Plots
461
  _plot_corr(df, norm_ts)
462
  _plot_trend(df, norm_ts)
@@ -528,28 +565,56 @@ def health():
528
  def models_status():
529
  """Check if models are loaded and available"""
530
  try:
531
- model_dir = pathlib.Path(os.getenv("MODEL_DIR", "/app/models/ul"))
532
- required_files = ["label_encoder_ul.pkl", "scaler_ul.pkl", "xgb_drivestyle_ul.pkl"]
 
533
 
534
- available_files = []
535
- missing_files = []
536
 
537
- for file in required_files:
538
- file_path = model_dir / file
539
  if file_path.exists():
540
- available_files.append(file)
541
  else:
542
- missing_files.append(file)
543
 
544
- status = "ready" if len(available_files) == len(required_files) else "loading"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
 
546
  return {
547
- "status": status,
548
- "model_directory": str(model_dir),
549
- "available_files": available_files,
550
- "missing_files": missing_files,
551
- "total_files": len(required_files),
552
- "loaded_files": len(available_files)
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  except Exception as e:
555
  return {
@@ -564,6 +629,17 @@ def models_status():
564
  def get_events():
565
  return PIPELINE_EVENTS
566
 
 
 
 
 
 
 
 
 
 
 
 
567
 
568
  # ────── Delete event from dashboard ──────────────
569
  @app.delete("/events/remove/{timestamp}")
@@ -845,7 +921,7 @@ async def get_latest_model_version():
845
  Get the latest model version information for the UI.
846
  """
847
  try:
848
- from utils.download import get_latest_version
849
 
850
  # Get the latest version from Hugging Face
851
  latest_version = get_latest_version()
@@ -872,4 +948,4 @@ async def get_latest_model_version():
872
  raise HTTPException(
873
  status_code=500,
874
  detail=f"Failed to get latest model version: {str(e)}"
875
- )
 
15
  import matplotlib.pyplot as plt
16
  import seaborn as sns
17
  from sklearn.preprocessing import MinMaxScaler, StandardScaler
 
18
  # Utils
19
+ from sklearn.impute import KNNImputer
20
  import os, datetime, json, logging, re
21
  from datetime import timedelta
22
  import pathlib
 
29
  from data.firebase_saver import FirebaseSaver, save_csv_increment, save_dataframe_increment
30
 
31
  # UL Model
32
+ from utils.dbehavior_labeler import ULLabeler
33
+
34
+ # Fuel Efficiency Model
35
+ from utils.efficiency_labeler import EfficiencyLabeler
36
 
37
  # RLHF Training
38
  from train import RLHFTrainer
 
61
  os.makedirs(PLOT_DIR, exist_ok=True)
62
 
63
  DRIVE_STYLE = [] # latest UL predictions (string labels) — overwritten each run
64
+ FUEL_EFFICIENCY = [] # latest fuel efficiency predictions (0-100%) — overwritten each run
65
 
66
  # Init temp empty file
67
  if not os.path.exists(RAW_CSV):
 
82
  """Download models on app startup"""
83
  try:
84
  logger.info("🚀 Starting model download...")
85
+ from utils.dbehavior_download import download_latest_models
86
 
87
  # Load .env file if it exists
88
  env_path = pathlib.Path(".env")
 
100
  if success:
101
  logger.info("✅ Models downloaded successfully on startup")
102
  else:
103
+ logger.warning("⚠️ Driver behavior model download failed - some features may not work")
104
+
105
+ # Download fuel efficiency models
106
+ from utils.efficiency_download import download_latest_efficiency_models
107
+ success_efficiency = download_latest_efficiency_models()
108
+ if success_efficiency:
109
+ logger.info("✅ Fuel efficiency models downloaded successfully")
110
+ else:
111
+ logger.warning("⚠️ Fuel efficiency model download failed - some features may not work")
112
+
113
+ if success_ul or success_efficiency:
114
+ logger.info("✅ At least one model type downloaded successfully")
115
+ else:
116
+ logger.warning("⚠️ All model downloads failed - some features may not work")
117
 
118
  except Exception as e:
119
  logger.error(f"❌ Startup model download failed: {e}")
 
474
  logger.info(f"✅ UL labels generated ({len(DRIVE_STYLE)}) → {labeled_path}")
475
  except Exception as e:
476
  logger.error(f"❌ UL labeling failed: {e}")
477
+
478
+ # 9.5) Fuel efficiency predictions
479
+ efficiency_path = None
480
+ try:
481
+ efficiency_labeler = EfficiencyLabeler.get()
482
+ efficiency_preds = efficiency_labeler.predict_df(df)
483
+ # update global FUEL_EFFICIENCY (overwrite if already exists)
484
+ global FUEL_EFFICIENCY
485
+ FUEL_EFFICIENCY = [float(p) for p in efficiency_preds]
486
+ # write efficiency CSV (fuel_efficiency column)
487
+ df_efficiency = df_for_persist.copy()
488
+ df_efficiency["fuel_efficiency"] = FUEL_EFFICIENCY
489
+ efficiency_path = os.path.join(CLEANED_DIR, f"cleaned_{norm_ts}_efficiency.csv")
490
+ df_efficiency.to_csv(efficiency_path, index=False)
491
+ df_for_persist = df_efficiency
492
+ # Update the global FUEL_EFFICIENCY list
493
+ logger.info(f"✅ Fuel efficiency scores generated ({len(FUEL_EFFICIENCY)}) → {efficiency_path}")
494
+ logger.info(f"📊 Drive efficiency: {FUEL_EFFICIENCY[0]:.1f}%" if FUEL_EFFICIENCY else "No efficiency score")
495
+ except Exception as e:
496
+ logger.error(f"❌ Fuel efficiency scoring failed: {e}")
497
  # 10) Plots
498
  _plot_corr(df, norm_ts)
499
  _plot_trend(df, norm_ts)
 
565
  def models_status():
566
  """Check if models are loaded and available"""
567
  try:
568
+ # Driver behavior model status
569
+ ul_model_dir = pathlib.Path(os.getenv("MODEL_DIR", "/app/models/ul"))
570
+ ul_required_files = ["label_encoder_ul.pkl", "scaler_ul.pkl", "xgb_drivestyle_ul.pkl"]
571
 
572
+ ul_available_files = []
573
+ ul_missing_files = []
574
 
575
+ for file in ul_required_files:
576
+ file_path = ul_model_dir / file
577
  if file_path.exists():
578
+ ul_available_files.append(file)
579
  else:
580
+ ul_missing_files.append(file)
581
 
582
+ ul_status = "ready" if len(ul_available_files) == len(ul_required_files) else "loading"
583
+
584
+ # Fuel efficiency model status
585
+ efficiency_model_dir = pathlib.Path(os.getenv("EFFICIENCY_MODEL_DIR", "/app/models/efficiency"))
586
+ efficiency_required_files = ["efficiency_model.joblib"]
587
+
588
+ efficiency_available_files = []
589
+ efficiency_missing_files = []
590
+
591
+ for file in efficiency_required_files:
592
+ file_path = efficiency_model_dir / file
593
+ if file_path.exists():
594
+ efficiency_available_files.append(file)
595
+ else:
596
+ efficiency_missing_files.append(file)
597
+
598
+ efficiency_status = "ready" if len(efficiency_available_files) == len(efficiency_required_files) else "loading"
599
 
600
  return {
601
+ "driver_behavior": {
602
+ "status": ul_status,
603
+ "model_directory": str(ul_model_dir),
604
+ "available_files": ul_available_files,
605
+ "missing_files": ul_missing_files,
606
+ "total_files": len(ul_required_files),
607
+ "loaded_files": len(ul_available_files)
608
+ },
609
+ "fuel_efficiency": {
610
+ "status": efficiency_status,
611
+ "model_directory": str(efficiency_model_dir),
612
+ "available_files": efficiency_available_files,
613
+ "missing_files": efficiency_missing_files,
614
+ "total_files": len(efficiency_required_files),
615
+ "loaded_files": len(efficiency_available_files)
616
+ },
617
+ "overall_status": "ready" if (ul_status == "ready" and efficiency_status == "ready") else "loading"
618
  }
619
  except Exception as e:
620
  return {
 
629
  def get_events():
630
  return PIPELINE_EVENTS
631
 
632
+ @app.get("/predictions/latest")
633
+ def get_latest_predictions():
634
+ """Get the latest driver behavior and fuel efficiency predictions"""
635
+ return {
636
+ "driver_behavior": DRIVE_STYLE,
637
+ "fuel_efficiency": FUEL_EFFICIENCY,
638
+ "timestamp": datetime.datetime.now().isoformat(),
639
+ "driver_behavior_count": len(DRIVE_STYLE),
640
+ "fuel_efficiency_count": len(FUEL_EFFICIENCY)
641
+ }
642
+
643
 
644
  # ────── Delete event from dashboard ──────────────
645
  @app.delete("/events/remove/{timestamp}")
 
921
  Get the latest model version information for the UI.
922
  """
923
  try:
924
+ from utils.dbehavior_download import get_latest_version
925
 
926
  # Get the latest version from Hugging Face
927
  latest_version = get_latest_version()
 
948
  raise HTTPException(
949
  status_code=500,
950
  detail=f"Failed to get latest model version: {str(e)}"
951
+ )
bulk_mongo_upload.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Bulk MongoDB Upload Script for Fuel Efficiency Data
4
+ Processes all pending CSV files and uploads them to MongoDB when WiFi is available.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import glob
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+
13
+ # Load environment variables from .env file
14
+ try:
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+ except ImportError:
18
+ print("⚠️ python-dotenv not installed. Using system environment variables only.")
19
+ print(" Install with: pip install python-dotenv")
20
+
21
+ # Add parent directory to path to import mongo_saver
22
+ current_dir = os.path.dirname(__file__)
23
+ sys.path.append(current_dir)
24
+
25
+ from mongo_saver import save_csv_to_mongo
26
+
27
def check_mongodb_config():
    """Check if MongoDB configuration is available.

    Returns:
        bool: True when MONGO_URI is set in the environment, False otherwise.
    """
    mongo_uri = os.getenv("MONGO_URI")
    if not mongo_uri:
        print("Error: MONGO_URI not found in .env file")
        return False

    # BUG FIX: message previously contained a stray ')' ("MongoDB URI configured)").
    # Deliberately do NOT echo the URI itself - it may embed credentials.
    print("MongoDB URI configured")
    return True
36
+
37
def find_pending_csv_files(logs_dir):
    """Find all OBD CSV files that haven't been uploaded yet."""
    fuel_logs_dir = os.path.join(logs_dir, "FuelLogs")
    if not os.path.exists(fuel_logs_dir):
        print(f"FuelLogs directory not found: {fuel_logs_dir}")
        return []

    # Match the logger's naming pattern, newest sessions first.
    matches = glob.glob(os.path.join(fuel_logs_dir, "obd_data_log_*.csv"))
    csv_files = sorted(matches, key=os.path.getmtime, reverse=True)

    print(f"Found {len(csv_files)} fuel efficiency CSV files to process")
    return csv_files
54
+
55
def create_session_id_from_filename(csv_filepath):
    """Generate a session ID from the CSV filename."""
    # obd_data_log_20231201_120000.csv -> fuel_efficiency_20231201_120000
    name = os.path.basename(csv_filepath)
    return name.replace('obd_data_log_', 'fuel_efficiency_').replace('.csv', '')
61
+
62
def upload_csv_files_to_mongo(csv_files, max_uploads=None):
    """Upload each CSV to MongoDB, moving successes into a 'processed' folder.

    Args:
        csv_files: CSV file paths to upload (expected newest first).
        max_uploads: optional cap on how many files to process this batch.
    """
    if not csv_files:
        print("No CSV files to upload")
        return

    # Limit uploads if specified
    if max_uploads:
        csv_files = csv_files[:max_uploads]
        print(f"Limiting upload to {max_uploads} files for this batch")

    upload_stats = {
        'successful': 0,
        'failed': 0,
        'total': len(csv_files)
    }

    print(f"Starting bulk upload of {len(csv_files)} fuel efficiency sessions...")
    print("=" * 60)

    for i, csv_file in enumerate(csv_files, 1):
        try:
            # Generate session ID
            session_id = create_session_id_from_filename(csv_file)
            filename = os.path.basename(csv_file)

            # BUG FIX: `filename` was computed but never interpolated into
            # these messages (they printed a literal placeholder instead).
            print(f"[{i}/{len(csv_files)}] Processing: {filename}")
            print(f"  Session ID: {session_id}")

            success = save_csv_to_mongo(csv_file, session_id)

            if success:
                upload_stats['successful'] += 1
                print("Upload successful")
                # Keep the source directory clean so reruns skip this file.
                move_to_processed_folder(csv_file)
            else:
                upload_stats['failed'] += 1
                print("Upload failed")

        except Exception as e:
            upload_stats['failed'] += 1
            print(f"Error processing {os.path.basename(csv_file)}: {e}")

        print("-" * 40)

    # Print summary
    print("=" * 60)
    print("BULK UPLOAD SUMMARY")
    print(f"Successful uploads: {upload_stats['successful']}")
    print(f"Failed uploads: {upload_stats['failed']}")
    print(f"Total processed: {upload_stats['total']}")

    success_rate = (upload_stats['successful'] / upload_stats['total']) * 100 if upload_stats['total'] > 0 else 0
    print(f"Success rate: {success_rate:.1f}%")
117
+
118
def move_to_processed_folder(csv_file):
    """Move a successfully uploaded CSV into a sibling 'processed' folder.

    Best-effort: failures are reported but never raised, so a move problem
    cannot abort the surrounding bulk upload.
    """
    try:
        # Create processed folder if it doesn't exist
        processed_dir = os.path.join(os.path.dirname(csv_file), "processed")
        os.makedirs(processed_dir, exist_ok=True)

        # Move file
        filename = os.path.basename(csv_file)
        new_path = os.path.join(processed_dir, filename)
        os.rename(csv_file, new_path)
        # BUG FIX: the filename was never interpolated into this message.
        print(f"Moved to processed folder: {filename}")

    except Exception as e:
        print(f"Could not move file to processed folder: {e}")
133
+
134
def main():
    """Run the interactive bulk upload: find pending CSVs, confirm, upload."""
    print("Fuel Efficiency Data - Bulk MongoDB Upload")
    print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    # Check MongoDB configuration first
    if not check_mongodb_config():
        return

    # Find logs directory (relative to script location)
    logs_dir = os.path.abspath(os.path.join(current_dir, "..", "logs"))
    print(f"Searching for CSV files in: {logs_dir}")

    # Find pending CSV files
    csv_files = find_pending_csv_files(logs_dir)
    if not csv_files:
        print("No pending CSV files to upload - all caught up!")
        return

    # Show files to be processed
    print("\nFiles to upload:")
    for i, csv_file in enumerate(csv_files[:10], 1):  # Show first 10
        filename = os.path.basename(csv_file)
        mod_time = datetime.fromtimestamp(os.path.getmtime(csv_file))
        # BUG FIX: the filename was never interpolated into the listing.
        print(f"  {i}. {filename} (modified: {mod_time.strftime('%Y-%m-%d %H:%M')})")

    if len(csv_files) > 10:
        print(f"  ... and {len(csv_files) - 10} more files")

    # Confirm upload
    print(f"\nUpload {len(csv_files)} fuel efficiency sessions to MongoDB? (y/n): ", end="")
    response = input().strip().lower()
    if response not in ['y', 'yes']:
        print("Upload cancelled by user")
        return

    # Perform bulk upload
    upload_csv_files_to_mongo(csv_files)

    print(f"\nBulk upload completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
179
+
180
+ if __name__ == "__main__":
181
+ main()
data/mongo_saver.py CHANGED
@@ -53,7 +53,8 @@ class MongoSaver:
53
  self.mongo_uri,
54
  serverSelectionTimeoutMS=5000, # 5 second timeout
55
  connectTimeoutMS=10000, # 10 second connection timeout
56
- socketTimeoutMS=10000 # 10 second socket timeout
 
57
  )
58
 
59
  # Test connection
 
53
  self.mongo_uri,
54
  serverSelectionTimeoutMS=5000, # 5 second timeout
55
  connectTimeoutMS=10000, # 10 second connection timeout
56
+ socketTimeoutMS=10000, # 10 second socket timeout
57
+ tlsAllowInvalidCertificates=True # Fix for SSL certificate issues on macOS
58
  )
59
 
60
  # Test connection
efficiency/eval.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fuel Efficiency Model Evaluation Script
3
+ Integration-ready evaluation script for fuel efficiency scoring in the main pipeline
4
+ Based on the original eval.py but reformatted for system integration
5
+ """
6
+
7
+ import os
8
+ import glob
9
+ import joblib
10
+ import logging
11
+ import numpy as np
12
+ import pandas as pd
13
+ from pathlib import Path
14
+ from typing import List, Dict, Any, Optional, Tuple
15
+ from datetime import datetime
16
+
17
+ logger = logging.getLogger("efficiency-eval")
18
+ logger.setLevel(logging.INFO)
19
+ if not logger.handlers:
20
+ handler = logging.StreamHandler()
21
+ handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
22
+ logger.addHandler(handler)
23
+
24
+ # Constants
25
+ KMH_TO_MS = 1000.0/3600.0
26
+
27
+ class EfficiencyEvaluator:
28
+ """
29
+ Fuel efficiency evaluator for OBD data using trained model.
30
+ Provides drive-level efficiency scoring for integration into main pipeline.
31
+ """
32
+
33
+ def __init__(self, model_path: Optional[str] = None):
34
+ """
35
+ Initialize the evaluator.
36
+
37
+ Args:
38
+ model_path: Path to the trained model. If None, will try to load from default location.
39
+ """
40
+ self.model_path = model_path or self._find_model_path()
41
+ self.model_artifacts = None
42
+ self.metadata = None
43
+ self._load_model()
44
+
45
+ def _find_model_path(self) -> str:
46
+ """Find the model path from various possible locations"""
47
+ possible_paths = [
48
+ "./efficiency_export/efficiency_model.joblib",
49
+ "/app/models/efficiency/efficiency_model.joblib",
50
+ "./efficiency_model.joblib"
51
+ ]
52
+
53
+ for path in possible_paths:
54
+ if os.path.exists(path):
55
+ logger.info(f"📁 Found model at: {path}")
56
+ return path
57
+
58
+ # Try to download from Hugging Face
59
+ logger.warning("⚠️ Model not found locally, attempting download...")
60
+ try:
61
+ from utils.efficiency_download import download_latest_efficiency_models
62
+ success = download_latest_efficiency_models()
63
+ if success:
64
+ return "/app/models/efficiency/efficiency_model.joblib"
65
+ except Exception as e:
66
+ logger.error(f"❌ Failed to download model: {e}")
67
+
68
+ raise FileNotFoundError("Could not find or download efficiency model")
69
+
70
    def _load_model(self):
        """Load the efficiency model and metadata.

        Populates ``self.model_artifacts`` (the joblib bundle read by
        `_predict_drive`: estimators, scaler, feature schema, thresholds,
        calibration) and, when the sidecar JSON exists, ``self.metadata``.
        Re-raises any loading error after logging it.
        """
        try:
            logger.info(f"📥 Loading efficiency model from: {self.model_path}")

            # Load model artifacts
            self.model_artifacts = joblib.load(self.model_path)

            # Load metadata if available (sidecar JSON next to the joblib file)
            meta_path = self.model_path.replace("efficiency_model.joblib", "efficiency_meta.json")
            if os.path.exists(meta_path):
                import json
                with open(meta_path, 'r') as f:
                    self.metadata = json.load(f)

            logger.info(f"✅ Model loaded | kind: {self.model_artifacts.get('model_kind', 'unknown')}")
            logger.info(f"📊 Features: {len(self.model_artifacts.get('feature_names', []))}")

            if self.metadata:
                logger.info(f"📅 Training date: {self.metadata.get('training_date', 'unknown')}")
                logger.info(f"📈 OOF MAE: {self.metadata.get('oof_stats', {}).get('oof_mae_qmap', 'unknown')}")

        except Exception as e:
            logger.error(f"❌ Error loading model: {e}")
            raise
95
+
96
+ def _ensure_dt(self, s):
97
+ """Ensure datetime conversion"""
98
+ return pd.to_datetime(s, errors="coerce")
99
+
100
+ def _infer_base_interval_seconds(self, ts, fallback=1.0):
101
+ """Infer base interval from timestamps"""
102
+ ts = pd.to_datetime(ts, errors="coerce")
103
+ dt = ts.diff().dt.total_seconds().dropna()
104
+ med = float(np.nanmedian(dt)) if len(dt) else fallback
105
+ return fallback if (not np.isfinite(med) or med <= 0) else med
106
+
107
+ def _rows_for(self, seconds, base_sec):
108
+ """Calculate number of rows for given time window"""
109
+ return max(3, int(round(seconds / max(1e-3, base_sec))))
110
+
111
    def _add_basic_derivatives(self, d):
        """Add basic derivatives (acceleration, jerk, distance).

        Works on a copy of *d*: rows without a parseable timestamp are
        dropped, the frame is time-sorted, SPEED (assumed km/h - confirm
        against the OBD logger) is converted to SPEED_ms (m/s), and ACCEL,
        JERK and per-row distance dist_m are derived by finite differences.
        """
        d = d.copy()
        d["timestamp"] = self._ensure_dt(d["timestamp"])
        d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
        # Median sampling interval drives the finite-difference denominators.
        base = self._infer_base_interval_seconds(d["timestamp"], 1.0)

        # Convert numeric columns
        for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
            if c in d.columns:
                d[c] = pd.to_numeric(d[c], errors="coerce")

        # Convert speed to m/s
        if "SPEED_ms" not in d.columns:
            d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan

        # Calculate derivatives (1st/2nd difference over the base interval)
        d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
        d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)

        # Calculate distance; gaps are clipped to 10x the base interval so a
        # long recording pause cannot inflate the travelled distance.
        dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        d["dist_m"] = d["SPEED_ms"] * dt

        return d
136
+
137
    def _idle_rule(self, d, thr):
        """Apply idle detection rule.

        A row counts as idle when speed, throttle, load, MAF and |accel| are
        all at or below their thresholds from *thr*; a missing column is
        treated as "low" (the Python `True` broadcasts through the `&`).
        The 0/1 mask is smoothed with a centered 5-row rolling median to
        suppress single-sample flickers before being returned as bool.
        """
        speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
        thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
        load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
        maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
        accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))

        mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
        k = 5  # smoothing window (rows)
        return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
                if len(mask) >= k else mask.astype(bool))
149
+
150
+ def _sharp_mask_from_thresholds(self, d, thr):
151
+ """Detect sharp acceleration/deceleration events"""
152
+ thr_a = thr.get("ACCEL_HIGH_Q85",
153
+ np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
154
+ thr_j = thr.get("JERK_HIGH_Q90",
155
+ np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
156
+ return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)
157
+
158
+ def _q(self, s, p):
159
+ """Quantile helper function"""
160
+ s = pd.to_numeric(s, errors="coerce")
161
+ return float(np.nanquantile(s, p)) if s.notna().any() else 0.0
162
+
163
    def _agg_for_ml_drive(self, g, thr):
        """Aggregate drive-level features for ML model.

        Collapses the per-sample frame *g* into the single feature row the
        efficiency model was trained on, using training-time thresholds *thr*.
        Returns a plain dict keyed by feature name.
        """
        g = self._add_basic_derivatives(g.copy())
        base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
        g["IDLE_RULE"] = self._idle_rule(g, thr)

        # Total drive time with gaps clipped to 10x the base interval.
        dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        T = float(dt.sum())
        mins = max(1e-6, T/60)

        # Sharp events per minute: each rising+falling edge pair is one event.
        sharp = self._sharp_mask_from_thresholds(g, thr).values
        edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
        sharp_freq_pm = (len(edges)//2)/mins

        # Fraction of samples at/above the training-set RPM/MAF 90th percentiles.
        rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
        frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
        frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0

        # Speed coefficient of variation over a ~10 s rolling window.
        W10 = self._rows_for(10, base)
        speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())

        return {
            "duration_min": max(1e-6, T/60),
            "distance_km": g["dist_m"].sum()/1000.0,
            "speed_mean": float(g["SPEED_ms"].mean()),
            "speed_q90": self._q(g["SPEED_ms"], 0.90),
            "speed_cv": speed_cv,
            "accel_q90": self._q(g["ACCEL"].abs(), 0.90),
            "jerk_q90": self._q(g["JERK"].abs(), 0.90),
            "sharp_freq_pm": sharp_freq_pm,
            "idle_frac": float(g["IDLE_RULE"].mean()),
            # Idle episodes per minute (edge pairs of the smoothed idle mask).
            "idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
            "rpm_q90": self._q(g["RPM"], 0.90) if "RPM" in g else 0.0,
            "maf_q90": self._q(g["MAF"], 0.90) if "MAF" in g else 0.0,
            "load_q85": self._q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
            "thr_q85": self._q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
            "frac_rpm90": frac_rpm90,
            "frac_maf90": frac_maf90,
            # Proxy for fuel burn intensity (high RPM combined with high airflow).
            "fuel_intensity": (self._q(g["RPM"], 0.90)*self._q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
        }
203
+
204
+ def _align_to_schema(self, feats, art):
205
+ """Align features to model schema"""
206
+ x = pd.DataFrame([feats])
207
+ for c in art["feature_names"]:
208
+ if c not in x.columns:
209
+ x[c] = 0.0
210
+ x = x[art["feature_names"]]
211
+ if len(art["num_cols"]):
212
+ x.loc[:, art["num_cols"]] = art["scaler"].transform(x[art["num_cols"]])
213
+ return x
214
+
215
    def _predict_drive(self, df_drive):
        """Predict efficiency for a single drive.

        Returns:
            tuple: (calibrated_score, raw_model_output, feature_dict). The raw
            regressor output is mapped onto the 0-100 scale via the stored
            quantile mapping when present, otherwise simply clipped.
        """
        art = self.model_artifacts
        thr = art["thr"]

        feats = self._agg_for_ml_drive(df_drive, thr)
        x = self._align_to_schema(feats, art)

        # Get model (random forest or gradient boosting, chosen at train time)
        mdl = art["rf"] if art.get("model_kind") == "rf" else art["gbm"]
        raw = float(mdl.predict(x)[0])

        # Apply quantile-mapping calibration
        if art.get("calib", {}).get("type") == "qmap":
            rq = np.array(art["calib"]["rq"])
            yq = np.array(art["calib"]["yq"])

            # Ensure strictly increasing rq for stable interpolation
            # (np.interp requires monotonically increasing x-coordinates).
            for i in range(1, len(rq)):
                if rq[i] <= rq[i-1]:
                    rq[i] = rq[i-1] + 1e-6

            pred = float(np.clip(np.interp(raw, rq, yq), 0, 100))
        else:
            pred = float(np.clip(raw, 0, 100))

        return pred, raw, feats
242
+
243
    def predict_single_drive(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Predict fuel efficiency for a single drive.

        Args:
            df: DataFrame with OBD data including timestamp, SPEED, RPM, MAF, etc.
                (timestamp and SPEED are required; the other PIDs are optional).

        Returns:
            Dictionary containing efficiency prediction and metadata. On any
            failure a zeroed result with an "error" key is returned instead of
            raising, so callers in the pipeline never crash on a bad drive.
        """
        try:
            if self.model_artifacts is None:
                raise RuntimeError("Efficiency model not loaded")

            # Fewer than 5 samples cannot support the derivative features.
            if len(df) < 5:
                logger.warning("⚠️ Drive too short for efficiency prediction")
                return {
                    "efficiency_score": 0.0,
                    "raw_score": 0.0,
                    "duration_min": 0.0,
                    "distance_km": 0.0,
                    "note": "too short",
                    "features": {}
                }

            # Calculate basic drive metrics (duration / distance for reporting)
            g2 = self._add_basic_derivatives(df[["timestamp","SPEED"]].assign(
                RPM=df.get("RPM"), MAF=df.get("MAF"),
                ENGINE_LOAD=df.get("ENGINE_LOAD"), THROTTLE_POS=df.get("THROTTLE_POS")))

            dt = g2["timestamp"].diff().dt.total_seconds().fillna(0)
            mins = float(dt.sum())/60.0
            dist_km = float(pd.to_numeric(g2["dist_m"], errors="coerce").fillna(0).sum())/1000.0

            # Predict efficiency (calibrated 0-100 score + raw model output)
            efficiency_score, raw_score, features = self._predict_drive(df)

            logger.info(f"📊 Drive efficiency: {efficiency_score:.1f}% (raw: {raw_score:.3f})")

            return {
                "efficiency_score": round(efficiency_score, 1),
                "raw_score": round(raw_score, 3),
                "duration_min": round(mins, 2),
                "distance_km": round(dist_km, 3),
                "features": features,
                "timestamp": datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"❌ Error predicting efficiency: {e}")
            return {
                "efficiency_score": 0.0,
                "raw_score": 0.0,
                "duration_min": 0.0,
                "distance_km": 0.0,
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            }
301
+
302
def predict_batch(self, csv_files: List[str]) -> pd.DataFrame:
    """
    Predict efficiency for multiple CSV files (batch processing).

    Each file is treated as one drive; per-file failures are recorded as
    NaN rows rather than aborting the batch.

    Args:
        csv_files: List of CSV file paths

    Returns:
        DataFrame with one row per input file, sorted by drive_id.
    """
    logger.info(f"📊 Processing {len(csv_files)} CSV files...")

    rows = []
    for i, csv_path in enumerate(csv_files, start=1):
        try:
            # Load CSV — assumes a "timestamp" column exists (TODO confirm schema)
            df = pd.read_csv(csv_path)
            df["source_file"] = os.path.basename(csv_path)
            df["drive_id"] = i
            df["timestamp"] = self._ensure_dt(df["timestamp"])
            df = df.dropna(subset=["timestamp"]).sort_values("timestamp")

            # Too few samples to derive features — emit a NaN placeholder row.
            if len(df) < 5:
                rows.append({
                    "source_file": os.path.basename(csv_path),
                    "drive_id": i,
                    "duration_min": np.nan,
                    "distance_km": np.nan,
                    "pred_efficiency_ml": np.nan,
                    "raw": np.nan,
                    "note": "too short"
                })
                continue

            # Predict efficiency
            result = self.predict_single_drive(df)

            rows.append({
                "source_file": os.path.basename(csv_path),
                "drive_id": i,
                "duration_min": result["duration_min"],
                "distance_km": result["distance_km"],
                "pred_efficiency_ml": result["efficiency_score"],
                "raw": result["raw_score"]
            })

        except Exception as e:
            # Keep the batch alive; record the failure for this file only.
            logger.error(f"❌ Error processing {csv_path}: {e}")
            rows.append({
                "source_file": os.path.basename(csv_path),
                "drive_id": i,
                "duration_min": np.nan,
                "distance_km": np.nan,
                "pred_efficiency_ml": np.nan,
                "raw": np.nan,
                "error": str(e)
            })

    pred_df = pd.DataFrame(rows).sort_values("drive_id").reset_index(drop=True)

    # Calculate statistics (only over rows that actually produced a score)
    valid_preds = pred_df["pred_efficiency_ml"].dropna()
    if len(valid_preds) > 0:
        logger.info(f"📊 Batch results: {len(valid_preds)} valid predictions")
        logger.info(f"📈 Efficiency range: {valid_preds.min():.1f}% - {valid_preds.max():.1f}%")
        logger.info(f"📊 Mean efficiency: {valid_preds.mean():.1f}%")
        logger.info(f"📊 Std efficiency: {valid_preds.std():.1f}%")

    return pred_df
371
+
372
def get_model_info(self) -> Dict[str, Any]:
    """Return a summary of the currently loaded efficiency model."""
    art = self.model_artifacts
    if art is None:
        return {"error": "Model not loaded"}

    features = art.get("feature_names", [])
    info = {
        "model_kind": art.get("model_kind", "unknown"),
        "feature_count": len(features),
        "features": features,
        "calibration_type": art.get("calib", {}).get("type", "none"),
        "oof_stats": art.get("oof_stats", {}),
        "metadata": self.metadata,
        "model_path": self.model_path
    }
    return info
386
+
387
def evaluate_csv_files(csv_directory: str = "./") -> pd.DataFrame:
    """
    Convenience function to evaluate all CSV files in a directory.

    Args:
        csv_directory: Directory containing CSV files

    Returns:
        DataFrame with efficiency predictions (empty when no CSVs found)
    """
    # Find CSV files
    csv_patterns = [
        os.path.join(csv_directory, "*.csv"),
        os.path.join("/content", "*.csv")  # For Colab compatibility
    ]

    csv_files = []
    for pattern in csv_patterns:
        csv_files.extend(glob.glob(pattern))

    # Deduplicate: the two patterns overlap when csv_directory is /content,
    # which would otherwise score the same drive twice.
    csv_files = sorted({p for p in csv_files if os.path.isfile(p)})

    if not csv_files:
        logger.warning("⚠️ No CSV files found")
        return pd.DataFrame()

    # Initialize evaluator and process files
    evaluator = EfficiencyEvaluator()
    return evaluator.predict_batch(csv_files)
416
+
417
def main():
    """Main function for command-line usage.

    Returns a process exit code: 0 on success, 1 when no CSVs were found
    or evaluation failed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Evaluate fuel efficiency model")
    parser.add_argument("--csv-dir", default="./", help="Directory containing CSV files")
    parser.add_argument("--model-path", help="Path to efficiency model file")
    parser.add_argument("--output", help="Output CSV file path")

    args = parser.parse_args()

    try:
        # Initialize evaluator (loads the model; --model-path overrides default)
        evaluator = EfficiencyEvaluator(model_path=args.model_path)

        # Print model info
        info = evaluator.get_model_info()
        print(f"📊 Model info: {info}")

        # Evaluate CSV files
        results_df = evaluate_csv_files(args.csv_dir)

        if len(results_df) > 0:
            print("\n=== Batch Efficiency Scores (per CSV / drive) ===")
            print(results_df.to_string(index=False))

            # Save results if output path specified
            if args.output:
                results_df.to_csv(args.output, index=False)
                print(f"\n💾 Results saved to: {args.output}")
        else:
            print("❌ No valid CSV files found for evaluation")
            return 1

        return 0

    except Exception as e:
        print(f"❌ Evaluation failed: {e}")
        return 1

if __name__ == "__main__":
    exit(main())
efficiency/retrain.py ADDED
@@ -0,0 +1,698 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fuel Efficiency Model Retraining Script
3
+ Reproducible training script for fuel efficiency model with Hugging Face integration
4
+ Based on the original retrain.py but reformatted for system integration
5
+ """
6
+
7
+ import os
8
+ import glob
9
+ import json
10
+ import math
11
+ import joblib
12
+ import warnings
13
+ import logging
14
+ import numpy as np
15
+ import pandas as pd
16
+ from pathlib import Path
17
+ from typing import Dict, List, Tuple, Optional, Any
18
+ from datetime import datetime
19
+
20
+ # ML imports
21
+ from sklearn.preprocessing import StandardScaler
22
+ from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
23
+ from sklearn.model_selection import GroupKFold
24
+ from sklearn.metrics import mean_absolute_error
25
+ from sklearn.linear_model import Ridge
26
+
27
+ # Hugging Face integration
28
+ from huggingface_hub import HfApi, Repository
29
+
30
# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Setup logging — handler is only attached once so repeated imports
# don't produce duplicate log lines.
logger = logging.getLogger("efficiency-retrain")
logger.setLevel(logging.INFO)
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
    logger.addHandler(handler)

# Constants
SEED = 42  # RNG seed for reproducible training
KMH_TO_MS = 1000.0/3600.0  # km/h -> m/s conversion factor
np.random.seed(SEED)
45
+
46
class EfficiencyModelTrainer:
    """
    Fuel efficiency model trainer with Hugging Face integration.
    Handles data loading, feature engineering, model training, and model upload.
    """

    def __init__(self,
                 csv_directory: str = "./",
                 export_directory: str = "./efficiency_export",
                 repo_id: str = "BinKhoaLe1812/Fuel_Efficiency_OBD"):
        """
        Initialize the trainer.

        Args:
            csv_directory: Directory containing CSV files for training
            export_directory: Directory to save trained model artifacts
            repo_id: Hugging Face repository ID for model upload
        """
        self.csv_directory = csv_directory
        self.export_directory = Path(export_directory)
        self.repo_id = repo_id
        # Token comes from the environment; without it training still runs
        # locally but the Hugging Face upload step is skipped.
        self.hf_token = os.getenv("HF_TOKEN")

        # Create export directory
        self.export_directory.mkdir(parents=True, exist_ok=True)

        # Initialize HF API if token available
        self.hf_api = None
        if self.hf_token:
            self.hf_api = HfApi(token=self.hf_token)
            logger.info(f"✅ Hugging Face API initialized for {repo_id}")
        else:
            logger.warning("⚠️ HF_TOKEN not set - model will not be uploaded to Hugging Face")
79
+
80
def load_training_data(self) -> pd.DataFrame:
    """Load and preprocess training data from CSV files.

    Globs CSVs from the configured directory (plus /content for Colab),
    tags each file as one drive, and returns a single time-sorted frame
    with derived columns added.

    Raises:
        RuntimeError: when no CSV file is found or none can be loaded.
    """
    logger.info("📊 Loading training data...")

    # Find CSV files
    csv_patterns = [
        os.path.join(self.csv_directory, "*.csv"),
        os.path.join("/content", "*.csv")  # For Colab compatibility
    ]

    csvs = []
    for pattern in csv_patterns:
        csvs.extend(glob.glob(pattern))

    csvs = sorted([p for p in csvs if os.path.isfile(p)])

    if not csvs:
        raise RuntimeError("No CSV logs found for training")

    logger.info(f"📁 Found {len(csvs)} CSV files")

    # Load and combine CSV files; each file becomes one drive (drive_id = i)
    frames = []
    for i, p in enumerate(csvs, start=1):
        try:
            d = pd.read_csv(p)
            d["source_file"] = os.path.basename(p)
            d["drive_id"] = i
            frames.append(d)
            logger.info(f"✅ Loaded {os.path.basename(p)} ({len(d)} rows)")
        except Exception as e:
            # Skip unreadable files instead of failing the whole run.
            logger.warning(f"⚠️ Failed to load {p}: {e}")

    if not frames:
        raise RuntimeError("No valid CSV files could be loaded")

    # Combine all data, drop rows without a parseable timestamp, and add
    # speed/accel/jerk/distance derivatives.
    df = pd.concat(frames, ignore_index=True)
    df["timestamp"] = self._ensure_dt(df["timestamp"])
    df = df.dropna(subset=["timestamp"]).sort_values(["drive_id", "timestamp"]).reset_index(drop=True)
    df = self._add_basic_derivatives(df)

    logger.info(f"📊 Combined dataset: {len(df)} rows, {df['drive_id'].nunique()} drives")
    return df
124
+
125
+ def _ensure_dt(self, s):
126
+ """Ensure datetime conversion"""
127
+ return pd.to_datetime(s, errors="coerce")
128
+
129
+ def _infer_base_interval_seconds(self, ts, fallback=1.0):
130
+ """Infer base interval from timestamps"""
131
+ ts = pd.to_datetime(ts, errors="coerce")
132
+ dt = ts.diff().dt.total_seconds().dropna()
133
+ med = float(np.nanmedian(dt)) if len(dt) else fallback
134
+ return fallback if (not np.isfinite(med) or med <= 0) else med
135
+
136
+ def _rows_for(self, seconds, base_sec):
137
+ """Calculate number of rows for given time window"""
138
+ return max(3, int(round(seconds / max(1e-3, base_sec))))
139
+
140
def _add_basic_derivatives(self, d):
    """Add basic derivatives (acceleration, jerk, distance).

    Works on a copy: converts sensor columns to numeric, derives SPEED_ms,
    ACCEL (m/s²), JERK (m/s³) from finite differences at the inferred
    sampling period, and per-row distance dist_m.
    """
    d = d.copy()
    d["timestamp"] = self._ensure_dt(d["timestamp"])
    d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
    base = self._infer_base_interval_seconds(d["timestamp"], 1.0)

    # Convert numeric columns (bad values become NaN rather than raising)
    for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
        if c in d.columns:
            d[c] = pd.to_numeric(d[c], errors="coerce")

    # Convert speed to m/s (SPEED assumed km/h per KMH_TO_MS)
    if "SPEED_ms" not in d.columns:
        d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan

    # Calculate derivatives using the median sampling period as dt
    d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
    d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)

    # Calculate distance; gaps longer than 10x the base interval are capped
    # so log dropouts don't inflate distance.
    dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
    d["dist_m"] = d["SPEED_ms"] * dt

    return d
165
+
166
def _idle_rule(self, d, thr):
    """Apply idle detection rule.

    A sample is idle when speed, throttle, load, MAF and |accel| are all
    below their fleet thresholds; columns missing from *d* are treated as
    satisfied (True). The raw mask is smoothed with a centred rolling
    median (k=5) to suppress single-sample flicker.
    """
    speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
    thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
    load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
    maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
    accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))

    mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
    k = 5
    return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
            if len(mask) >= k else mask.astype(bool))
178
+
179
+ def _sharp_mask_from_thresholds(self, d, thr):
180
+ """Detect sharp acceleration/deceleration events"""
181
+ thr_a = thr.get("ACCEL_HIGH_Q85",
182
+ np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
183
+ thr_j = thr.get("JERK_HIGH_Q90",
184
+ np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
185
+ return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)
186
+
187
+ def _run_lengths(self, mask):
188
+ """Calculate run lengths from boolean mask"""
189
+ m = np.asarray(mask, dtype=bool)
190
+ if m.size == 0:
191
+ return np.array([], int), np.array([], int)
192
+ dm = np.diff(np.r_[False, m, False].astype(int))
193
+ starts = np.where(dm == 1)[0]
194
+ ends = np.where(dm == -1)[0]
195
+ return starts, (ends - starts)
196
+
197
+ def _penalty(self, series):
198
+ """Calculate penalty function for efficiency scoring"""
199
+ arr = pd.to_numeric(series, errors="coerce").fillna(0).values
200
+ if arr.size == 0:
201
+ return pd.Series([], dtype=float, index=series.index)
202
+ q25, q50, q75 = np.quantile(arr, [0.25, 0.50, 0.75])
203
+ s = (q75-q25)/1.349 if (q75 > q25) else (np.std(arr) if np.std(arr) > 0 else 1.0)
204
+ return pd.Series(1/(1+np.exp(-(arr - q50)/max(1e-6, s))), index=series.index)
205
+
206
def compute_fleet_thresholds(self, df: pd.DataFrame) -> Dict[str, float]:
    """Compute fleet-wide thresholds for feature engineering.

    Quantile thresholds are only emitted for sensor columns that are
    present and have at least one non-NaN value; downstream code uses
    .get() with defaults for the rest.
    """
    logger.info("🔧 Computing fleet thresholds...")

    thr = {}

    # RPM threshold
    if "RPM" in df and df["RPM"].notna().any():
        thr["RPM90"] = float(np.nanquantile(df["RPM"], 0.90))

    # MAF threshold
    if "MAF" in df and df["MAF"].notna().any():
        thr["MAF90"] = float(np.nanquantile(df["MAF"], 0.90))

    # Throttle position thresholds
    if "THROTTLE_POS" in df and df["THROTTLE_POS"].notna().any():
        thr["THR_LOW_Q10"] = float(np.nanquantile(df["THROTTLE_POS"], 0.10))
        thr["THR_Q85"] = float(np.nanquantile(df["THROTTLE_POS"], 0.85))

    # Engine load thresholds
    if "ENGINE_LOAD" in df and df["ENGINE_LOAD"].notna().any():
        thr["LOAD_LOW_Q15"] = float(np.nanquantile(df["ENGINE_LOAD"], 0.15))
        thr["LOAD_Q85"] = float(np.nanquantile(df["ENGINE_LOAD"], 0.85))

    # Acceleration and jerk thresholds — derived on a rebuilt frame so the
    # ACCEL/JERK columns exist regardless of the input's state.
    tmpd = self._add_basic_derivatives(df[["timestamp","SPEED"]].assign(
        RPM=df.get("RPM"), MAF=df.get("MAF"),
        THROTTLE_POS=df.get("THROTTLE_POS"), ENGINE_LOAD=df.get("ENGINE_LOAD")))

    thr["ACCEL_LOW_Q20"] = float(np.nanquantile(tmpd["ACCEL"].abs().dropna(), 0.20)) if tmpd["ACCEL"].notna().any() else 0.05
    thr["ACCEL_HIGH_Q85"] = float(np.nanquantile(tmpd["ACCEL"].abs().dropna(), 0.85)) if tmpd["ACCEL"].notna().any() else 0.3
    thr["JERK_HIGH_Q90"] = float(np.nanquantile(tmpd["JERK"].abs().dropna(), 0.90)) if tmpd["JERK"].notna().any() else 0.5
    thr["SPEED_IDLE_MPS"] = 0.6  # fixed idle-speed cutoff in m/s

    logger.info(f"✅ Computed {len(thr)} fleet thresholds")
    return thr
242
+
243
def create_algorithmic_teacher(self, df: pd.DataFrame, thr: Dict[str, float]) -> pd.DataFrame:
    """Create algorithmic teacher labels for training.

    Aggregates each drive into behavior statistics (sharp events, idling,
    speed variability, high-load fractions), converts them into sigmoid
    penalties, and fits a least-squares combination of those penalties to
    a fuel-use proxy. The resulting 'efficiency_algo' column (0-100) is
    the supervision target for the ML model.

    Note: mutates *df* in place by adding an IDLE_RULE column.
    """
    logger.info("🎯 Creating algorithmic teacher labels...")

    # Apply idle rule to all drives
    df["IDLE_RULE"] = False
    for gid, g in df.groupby("drive_id", sort=True):
        df.loc[g.index, "IDLE_RULE"] = self._idle_rule(g, thr)

    # Extract thresholds
    thr_accel, thr_jerk = thr["ACCEL_HIGH_Q85"], thr["JERK_HIGH_Q90"]
    thr_rpm90, thr_maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)

    # Process each drive (drives with < 5 samples are skipped)
    drv = []
    for gid, g in df.groupby("drive_id", sort=True):
        if len(g) < 5:
            continue

        base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
        dt_s = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        T = float(dt_s.sum())
        mins = max(1e-6, T/60)

        # Sharp acceleration analysis: event frequency per minute and the
        # fraction of drive time spent inside sharp events.
        sharp = self._sharp_mask_from_thresholds(g, thr).values
        st, ln = self._run_lengths(sharp)
        freq_pm = len(ln)/mins
        dur_frac = (ln.sum()*base)/max(1e-6, T)

        # Peak analysis: per-event overshoot above the thresholds, capped at 1.5
        peaks = []
        for a, b in zip(st, ln):
            seg = g.iloc[a:a+b]
            pa = float(np.nanmax(np.abs(seg["ACCEL"])))
            pj = float(np.nanmax(np.abs(seg["JERK"])))
            over_a = max(0.0, (pa-thr_accel)/max(1e-6, thr_accel))
            over_j = max(0.0, (pj-thr_jerk)/max(1e-6, thr_jerk))
            peaks.append(min(1.5, 0.7*over_a + 0.3*over_j))

        sharp_mag = float(np.mean(peaks)) if peaks else 0.0

        # Idle analysis: fraction of samples idle, median idle-run seconds,
        # and idle episodes per minute.
        idle_frac = float(g["IDLE_RULE"].mean())
        sti, lni = self._run_lengths(g["IDLE_RULE"].values)
        idle_med_s = float(np.median(lni)*base if len(lni) else 0.0)
        idle_epm = len(lni)/mins

        # Speed variability (rolling coefficient of variation over ~10 s)
        W10 = self._rows_for(10, base)
        speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())

        # High-load fractions relative to fleet quantiles
        frac_rpm90 = float((g["RPM"] >= thr_rpm90).mean()) if ("RPM" in g and np.isfinite(thr_rpm90)) else 0.0
        frac_maf90 = float((g["MAF"] >= thr_maf90).mean()) if ("MAF" in g and np.isfinite(thr_maf90)) else 0.0
        frac_load85 = float((g["ENGINE_LOAD"] >= thr.get("LOAD_Q85", np.inf)).mean()) if "ENGINE_LOAD" in g else 0.0
        frac_thr85 = float((g["THROTTLE_POS"] >= thr.get("THR_Q85", np.inf)).mean()) if "THROTTLE_POS" in g else 0.0

        # Efficiency proxy — weighted sum of fuel-hungry behavior fractions
        proxy = (0.80*frac_rpm90 + 0.60*frac_maf90 + 0.15*frac_load85 + 0.10*frac_thr85 + 0.10*idle_frac)

        drv.append(dict(
            drive_id=gid, duration_min=mins, distance_km=g["dist_m"].sum()/1000.0,
            freq_pm=freq_pm, dur_frac=dur_frac, sharp_mag=sharp_mag,
            idle_frac=idle_frac, idle_med_s=idle_med_s, idle_epm=idle_epm,
            speed_cv=speed_cv, frac_rpm90=frac_rpm90, frac_maf90=frac_maf90, proxy=proxy
        ))

    dfeat = pd.DataFrame(drv).set_index("drive_id")

    # Calculate penalty-based features (each mapped to (0,1) by _penalty)
    P = pd.DataFrame({
        "p_freq": self._penalty(dfeat["freq_pm"]),
        "p_dur": self._penalty(dfeat["dur_frac"]),
        "p_mag": self._penalty(dfeat["sharp_mag"]),
        "p_idle": 0.7*self._penalty(dfeat["idle_frac"]) + 0.3*self._penalty(dfeat["idle_med_s"]),
        "p_cv": self._penalty(dfeat["speed_cv"]),
        "p_rpm": self._penalty(dfeat["frac_rpm90"]),
        "p_maf": self._penalty(dfeat["frac_maf90"]),
    }, index=dfeat.index)

    # Calculate efficiency scores: fit penalty weights by least squares to
    # -log(1 - proxy), then map back so efficiency stays in (0, 100].
    proxy = dfeat["proxy"].clip(0, 1-1e-6)
    target_lin = -np.log(1 - proxy)
    w = np.linalg.lstsq(P.values, target_lin.values, rcond=None)[0]
    dfeat["ineff_model"] = 1 - np.exp(-P.values @ w)
    dfeat["efficiency_algo"] = 100*(1 - dfeat["ineff_model"])

    logger.info(f"✅ Teacher range: {dfeat['efficiency_algo'].min():.1f} → {dfeat['efficiency_algo'].max():.1f}")
    return dfeat
333
+
334
+ def _q(self, s, p):
335
+ """Quantile helper function"""
336
+ s = pd.to_numeric(s, errors="coerce")
337
+ return float(np.nanquantile(s, p)) if s.notna().any() else 0.0
338
+
339
def _agg_for_ml_drive(self, g, thr):
    """Aggregate drive-level features for the ML model.

    Re-derives accel/jerk/distance, applies the idle rule, and returns a
    flat dict of per-drive statistics. Keys must stay aligned with the
    feature schema stored in the model artifacts.
    """
    g = self._add_basic_derivatives(g.copy())
    base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
    g["IDLE_RULE"] = self._idle_rule(g, thr)

    # Total drive time (gap-capped) and duration in minutes
    dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
    T = float(dt.sum())
    mins = max(1e-6, T/60)

    # Sharp-event frequency: edge pairs in the mask = number of runs
    sharp = self._sharp_mask_from_thresholds(g, thr).values
    edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
    sharp_freq_pm = (len(edges)//2)/mins

    rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
    frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
    frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0

    # Rolling (~10 s) coefficient of variation of speed
    W10 = self._rows_for(10, base)
    speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())

    return {
        "duration_min": max(1e-6, T/60),
        "distance_km": g["dist_m"].sum()/1000.0,
        "speed_mean": float(g["SPEED_ms"].mean()),
        "speed_q90": self._q(g["SPEED_ms"], 0.90),
        "speed_cv": speed_cv,
        "accel_q90": self._q(g["ACCEL"].abs(), 0.90),
        "jerk_q90": self._q(g["JERK"].abs(), 0.90),
        "sharp_freq_pm": sharp_freq_pm,
        "idle_frac": float(g["IDLE_RULE"].mean()),
        "idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
        "rpm_q90": self._q(g["RPM"], 0.90) if "RPM" in g else 0.0,
        "maf_q90": self._q(g["MAF"], 0.90) if "MAF" in g else 0.0,
        "load_q85": self._q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
        "thr_q85": self._q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
        "frac_rpm90": frac_rpm90,
        "frac_maf90": frac_maf90,
        # Crude fuel-use surrogate: high-RPM x high-MAF quantiles
        "fuel_intensity": (self._q(g["RPM"], 0.90)*self._q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
    }
379
+
380
def prepare_ml_data(self, df: pd.DataFrame, dfeat: pd.DataFrame, thr: Dict[str, float]) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray, StandardScaler, List[str], List[str]]:
    """Prepare data for machine learning training.

    Aggregates each drive into one feature row, pairs it with the teacher
    label from *dfeat*, drops zero-variance features and scales the rest.

    Returns:
        (X, y, groups, scaler, num_cols, holdout_cols) — the original
        annotation claimed a 3-tuple, but six values are returned.
    """
    logger.info("🔧 Preparing ML training data...")

    rows, y, groups = [], [], []
    for gid, g in df.groupby("drive_id", sort=True):
        # Must match the < 5 cut used when building the teacher labels.
        if len(g) < 5:
            continue
        rows.append(self._agg_for_ml_drive(g, thr))
        y.append(float(dfeat.loc[gid, "efficiency_algo"]))
        # Group by source file so CV folds never split one file's drives.
        groups.append(g["source_file"].iloc[0] if "source_file" in g.columns else gid)

    X = pd.DataFrame(rows)
    y = np.asarray(y, float)
    groups = np.asarray(groups)

    # Remove zero-variance features (they carry no signal and break scaling)
    zv = X.std(numeric_only=True).fillna(0.0)
    drop_cols = list(zv[zv <= 1e-10].index)
    if drop_cols:
        X = X.drop(columns=drop_cols)
        logger.info(f"🗑️ Dropped zero-variance features: {drop_cols}")

    # Scale features; duration/distance stay unscaled (used as weights/meta)
    holdout_cols = ["duration_min", "distance_km"]
    num_cols = [c for c in X.columns if c not in holdout_cols]
    sc = StandardScaler().fit(X[num_cols])
    X[num_cols] = sc.transform(X[num_cols])

    logger.info(f"✅ Prepared ML data: {X.shape[0]} samples, {X.shape[1]} features")
    return X, y, groups, sc, num_cols, holdout_cols
411
+
412
def train_model(self, X: pd.DataFrame, y: np.ndarray, groups: np.ndarray) -> Tuple[Any, str, Dict[str, Any], Dict[str, Any]]:
    """Train the efficiency model with cross-validation.

    Runs grouped CV to collect out-of-fold predictions, builds a
    quantile-mapping calibration from them, then fits the final model on
    all data (falling back to RandomForest when the GBM trains flat).

    Returns:
        (model, model_kind, calib, oof_stats) — the original annotation
        claimed a 3-tuple, but four values are returned.
    """
    logger.info("🤖 Training efficiency model...")

    # Out-of-fold predictions for calibration; at least 2 folds, at most 5,
    # bounded by the number of distinct groups.
    gkf = GroupKFold(n_splits=min(5, max(2, len(np.unique(groups)))))
    oof_raw = np.zeros_like(y)

    for tr, va in gkf.split(X, y, groups):
        gbm_fold = HistGradientBoostingRegressor(
            loss="squared_error", max_depth=6, learning_rate=0.08, max_bins=255,
            early_stopping=True, random_state=SEED
        )
        # Weight drives by duration so short drives don't dominate.
        wtr = np.clip(X.iloc[tr]["duration_min"].values, 0.5, None)
        gbm_fold.fit(X.iloc[tr], y[tr], sample_weight=wtr)
        pred = gbm_fold.predict(X.iloc[va])

        if np.std(pred) < 1e-6:
            # Ridge rescue to enforce variability when the fold GBM is flat
            ridge = Ridge(alpha=1.0, random_state=SEED).fit(X.iloc[tr][X.columns[2:]], y[tr])
            pred = ridge.predict(X.iloc[va][X.columns[2:]])

        oof_raw[va] = pred

    # Calculate OOF statistics
    raw_std = float(np.std(oof_raw))
    y_std = float(np.std(y))
    corr = float(np.corrcoef(oof_raw, y)[0,1]) if len(y) > 1 else 1.0

    logger.info(f"📊 OOF: corr={corr:.3f} | raw_std={raw_std:.3f} | y_std={y_std:.3f}")

    # Quantile-mapping calibration: map raw-prediction quantiles onto
    # teacher-label quantiles.
    qs = np.linspace(0.05, 0.95, 19)
    rq = np.quantile(oof_raw, qs)
    yq = np.quantile(y, qs)

    # Ensure strictly increasing rq for stable interpolation
    for i in range(1, len(rq)):
        if rq[i] <= rq[i-1]:
            rq[i] = rq[i-1] + 1e-6

    calib = {"type": "qmap", "rq": rq.tolist(), "yq": yq.tolist()}

    def apply_calib_qmap(raw):
        # Interpolate raw score onto the label scale, clamped to [0, 100].
        return float(np.clip(np.interp(raw, rq, yq), 0, 100))

    oof_cal = np.array([apply_calib_qmap(r) for r in oof_raw], float)
    oof_mae = float(mean_absolute_error(y, oof_cal))

    logger.info(f"📊 OOF MAE (qmap): {oof_mae:.2f}")

    # Final model training on all data (no early stopping, fixed iterations)
    gbm = HistGradientBoostingRegressor(
        loss="squared_error", max_depth=6, learning_rate=0.08, max_bins=255,
        early_stopping=False, max_iter=400, random_state=SEED
    )
    w_all = np.clip(X["duration_min"].values, 0.5, None)
    gbm.fit(X, y, sample_weight=w_all)
    raw_all = gbm.predict(X)

    if np.std(raw_all) < 1e-6:
        # Constant predictions mean the GBM learned nothing — fall back.
        logger.warning("⚠️ Final GBM raw constant — switching to RandomForest")
        rf = RandomForestRegressor(n_estimators=600, min_samples_leaf=2, random_state=SEED, n_jobs=-1)
        rf.fit(X, y)
        model_kind, model = "rf", rf
    else:
        model_kind, model = "gbm", gbm

    oof_stats = {
        "oof_mae_qmap": oof_mae,
        "oof_corr": corr,
        "raw_std": raw_std,
        "y_std": y_std
    }

    logger.info(f"✅ Model training complete | kind: {model_kind}")
    return model, model_kind, calib, oof_stats
489
+
490
def save_model(self, model, model_kind: str, scaler, feature_names: List[str],
               num_cols: List[str], holdout_cols: List[str], thr: Dict[str, float],
               calib: Dict[str, Any], oof_stats: Dict[str, Any]) -> str:
    """Save the trained model and artifacts.

    Writes a joblib bundle (model + scaler + schema + thresholds +
    calibration) and a JSON metadata sidecar into the export directory.

    Returns:
        Path to the saved joblib file as a string.
    """
    logger.info("💾 Saving model artifacts...")

    # Prepare artifacts — exactly one of "gbm"/"rf" is non-None,
    # selected at load time via "model_kind".
    artifacts = {
        "scaler": scaler,
        "model_kind": model_kind,
        "gbm": model if model_kind == "gbm" else None,
        "rf": model if model_kind == "rf" else None,
        "feature_names": feature_names,
        "num_cols": num_cols,
        "holdout_cols": holdout_cols,
        "windowing": {"size_s": 120, "step_s": 60},  # For future use
        "thr": thr,
        "seed": SEED,
        "calib": calib,
        "oof_stats": oof_stats,
        "training_timestamp": datetime.now().isoformat(),
        "version": "1.0"  # Will be updated based on HF versioning
    }

    # Save model
    model_path = self.export_directory / "efficiency_model.joblib"
    joblib.dump(artifacts, model_path)

    # Save metadata (human/JSON-readable summary next to the binary bundle)
    metadata = {
        "model_type": "fuel_efficiency",
        "version": "1.0",
        "training_date": datetime.now().isoformat(),
        "model_kind": model_kind,
        "feature_count": len(feature_names),
        "oof_stats": oof_stats,
        "calibration_type": calib.get("type", "none")
    }

    meta_path = self.export_directory / "efficiency_meta.json"
    with open(meta_path, 'w') as f:
        json.dump(metadata, f, indent=2)

    logger.info(f"✅ Model saved to {model_path}")
    logger.info(f"✅ Metadata saved to {meta_path}")

    return str(model_path)
537
+
538
def upload_to_huggingface(self, version: str = None) -> bool:
    """Upload the trained model to Hugging Face Hub.

    Files are pushed under a "{version}/..." prefix in the model repo.
    When *version* is None the next version is derived from the repo's
    existing contents.

    Returns:
        True on successful upload, False when the API is unavailable,
        the model file is missing, or the upload raises.
    """
    if not self.hf_api:
        logger.warning("⚠️ Hugging Face API not available - skipping upload")
        return False

    try:
        if version is None:
            version = self._get_next_version()

        logger.info(f"📤 Uploading model version {version} to Hugging Face...")

        # Upload model file
        model_path = self.export_directory / "efficiency_model.joblib"
        meta_path = self.export_directory / "efficiency_meta.json"

        if not model_path.exists():
            logger.error(f"❌ Model file not found: {model_path}")
            return False

        # Upload files (model first; metadata is optional)
        self.hf_api.upload_file(
            path_or_fileobj=str(model_path),
            path_in_repo=f"{version}/efficiency_model.joblib",
            repo_id=self.repo_id,
            repo_type="model"
        )

        if meta_path.exists():
            self.hf_api.upload_file(
                path_or_fileobj=str(meta_path),
                path_in_repo=f"{version}/efficiency_meta.json",
                repo_id=self.repo_id,
                repo_type="model"
            )

        logger.info(f"✅ Model {version} uploaded successfully to {self.repo_id}")
        return True

    except Exception as e:
        logger.error(f"❌ Error uploading to Hugging Face: {e}")
        return False
580
+
581
+ def _get_next_version(self) -> str:
582
+ """Get the next version number (1.0, 1.1, 1.2, ..., 1.9, 2.0, etc.)"""
583
+ try:
584
+ repo_files = self.hf_api.list_repo_files(
585
+ repo_id=self.repo_id,
586
+ repo_type="model"
587
+ )
588
+
589
+ # Find existing versions
590
+ versions = []
591
+ for f in repo_files:
592
+ if f.startswith('v') and '/' not in f:
593
+ try:
594
+ version_str = f[1:] # Remove 'v' prefix
595
+ major, minor = map(int, version_str.split('.'))
596
+ versions.append((major, minor))
597
+ except ValueError:
598
+ continue
599
+
600
+ if not versions:
601
+ return "v1.0"
602
+
603
+ # Sort and get next version
604
+ versions.sort(key=lambda x: (x[0], x[1]))
605
+ latest_major, latest_minor = versions[-1]
606
+
607
+ if latest_minor < 9:
608
+ return f"v{latest_major}.{latest_minor + 1}"
609
+ else:
610
+ return f"v{latest_major + 1}.0"
611
+
612
+ except Exception as e:
613
+ logger.warning(f"⚠️ Could not determine next version: {e}")
614
+ return "v1.0"
615
+
616
def train_and_upload(self, upload_to_hf: bool = True) -> Dict[str, Any]:
    """Complete training pipeline.

    Orchestrates: load data -> fleet thresholds -> teacher labels ->
    ML feature prep -> model training -> save -> optional HF upload.

    Returns:
        Result dict with "success": True and run stats, or
        {"success": False, "error": ...} when any stage raises.
    """
    try:
        logger.info("🚀 Starting fuel efficiency model training pipeline...")

        # Load data
        df = self.load_training_data()

        # Compute thresholds
        thr = self.compute_fleet_thresholds(df)

        # Create teacher labels
        dfeat = self.create_algorithmic_teacher(df, thr)

        # Prepare ML data
        X, y, groups, scaler, num_cols, holdout_cols = self.prepare_ml_data(df, dfeat, thr)

        # Train model
        model, model_kind, calib, oof_stats = self.train_model(X, y, groups)

        # Save model
        model_path = self.save_model(
            model, model_kind, scaler, list(X.columns),
            num_cols, holdout_cols, thr, calib, oof_stats
        )

        # Upload to Hugging Face (best-effort; failure does not fail the run)
        upload_success = False
        if upload_to_hf:
            upload_success = self.upload_to_huggingface()

        result = {
            "success": True,
            "model_path": model_path,
            "model_kind": model_kind,
            "oof_stats": oof_stats,
            "upload_success": upload_success,
            "training_samples": len(X),
            "feature_count": len(X.columns)
        }

        logger.info("✅ Training pipeline completed successfully")
        return result

    except Exception as e:
        logger.error(f"❌ Training pipeline failed: {e}")
        return {"success": False, "error": str(e)}
663
+
664
def main():
    """Main function for command-line usage.

    Returns a process exit code: 0 on success, 1 when training failed.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Train fuel efficiency model")
    parser.add_argument("--csv-dir", default="./", help="Directory containing CSV files")
    parser.add_argument("--export-dir", default="./efficiency_export", help="Export directory")
    parser.add_argument("--repo-id", default="BinKhoaLe1812/Fuel_Efficiency_OBD", help="Hugging Face repo ID")
    parser.add_argument("--no-upload", action="store_true", help="Skip Hugging Face upload")

    args = parser.parse_args()

    # Initialize trainer
    trainer = EfficiencyModelTrainer(
        csv_directory=args.csv_dir,
        export_directory=args.export_dir,
        repo_id=args.repo_id
    )

    # Train and upload
    result = trainer.train_and_upload(upload_to_hf=not args.no_upload)

    if result["success"]:
        print("✅ Training completed successfully!")
        print(f"📊 Model: {result['model_kind']}")
        print(f"📈 OOF MAE: {result['oof_stats']['oof_mae_qmap']:.2f}")
        print(f"📤 Upload: {'✅' if result['upload_success'] else '❌'}")
    else:
        print(f"❌ Training failed: {result['error']}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
train/rlhf.py CHANGED
@@ -168,7 +168,7 @@ class RLHFTrainer:
168
  # First, try to download the latest model
169
  logger.info("🔄 Checking for latest model version...")
170
  try:
171
- from utils.download import download_latest_models
172
  download_latest_models()
173
  except Exception as e:
174
  logger.warning(f"⚠️ Failed to download latest models: {e}")
 
168
  # First, try to download the latest model
169
  logger.info("🔄 Checking for latest model version...")
170
  try:
171
+ from utils.dbehavior_download import download_latest_models
172
  download_latest_models()
173
  except Exception as e:
174
  logger.warning(f"⚠️ Failed to download latest models: {e}")
train/saver.py CHANGED
@@ -102,7 +102,7 @@ class ModelSaver:
102
  "performance_metrics": performance_metrics,
103
  "framework": "xgboost",
104
  "task": "driver_behavior_classification",
105
- "labels": ["aggressive", "normal", "conservative"], # Based on ul_label.py
106
  "features": "obd_sensor_data",
107
  "rlhf_metadata": rlhf_metadata or {}
108
  }
 
102
  "performance_metrics": performance_metrics,
103
  "framework": "xgboost",
104
  "task": "driver_behavior_classification",
105
+ "labels": ["aggressive", "normal", "conservative"], # Based on dbehavior_labeler.py
106
  "features": "obd_sensor_data",
107
  "rlhf_metadata": rlhf_metadata or {}
108
  }
utils/{download.py → dbehavior_download.py} RENAMED
File without changes
utils/{ul_label.py → dbehavior_labeler.py} RENAMED
@@ -1,4 +1,4 @@
1
- # ul_label.py
2
  # Load UL models and predict driving style
3
  import os, logging, pickle
4
  import warnings
 
1
+ # dbehavior_labeler.py
2
  # Load UL models and predict driving style
3
  import os, logging, pickle
4
  import warnings
utils/efficiency_download.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fuel Efficiency Model Downloader
3
+ Downloads the latest fuel efficiency model from Hugging Face Hub
4
+ Similar to utils/dbehavior_download.py but for fuel efficiency models
5
+ """
6
+
7
+ import os
8
+ import pathlib
9
+ import logging
10
+ from typing import Optional, List
11
+ from huggingface_hub import HfApi, hf_hub_download
12
+ import joblib
13
+
14
+ logger = logging.getLogger("efficiency-downloader")
15
+ logger.setLevel(logging.INFO)
16
+ if not logger.handlers:
17
+ handler = logging.StreamHandler()
18
+ handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
19
+ logger.addHandler(handler)
20
+
21
+ def load_env_file():
22
+ """Load .env file if it exists"""
23
+ env_path = pathlib.Path(".env")
24
+ if env_path.exists():
25
+ logger.info("📄 Loading .env file...")
26
+ with open(env_path, 'r') as f:
27
+ for line in f:
28
+ line = line.strip()
29
+ if line and not line.startswith('#') and '=' in line:
30
+ key, value = line.split('=', 1)
31
+ os.environ[key] = value
32
+ return True
33
+ return False
34
+
35
+ # Load .env file first before setting any environment variables
36
+ load_env_file()
37
+
38
+ # Configuration
39
+ EFFICIENCY_REPO_ID = os.getenv("HF_EFFICIENCY_MODEL_REPO", "BinKhoaLe1812/Fuel_Efficiency_OBD")
40
+ EFFICIENCY_MODEL_DIR = pathlib.Path(os.getenv("EFFICIENCY_MODEL_DIR", "/app/models/efficiency")).resolve()
41
+ EFFICIENCY_FILES = ["efficiency_model.joblib", "efficiency_meta.json"]
42
+
43
+ EFFICIENCY_MODEL_DIR.mkdir(parents=True, exist_ok=True)
44
+
45
+ def get_latest_efficiency_version():
46
+ """Get the latest fuel efficiency model version from Hugging Face repo"""
47
+ try:
48
+ hf_token = os.getenv("HF_TOKEN")
49
+ if not hf_token:
50
+ logger.warning("⚠️ HF_TOKEN not set, using default efficiency model files")
51
+ return None
52
+
53
+ api = HfApi(token=hf_token)
54
+ repo_files = api.list_repo_files(
55
+ repo_id=EFFICIENCY_REPO_ID,
56
+ repo_type="model"
57
+ )
58
+
59
+ logger.info(f"🔍 Checking efficiency repository files...")
60
+ logger.info(f"📁 Found {len(repo_files)} files in efficiency repository")
61
+
62
+ # Find version directories (v1.0, v1.1, etc.)
63
+ version_dirs = [f for f in repo_files if f.startswith('v') and '/' not in f]
64
+ logger.info(f"📦 Found efficiency version directories: {version_dirs}")
65
+
66
+ # Also check for version directories with files inside
67
+ version_dirs_with_files = []
68
+ for f in repo_files:
69
+ if f.startswith('v') and '/' in f:
70
+ version_dir = f.split('/')[0]
71
+ if version_dir not in version_dirs_with_files:
72
+ version_dirs_with_files.append(version_dir)
73
+
74
+ if version_dirs_with_files:
75
+ logger.info(f"📦 Found efficiency version directories with files: {version_dirs_with_files}")
76
+ version_dirs.extend(version_dirs_with_files)
77
+
78
+ versions = []
79
+
80
+ for v_dir in version_dirs:
81
+ try:
82
+ # Extract version number (e.g., "v1.0" -> 1.0)
83
+ version_str = v_dir[1:] # Remove 'v' prefix
84
+ major, minor = map(int, version_str.split('.'))
85
+ versions.append((major, minor, v_dir))
86
+ except ValueError:
87
+ logger.warning(f"⚠️ Could not parse version: {v_dir}")
88
+ continue
89
+
90
+ if not versions:
91
+ logger.warning("⚠️ No valid efficiency versions found")
92
+ return None
93
+
94
+ # Sort by major.minor version
95
+ versions.sort(key=lambda x: (x[0], x[1]))
96
+ latest_version = versions[-1][2] # Get the version string
97
+
98
+ logger.info(f"✅ Latest efficiency model version: {latest_version}")
99
+ return latest_version
100
+
101
+ except Exception as e:
102
+ logger.error(f"❌ Error getting latest efficiency version: {e}")
103
+ return None
104
+
105
+ def download_efficiency_model(version: Optional[str] = None) -> bool:
106
+ """Download the specified version of the fuel efficiency model"""
107
+ try:
108
+ hf_token = os.getenv("HF_TOKEN")
109
+ if not hf_token:
110
+ logger.error("❌ HF_TOKEN not set")
111
+ return False
112
+
113
+ if version is None:
114
+ version = get_latest_efficiency_version()
115
+ if version is None:
116
+ logger.error("❌ Could not determine latest efficiency version")
117
+ return False
118
+
119
+ logger.info(f"📥 Downloading efficiency model version: {version}")
120
+
121
+ # Download each required file
122
+ for filename in EFFICIENCY_FILES:
123
+ try:
124
+ file_path = hf_hub_download(
125
+ repo_id=EFFICIENCY_REPO_ID,
126
+ filename=f"{version}/{filename}",
127
+ token=hf_token,
128
+ local_dir=EFFICIENCY_MODEL_DIR,
129
+ local_dir_use_symlinks=False
130
+ )
131
+ logger.info(f"✅ Downloaded: {filename}")
132
+
133
+ except Exception as e:
134
+ logger.error(f"❌ Failed to download {filename}: {e}")
135
+ return False
136
+
137
+ logger.info(f"✅ Efficiency model {version} downloaded successfully")
138
+ return True
139
+
140
+ except Exception as e:
141
+ logger.error(f"❌ Error downloading efficiency model: {e}")
142
+ return False
143
+
144
+ def download_latest_efficiency_models() -> bool:
145
+ """Download the latest fuel efficiency model files"""
146
+ try:
147
+ logger.info("🚀 Starting efficiency model download...")
148
+
149
+ # Get latest version
150
+ latest_version = get_latest_efficiency_version()
151
+ if latest_version is None:
152
+ logger.error("❌ Could not determine latest efficiency version")
153
+ return False
154
+
155
+ # Download the model
156
+ success = download_efficiency_model(latest_version)
157
+ if success:
158
+ logger.info("✅ Latest efficiency model downloaded successfully")
159
+ else:
160
+ logger.error("❌ Failed to download latest efficiency model")
161
+
162
+ return success
163
+
164
+ except Exception as e:
165
+ logger.error(f"❌ Error in download_latest_efficiency_models: {e}")
166
+ return False
167
+
168
+ def load_efficiency_model():
169
+ """Load the efficiency model from local storage"""
170
+ try:
171
+ model_path = EFFICIENCY_MODEL_DIR / "efficiency_model.joblib"
172
+ meta_path = EFFICIENCY_MODEL_DIR / "efficiency_meta.json"
173
+
174
+ if not model_path.exists():
175
+ logger.error(f"❌ Efficiency model not found at {model_path}")
176
+ return None, None
177
+
178
+ # Load model
179
+ model_artifacts = joblib.load(model_path)
180
+
181
+ # Load metadata if available
182
+ metadata = None
183
+ if meta_path.exists():
184
+ import json
185
+ with open(meta_path, 'r') as f:
186
+ metadata = json.load(f)
187
+
188
+ logger.info("✅ Efficiency model loaded successfully")
189
+ return model_artifacts, metadata
190
+
191
+ except Exception as e:
192
+ logger.error(f"❌ Error loading efficiency model: {e}")
193
+ return None, None
194
+
195
+ def check_efficiency_model_exists() -> bool:
196
+ """Check if efficiency model files exist locally"""
197
+ model_path = EFFICIENCY_MODEL_DIR / "efficiency_model.joblib"
198
+ return model_path.exists()
199
+
200
+ if __name__ == "__main__":
201
+ # Test the download functionality
202
+ success = download_latest_efficiency_models()
203
+ if success:
204
+ print("✅ Efficiency model download test successful")
205
+ else:
206
+ print("❌ Efficiency model download test failed")
utils/efficiency_labeler.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Fuel Efficiency Labeler
3
+ Provides fuel efficiency scoring for OBD data using the trained model
4
+ Similar to utils/dbehavior_labeler.py but for fuel efficiency scoring
5
+ """
6
+
7
+ import os
8
+ import logging
9
+ import joblib
10
+ import numpy as np
11
+ import pandas as pd
12
+ from typing import List, Optional, Dict, Any, Tuple
13
+ from pathlib import Path
14
+
15
+ logger = logging.getLogger("efficiency-labeler")
16
+ logger.setLevel(logging.INFO)
17
+ if not logger.handlers:
18
+ handler = logging.StreamHandler()
19
+ handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
20
+ logger.addHandler(handler)
21
+
22
+ # Constants
23
+ KMH_TO_MS = 1000.0/3600.0
24
+ SEED = 42
25
+
26
+ class EfficiencyLabeler:
27
+ """
28
+ Fuel efficiency scorer for OBD data using machine learning model.
29
+ Provides drive-level efficiency scores (0-100%) for entire drives.
30
+ """
31
+
32
+ _instance = None
33
+ _model_artifacts = None
34
+ _metadata = None
35
+ _initialized = False
36
+
37
+ def __init__(self):
38
+ if not EfficiencyLabeler._initialized:
39
+ self._load_model()
40
+ EfficiencyLabeler._initialized = True
41
+
42
+ @classmethod
43
+ def get(cls):
44
+ """Get singleton instance"""
45
+ if cls._instance is None:
46
+ cls._instance = cls()
47
+ return cls._instance
48
+
49
+ def _load_model(self):
50
+ """Load the efficiency model and metadata"""
51
+ try:
52
+ from utils.efficiency_download import load_efficiency_model, check_efficiency_model_exists
53
+
54
+ # Check if model exists locally
55
+ if not check_efficiency_model_exists():
56
+ logger.warning("⚠️ Efficiency model not found locally, attempting download...")
57
+ from utils.efficiency_download import download_latest_efficiency_models
58
+ success = download_latest_efficiency_models()
59
+ if not success:
60
+ raise RuntimeError("Failed to download efficiency model")
61
+
62
+ # Load model
63
+ model_artifacts, metadata = load_efficiency_model()
64
+ if model_artifacts is None:
65
+ raise RuntimeError("Failed to load efficiency model")
66
+
67
+ EfficiencyLabeler._model_artifacts = model_artifacts
68
+ EfficiencyLabeler._metadata = metadata
69
+
70
+ logger.info(f"✅ Efficiency model loaded | kind: {model_artifacts.get('model_kind', 'unknown')}")
71
+ logger.info(f"📊 Model features: {len(model_artifacts.get('feature_names', []))}")
72
+
73
+ except Exception as e:
74
+ logger.error(f"❌ Error loading efficiency model: {e}")
75
+ raise
76
+
77
+ def _ensure_dt(self, s):
78
+ """Ensure datetime conversion"""
79
+ return pd.to_datetime(s, errors="coerce")
80
+
81
+ def _infer_base_interval_seconds(self, ts, fallback=1.0):
82
+ """Infer base interval from timestamps"""
83
+ ts = pd.to_datetime(ts, errors="coerce")
84
+ dt = ts.diff().dt.total_seconds().dropna()
85
+ med = float(np.nanmedian(dt)) if len(dt) else fallback
86
+ return fallback if (not np.isfinite(med) or med <= 0) else med
87
+
88
+ def _rows_for(self, seconds, base_sec):
89
+ """Calculate number of rows for given time window"""
90
+ return max(3, int(round(seconds / max(1e-3, base_sec))))
91
+
92
+ def _add_basic_derivatives(self, d):
93
+ """Add basic derivatives (acceleration, jerk, distance)"""
94
+ d = d.copy()
95
+ d["timestamp"] = self._ensure_dt(d["timestamp"])
96
+ d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
97
+ base = self._infer_base_interval_seconds(d["timestamp"], 1.0)
98
+
99
+ # Convert numeric columns
100
+ for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
101
+ if c in d.columns:
102
+ d[c] = pd.to_numeric(d[c], errors="coerce")
103
+
104
+ # Convert speed to m/s
105
+ if "SPEED_ms" not in d.columns:
106
+ d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan
107
+
108
+ # Calculate derivatives
109
+ d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
110
+ d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)
111
+
112
+ # Calculate distance
113
+ dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
114
+ d["dist_m"] = d["SPEED_ms"] * dt
115
+
116
+ return d
117
+
118
+ def _idle_rule(self, d, thr):
119
+ """Apply idle detection rule"""
120
+ speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
121
+ thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
122
+ load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
123
+ maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
124
+ accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))
125
+
126
+ mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
127
+ k = 5
128
+ return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
129
+ if len(mask) >= k else mask.astype(bool))
130
+
131
+ def _sharp_mask_from_thresholds(self, d, thr):
132
+ """Detect sharp acceleration/deceleration events"""
133
+ thr_a = thr.get("ACCEL_HIGH_Q85",
134
+ np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
135
+ thr_j = thr.get("JERK_HIGH_Q90",
136
+ np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
137
+ return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)
138
+
139
+ def _agg_for_ml_drive(self, g, thr):
140
+ """Aggregate drive-level features for ML model"""
141
+ g = self._add_basic_derivatives(g.copy())
142
+ base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
143
+ g["IDLE_RULE"] = self._idle_rule(g, thr)
144
+
145
+ dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
146
+ T = float(dt.sum())
147
+ mins = max(1e-6, T/60)
148
+
149
+ sharp = self._sharp_mask_from_thresholds(g, thr).values
150
+ edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
151
+ sharp_freq_pm = (len(edges)//2)/mins
152
+
153
+ def q(s, p):
154
+ s = pd.to_numeric(s, errors="coerce")
155
+ return float(np.nanquantile(s, p)) if s.notna().any() else 0.0
156
+
157
+ rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
158
+ frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
159
+ frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0
160
+
161
+ W10 = self._rows_for(10, base)
162
+ speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())
163
+
164
+ return {
165
+ "duration_min": max(1e-6, T/60),
166
+ "distance_km": g["dist_m"].sum()/1000.0,
167
+ "speed_mean": float(g["SPEED_ms"].mean()),
168
+ "speed_q90": q(g["SPEED_ms"], 0.90),
169
+ "speed_cv": speed_cv,
170
+ "accel_q90": q(g["ACCEL"].abs(), 0.90),
171
+ "jerk_q90": q(g["JERK"].abs(), 0.90),
172
+ "sharp_freq_pm": sharp_freq_pm,
173
+ "idle_frac": float(g["IDLE_RULE"].mean()),
174
+ "idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
175
+ "rpm_q90": q(g["RPM"], 0.90) if "RPM" in g else 0.0,
176
+ "maf_q90": q(g["MAF"], 0.90) if "MAF" in g else 0.0,
177
+ "load_q85": q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
178
+ "thr_q85": q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
179
+ "frac_rpm90": frac_rpm90,
180
+ "frac_maf90": frac_maf90,
181
+ "fuel_intensity": (q(g["RPM"], 0.90)*q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
182
+ }
183
+
184
+ def _align_to_schema(self, feats, art):
185
+ """Align features to model schema"""
186
+ x = pd.DataFrame([feats])
187
+ for c in art["feature_names"]:
188
+ if c not in x.columns:
189
+ x[c] = 0.0
190
+ x = x[art["feature_names"]]
191
+ if len(art["num_cols"]):
192
+ x.loc[:, art["num_cols"]] = art["scaler"].transform(x[art["num_cols"]])
193
+ return x
194
+
195
+ def _predict_drive(self, df_drive):
196
+ """Predict efficiency for a single drive"""
197
+ art = EfficiencyLabeler._model_artifacts
198
+ thr = art["thr"]
199
+
200
+ feats = self._agg_for_ml_drive(df_drive, thr)
201
+ x = self._align_to_schema(feats, art)
202
+
203
+ # Get model
204
+ mdl = art["rf"] if art.get("model_kind") == "rf" else art["gbm"]
205
+ raw = float(mdl.predict(x)[0])
206
+
207
+ # Apply quantile-mapping calibration
208
+ if art.get("calib", {}).get("type") == "qmap":
209
+ rq = np.array(art["calib"]["rq"])
210
+ yq = np.array(art["calib"]["yq"])
211
+
212
+ # Ensure strictly increasing rq for stable interpolation
213
+ for i in range(1, len(rq)):
214
+ if rq[i] <= rq[i-1]:
215
+ rq[i] = rq[i-1] + 1e-6
216
+
217
+ pred = float(np.clip(np.interp(raw, rq, yq), 0, 100))
218
+ else:
219
+ pred = float(np.clip(raw, 0, 100))
220
+
221
+ return pred, raw
222
+
223
+ def predict_df(self, df: pd.DataFrame) -> List[float]:
224
+ """
225
+ Predict fuel efficiency for a DataFrame containing OBD data.
226
+ Returns a single efficiency score (0-100%) for the entire drive.
227
+
228
+ Args:
229
+ df: DataFrame with OBD data including timestamp, SPEED, RPM, MAF, etc.
230
+
231
+ Returns:
232
+ List containing single efficiency score for the drive
233
+ """
234
+ try:
235
+ if EfficiencyLabeler._model_artifacts is None:
236
+ raise RuntimeError("Efficiency model not loaded")
237
+
238
+ if len(df) < 5:
239
+ logger.warning("⚠️ Drive too short for efficiency prediction")
240
+ return [0.0] # Return minimum efficiency for very short drives
241
+
242
+ # Ensure timestamp column exists
243
+ if "timestamp" not in df.columns:
244
+ logger.error("❌ No timestamp column found")
245
+ return [0.0]
246
+
247
+ # Predict efficiency for the entire drive
248
+ efficiency_score, raw_score = self._predict_drive(df)
249
+
250
+ logger.info(f"📊 Drive efficiency: {efficiency_score:.1f}% (raw: {raw_score:.3f})")
251
+ return [efficiency_score]
252
+
253
+ except Exception as e:
254
+ logger.error(f"❌ Error predicting efficiency: {e}")
255
+ return [0.0] # Return minimum efficiency on error
256
+
257
+ def get_model_info(self) -> Dict[str, Any]:
258
+ """Get information about the loaded model"""
259
+ if EfficiencyLabeler._model_artifacts is None:
260
+ return {"error": "Model not loaded"}
261
+
262
+ art = EfficiencyLabeler._model_artifacts
263
+ return {
264
+ "model_kind": art.get("model_kind", "unknown"),
265
+ "feature_count": len(art.get("feature_names", [])),
266
+ "features": art.get("feature_names", []),
267
+ "calibration_type": art.get("calib", {}).get("type", "none"),
268
+ "oof_stats": art.get("oof_stats", {}),
269
+ "metadata": EfficiencyLabeler._metadata
270
+ }
271
+
272
+ # Convenience function for backward compatibility
273
+ def predict_efficiency(df: pd.DataFrame) -> List[float]:
274
+ """Convenience function to predict efficiency"""
275
+ labeler = EfficiencyLabeler.get()
276
+ return labeler.predict_df(df)
277
+
278
+ if __name__ == "__main__":
279
+ # Test the efficiency labeler
280
+ try:
281
+ labeler = EfficiencyLabeler.get()
282
+ print("✅ Efficiency labeler initialized successfully")
283
+
284
+ # Print model info
285
+ info = labeler.get_model_info()
286
+ print(f"📊 Model info: {info}")
287
+
288
+ except Exception as e:
289
+ print(f"❌ Error initializing efficiency labeler: {e}")