Spaces:
Sleeping
Sleeping
Commit ·
bc3c386
1
Parent(s): 5503637
Update fuel efficiency model fusion
Browse files- .DS_Store +0 -0
- .dockerignore +6 -0
- Dockerfile +5 -2
- OBD/DrivingAggressivenessScorer.py +188 -0
- OBD/configScorer.py +97 -0
- OBD/obd_analyzer.py +74 -41
- OBD/obd_logger.py +404 -247
- OBD/scorerConfig/scorerConfig.py +59 -0
- OBD/visualiseScorer.py +237 -0
- README.md +9 -3
- app.py +97 -21
- bulk_mongo_upload.py +181 -0
- data/mongo_saver.py +2 -1
- efficiency/eval.py +458 -0
- efficiency/retrain.py +698 -0
- train/rlhf.py +1 -1
- train/saver.py +1 -1
- utils/{download.py → dbehavior_download.py} +0 -0
- utils/{ul_label.py → dbehavior_labeler.py} +1 -1
- utils/efficiency_download.py +206 -0
- utils/efficiency_labeler.py +289 -0
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
.dockerignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
diagram
|
| 2 |
+
OBD
|
| 3 |
+
*.md
|
| 4 |
+
data.json
|
| 5 |
+
organize.py
|
| 6 |
+
bulk_mongo_upload.py
|
Dockerfile
CHANGED
|
@@ -28,11 +28,14 @@ RUN mkdir -p $HOME/app/logs \
|
|
| 28 |
$HOME/app/cache \
|
| 29 |
$HOME/app/cache/obd_data \
|
| 30 |
$HOME/app/cache/obd_data/plots \
|
| 31 |
-
$HOME/app/models/ul
|
|
|
|
| 32 |
|
| 33 |
-
# ── Environment variables for HuggingFace
|
| 34 |
ENV MODEL_DIR=$HOME/app/models/ul
|
| 35 |
ENV HF_MODEL_REPO=BinKhoaLe1812/Driver_Behavior_OBD
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# ── Models will be downloaded at runtime when app starts ──
|
| 38 |
|
|
|
|
| 28 |
$HOME/app/cache \
|
| 29 |
$HOME/app/cache/obd_data \
|
| 30 |
$HOME/app/cache/obd_data/plots \
|
| 31 |
+
$HOME/app/models/ul \
|
| 32 |
+
$HOME/app/models/efficiency
|
| 33 |
|
| 34 |
+
# ── Environment variables for HuggingFace models ──
|
| 35 |
ENV MODEL_DIR=$HOME/app/models/ul
|
| 36 |
ENV HF_MODEL_REPO=BinKhoaLe1812/Driver_Behavior_OBD
|
| 37 |
+
ENV EFFICIENCY_MODEL_DIR=$HOME/app/models/efficiency
|
| 38 |
+
ENV HF_EFFICIENCY_MODEL_REPO=BinKhoaLe1812/Fuel_Efficiency_OBD
|
| 39 |
|
| 40 |
# ── Models will be downloaded at runtime when app starts ──
|
| 41 |
|
OBD/DrivingAggressivenessScorer.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Tuple
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DrivingAggressivenessScorer:
    """Score driving aggressiveness on a 0-100 scale from OBD-II telemetry.

    Each telemetry channel is min-max normalized against historical bounds
    persisted in a JSON file, then folded into a weighted sum per row.
    Bounds widen automatically whenever a new drive shows new extremes.
    """

    # Default per-channel weights; must sum to 1.0.
    # NOTE(review): the original referenced ``self.weights.copy()`` before the
    # attribute existed (a bug), so no defaults were actually defined.  These
    # values are reconstructed from the channels used elsewhere in the
    # project -- confirm the intended tuning.
    DEFAULT_WEIGHTS = {
        'RPM': 0.25,
        'THROTTLE_POS': 0.25,
        'ENGINE_LOAD': 0.20,
        'MAF': 0.15,
        'SPEED': 0.10,
        'INTAKE_PRESSURE': 0.05,
    }

    def __init__(self, bounds_file: str = 'obd_bounds.json', weights: Dict = None):
        """Create a scorer.

        Args:
            bounds_file: JSON file holding per-parameter min/max bounds.
            weights: optional mapping of parameter name -> weight; falls back
                to ``DEFAULT_WEIGHTS``.  Weights not summing to 1.0 are
                normalized with a warning.
        """
        self.bounds_file = Path(bounds_file)
        # Bug fix: the original evaluated ``self.weights.copy()`` before
        # ``self.weights`` was assigned, raising AttributeError when the
        # ``weights`` argument was omitted.
        self.weights = dict(weights) if weights else self.DEFAULT_WEIGHTS.copy()
        self.bounds = self._load_bounds()

        weight_sum = sum(self.weights.values())
        if not np.isclose(weight_sum, 1.0):
            print(f"Warning: Weights sum to {weight_sum:.3f}, normalizing to 1.0")
            self.weights = {k: v/weight_sum for k, v in self.weights.items()}

    def _load_bounds(self) -> Dict:
        """Load bounds from disk, or build inverted starter bounds.

        Bug fix: the original implicitly returned None when the bounds file
        was missing, crashing every later ``self.bounds[param]`` access.
        """
        if self.bounds_file.exists():
            with open(self.bounds_file, 'r') as f:
                return json.load(f)
        # Inverted bounds guarantee the first update_bounds() call adopts
        # the observed extremes for every weighted parameter.
        return {param: {'min': float('inf'), 'max': float('-inf')}
                for param in self.weights}

    def _save_bounds(self):
        """Persist the current bounds to ``self.bounds_file`` as JSON."""
        with open(self.bounds_file, 'w') as f:
            json.dump(self.bounds, f, indent=2)
        print(f"✓ Bounds updated and saved to {self.bounds_file}")

    def update_bounds(self, df: pd.DataFrame):
        """Widen stored bounds with any new extremes found in ``df``.

        Returns True (and saves the bounds file) if any bound changed.
        """
        updated = False
        for param in self.weights.keys():
            if param in df.columns:
                # Bug fix: cast to builtin float -- numpy scalars returned by
                # min()/max() are rejected by json.dump in _save_bounds().
                data_min = float(df[param].min())
                data_max = float(df[param].max())

                # Tolerate a bounds file that predates a newly-weighted param.
                limits = self.bounds.setdefault(
                    param, {'min': float('inf'), 'max': float('-inf')})

                # Update bounds if new extremes found
                if data_min < limits['min']:
                    limits['min'] = data_min
                    updated = True
                    print(f" New MIN for {param}: {data_min:.2f}")

                if data_max > limits['max']:
                    limits['max'] = data_max
                    updated = True
                    print(f" New MAX for {param}: {data_max:.2f}")

        if updated:
            self._save_bounds()
        return updated

    def normalize_value(self, value: float, param: str) -> float:
        """Min-max normalize ``value`` into [0, 1] using the stored bounds."""
        limits = self.bounds.get(param, {'min': float('inf'), 'max': float('-inf')})
        min_val = limits['min']
        max_val = limits['max']

        # Degenerate or not-yet-initialized bounds cannot discriminate.
        if max_val <= min_val:
            return 0.0

        normalized = (value - min_val) / (max_val - min_val)
        return np.clip(normalized, 0.0, 1.0)

    def calculate_row_score(self, row: pd.Series) -> float:
        """Weighted-sum aggressiveness score (0-100) for one telemetry row.

        Missing/NaN channels simply contribute nothing to the sum.
        """
        weighted_score = 0.0

        for param, weight in self.weights.items():
            if param in row and pd.notna(row[param]):
                normalized = self.normalize_value(row[param], param)
                weighted_score += normalized * weight

        # Convert to 0-100 scale
        return weighted_score * 100

    def calculate_drive_scores(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with an ``aggressiveness_score`` column."""
        df = df.copy()
        df['aggressiveness_score'] = df.apply(self.calculate_row_score, axis=1)
        return df

    def calculate_aggregate_score(self, scores: np.ndarray) -> Dict:
        """Summarize per-row scores into one drive-level score with stats.

        The final score is a mean/p75 blend plus penalties for aggressive
        spikes (>= 70) and extreme spikes (>= 85), clipped to [0, 100].

        Raises:
            ValueError: if ``scores`` is empty.
        """
        scores = np.asarray(scores, dtype=float)
        if scores.size == 0:
            raise ValueError("scores must not be empty")

        mean_score = np.mean(scores)
        median_score = np.median(scores)
        std_score = np.std(scores)

        # Percentile analysis for spike detection
        p75 = np.percentile(scores, 75)
        p90 = np.percentile(scores, 90)
        p95 = np.percentile(scores, 95)
        p99 = np.percentile(scores, 99)
        max_score = np.max(scores)

        # Detect aggressive spikes (scores > 70)
        spike_threshold = 70
        spike_count = np.sum(scores >= spike_threshold)
        spike_percentage = (spike_count / len(scores)) * 100

        # Detect extreme spikes (scores > 85)
        extreme_threshold = 85
        extreme_count = np.sum(scores >= extreme_threshold)
        extreme_percentage = (extreme_count / len(scores)) * 100

        # Penalty increases with spike frequency and intensity
        spike_penalty = 0.0

        if p95 > 70:
            spike_penalty += (p95 - 70) * 0.3
        if p99 > 80:
            spike_penalty += (p99 - 80) * 0.5

        # Penalty for frequency of spikes
        if spike_percentage > 5:
            spike_penalty += (spike_percentage - 5) * 2.0
        if extreme_percentage > 2:
            spike_penalty += (extreme_percentage - 2) * 3.0

        # Calculate final aggregate score
        base_score = (mean_score * 0.7) + (p75 * 0.3)

        # Apply spike penalty
        final_score = np.clip(base_score + spike_penalty, 0, 100)

        return {
            'final_score': round(final_score, 2),
            'mean_score': round(mean_score, 2),
            'median_score': round(median_score, 2),
            'std_score': round(std_score, 2),
            'p75_score': round(p75, 2),
            'p90_score': round(p90, 2),
            'p95_score': round(p95, 2),
            'p99_score': round(p99, 2),
            'max_score': round(max_score, 2),
            'spike_percentage': round(spike_percentage, 2),
            'extreme_percentage': round(extreme_percentage, 2),
            'spike_penalty': round(spike_penalty, 2)
        }

    def analyze_drive(self, csv_path: str, update_bounds: bool = True) -> Tuple[pd.DataFrame, Dict]:
        """Load one drive CSV, score every row, and aggregate the result.

        Args:
            csv_path: path to a logged OBD CSV.
            update_bounds: when True, widen and persist global bounds first.

        Returns:
            (scored DataFrame, aggregate-stats dict).
        """
        print(f"\n{'='*60}")
        print(f"ANALYZING DRIVE: {csv_path}")
        print(f"{'='*60}")

        # Load data
        df = pd.read_csv(csv_path)
        print(f"✓ Loaded {len(df)} data points")

        # Update bounds if requested
        if update_bounds:
            print("\nUpdating bounds...")
            self.update_bounds(df)

        # Calculate scores
        print("\nCalculating aggressiveness scores...")
        df_scored = self.calculate_drive_scores(df)

        # Calculate aggregate
        aggregate = self.calculate_aggregate_score(df_scored['aggressiveness_score'].values)

        return df_scored, aggregate

    def get_current_bounds(self) -> Dict:
        """Return the in-memory bounds mapping (not a copy)."""
        return self.bounds

    def print_bounds(self):
        """Pretty-print the current min/max bounds for every weighted param."""
        print("\nCurrent Parameter Bounds:")
        print("-" * 50)
        for param in self.weights.keys():
            limits = self.bounds.get(param, {'min': float('inf'), 'max': float('-inf')})
            min_val = limits['min']
            max_val = limits['max']
            print(f"{param:20s}: {min_val:8.2f} to {max_val:8.2f}")
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
if __name__ == "__main__":
    # Build a scorer with default weights and bounds.
    scorer = DrivingAggressivenessScorer()

    # Score every row of the sample drive and compute the drive summary.
    scored_df, summary = scorer.analyze_drive('obd_data_log_20251012_121810.csv')

    # Persist the per-row scores alongside the raw telemetry.
    result_path = 'obd_data_scored.csv'
    scored_df.to_csv(result_path, index=False)
    print(f"✓ Scored data saved to {result_path}")

    # Show the bounds that were used (and possibly widened) by this run.
    scorer.print_bounds()
|
OBD/configScorer.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import yaml
|
| 2 |
+
from driving_aggressiveness_scorer import DrivingAggressivenessScorer
|
| 3 |
+
from driving_analyzer import visualize_drive, compare_drives
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load_config(config_path: str = 'config.yaml') -> dict:
    """Read scorer configuration from a YAML file.

    Returns the parsed mapping, or None when the file does not exist (the
    caller then falls back to built-in defaults).
    """
    try:
        stream = open(config_path, 'r')
    except FileNotFoundError:
        print(f"Config file not found: {config_path}")
        print("Using default configuration.")
        return None
    with stream:
        return yaml.safe_load(stream)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def create_scorer_from_config(config_path: str = 'config.yaml') -> DrivingAggressivenessScorer:
    """Build a DrivingAggressivenessScorer from a YAML config file.

    Falls back to a default-configured scorer when the config is missing
    or empty.
    """
    cfg = load_config(config_path)

    if not cfg:
        scorer = DrivingAggressivenessScorer()
        print("✓ Scorer initialized with default settings")
        return scorer

    bounds_path = cfg.get('bounds', {}).get('file', 'obd_bounds.json')
    scorer = DrivingAggressivenessScorer(bounds_file=bounds_path,
                                         weights=cfg.get('weights', None))
    print(f"✓ Scorer initialized with config from {config_path}")
    return scorer
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Quick start examples
|
| 34 |
+
if __name__ == "__main__":

    def banner(title):
        # Section header used throughout the demo output.
        print("\n" + "=" * 60)
        print(title)
        print("=" * 60)

    # METHOD 1: Use with config file (recommended)
    banner("METHOD 1: Config-based scoring")
    scorer = create_scorer_from_config('config.yaml')
    df_scored, results = scorer.analyze_drive('obd_data_log_20251012_121810.csv')
    visualize_drive(df_scored, results, save_path='drive_analysis_config.png')

    # METHOD 2: Use with custom weights (no config file)
    banner("METHOD 2: Custom weights")
    custom_weights = {
        'RPM': 0.20,
        'THROTTLE_POS': 0.35,  # More emphasis on throttle
        'ENGINE_LOAD': 0.25,
        'MAF': 0.10,
        'SPEED': 0.05,
        'INTAKE_PRESSURE': 0.05,
    }
    scorer_custom = DrivingAggressivenessScorer(weights=custom_weights)
    df_scored2, results2 = scorer_custom.analyze_drive('obd_data_log_20251012_121810.csv')

    # METHOD 3: Analyze without updating bounds (testing)
    banner("METHOD 3: Analysis without updating bounds")
    scorer_test = DrivingAggressivenessScorer()
    df_test, results_test = scorer_test.analyze_drive(
        'obd_data_log_20251012_121810.csv',
        update_bounds=False  # Don't update global bounds
    )

    # METHOD 4: Quick comparison script
    banner("METHOD 4: Compare multiple drives")
    # Uncomment when you have multiple CSV files:
    # comparison = compare_drives(scorer, [
    #     'obd_data_log_20251012_121810.csv',
    #     'obd_data_log_20251013_101234.csv',
    #     'obd_data_log_20251014_155030.csv'
    # ])

    banner("SETUP COMPLETE!")
    print("\nYour system is ready to:")
    print(" 1. Analyze individual drives")
    print(" 2. Compare multiple drives")
    print(" 3. Batch process folders")
    print(" 4. Dynamically update bounds")
    print(" 5. Generate visualizations")
    print("\nBounds file: obd_bounds.json")
    print("Config file: config.yaml")
    print("=" * 60 + "\n")
|
OBD/obd_analyzer.py
CHANGED
|
@@ -24,18 +24,22 @@ KPH_TO_MPS = 1 / 3.6
|
|
| 24 |
G_ACCELERATION = 9.80665
|
| 25 |
MIN_MOVING_SPEED_KPH = 2 # have to be moving
|
| 26 |
|
| 27 |
-
|
|
|
|
| 28 |
AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD = 40
|
| 29 |
-
AGGRESSIVE_RPM_HOLD_THRESHOLD =
|
| 30 |
-
HARSH_BRAKING_THRESHOLD_G = -0.25
|
| 31 |
|
| 32 |
-
|
| 33 |
-
AGGRESSIVE_RPM_ROC_THRESHOLD = 500
|
| 34 |
-
AGGRESSIVE_THROTTLE_ROC_THRESHOLD = 45
|
| 35 |
-
POSITIVE_ACCEL_FOR_ROC_CHECK_G = 0.1
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
MIN_DATA_POINTS_FOR_ROC = 2
|
| 41 |
|
|
@@ -67,15 +71,26 @@ def load_and_preprocess_data(csv_filepath):
|
|
| 67 |
# Handle empty DataFrame after potential filtering or if it was empty to begin with
|
| 68 |
return df # Or handle error appropriately
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
df[col] = np.nan
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
if 'SPEED' in df.columns:
|
| 81 |
df['SPEED_mps'] = df['SPEED'] * KPH_TO_MPS
|
|
@@ -115,8 +130,8 @@ def load_and_preprocess_data(csv_filepath):
|
|
| 115 |
return df
|
| 116 |
|
| 117 |
def classify_driving_style_stateful(df):
|
| 118 |
-
if df.empty or not all(col in df.columns for col in ['RPM', 'THROTTLE_POS', 'SPEED', 'acceleration_g']):
|
| 119 |
-
print("Warning: Missing
|
| 120 |
return pd.Series([DRIVING_STYLE_UNKNOWN] * len(df), index=df.index, dtype=str)
|
| 121 |
|
| 122 |
driving_styles = [DRIVING_STYLE_UNKNOWN] * len(df)
|
|
@@ -130,45 +145,63 @@ def classify_driving_style_stateful(df):
|
|
| 130 |
rpm_roc = df.loc[i, 'RPM_roc']
|
| 131 |
throttle_roc = df.loc[i, 'THROTTLE_roc']
|
| 132 |
|
| 133 |
-
row_style = DRIVING_STYLE_PASSIVE
|
| 134 |
is_moving = speed_kph > MIN_MOVING_SPEED_KPH
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
throttle > AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD and
|
| 140 |
-
is_moving)
|
| 141 |
-
|
| 142 |
-
is_actively_accelerating = accel_g > POSITIVE_ACCEL_FOR_ROC_CHECK_G
|
| 143 |
-
|
| 144 |
-
is_high_roc_trigger = (is_moving and
|
| 145 |
-
is_actively_accelerating and
|
| 146 |
-
(rpm_roc > AGGRESSIVE_RPM_ROC_THRESHOLD or
|
| 147 |
-
throttle_roc > AGGRESSIVE_THROTTLE_ROC_THRESHOLD))
|
| 148 |
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
if current_style == DRIVING_STYLE_AGGRESSIVE:
|
| 152 |
-
if is_currently_aggressive_event:
|
| 153 |
row_style = DRIVING_STYLE_AGGRESSIVE
|
| 154 |
-
elif rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and is_moving:
|
| 155 |
row_style = DRIVING_STYLE_AGGRESSIVE
|
| 156 |
-
else:
|
| 157 |
if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
|
| 158 |
row_style = DRIVING_STYLE_MODERATE
|
| 159 |
else:
|
| 160 |
row_style = DRIVING_STYLE_PASSIVE
|
| 161 |
-
else:
|
| 162 |
if is_currently_aggressive_event:
|
| 163 |
-
row_style = DRIVING_STYLE_AGGRESSIVE
|
| 164 |
-
else:
|
| 165 |
if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
|
| 166 |
row_style = DRIVING_STYLE_MODERATE
|
| 167 |
else:
|
| 168 |
row_style = DRIVING_STYLE_PASSIVE
|
| 169 |
|
| 170 |
driving_styles[i] = row_style
|
| 171 |
-
current_style = row_style
|
| 172 |
|
| 173 |
print("Stateful driving style classification complete.")
|
| 174 |
return pd.Series(driving_styles, index=df.index)
|
|
@@ -206,7 +239,7 @@ def main():
|
|
| 206 |
print(f"Error saving output CSV to {args.output_csv}: {e}")
|
| 207 |
else:
|
| 208 |
print("\n--- First 20 Rows of Analyzed Data (showing key fields) ---")
|
| 209 |
-
display_cols = ['timestamp', 'SPEED', 'RPM', 'THROTTLE_POS', 'acceleration_g', 'driving_style_analyzed']
|
| 210 |
display_cols = [col for col in display_cols if col in df.columns]
|
| 211 |
if display_cols: print(df[display_cols].head(20))
|
| 212 |
else: print("Key display columns not found in DataFrame.")
|
|
|
|
| 24 |
G_ACCELERATION = 9.80665
|
| 25 |
MIN_MOVING_SPEED_KPH = 2 # have to be moving
|
| 26 |
|
| 27 |
+
VERY_HIGH_RPM_AGGRESSIVE_THRESHOLD = 3500
|
| 28 |
+
AGGRESSIVE_RPM_ENTRY_THRESHOLD = 2900
|
| 29 |
AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD = 40
|
| 30 |
+
AGGRESSIVE_RPM_HOLD_THRESHOLD = 2400
|
| 31 |
+
HARSH_BRAKING_THRESHOLD_G = -0.25
|
| 32 |
|
| 33 |
+
HIGH_RPM_FOR_ROC_AGGRESSIVE_THRESHOLD = 2300
|
| 34 |
+
AGGRESSIVE_RPM_ROC_THRESHOLD = 500
|
| 35 |
+
AGGRESSIVE_THROTTLE_ROC_THRESHOLD = 45
|
| 36 |
+
POSITIVE_ACCEL_FOR_ROC_CHECK_G = 0.1
|
| 37 |
|
| 38 |
+
MIN_SPEED_FOR_HOLDING_GEAR_CHECK_KPH = 15
|
| 39 |
+
LOW_G_FOR_HOLDING_GEAR = 0.1
|
| 40 |
+
|
| 41 |
+
MODERATE_RPM_THRESHOLD = 2100
|
| 42 |
+
MODERATE_THROTTLE_THRESHOLD = 25
|
| 43 |
|
| 44 |
MIN_DATA_POINTS_FOR_ROC = 2
|
| 45 |
|
|
|
|
| 71 |
# Handle empty DataFrame after potential filtering or if it was empty to begin with
|
| 72 |
return df # Or handle error appropriately
|
| 73 |
|
| 74 |
+
# Define all possible numeric columns from current fuel efficiency logging
|
| 75 |
+
all_numeric_cols = ['SPEED', 'RPM', 'THROTTLE_POS', 'MAF', 'ENGINE_LOAD', 'INTAKE_PRESSURE',
|
| 76 |
+
'SHORT_FUEL_TRIM_1', 'SHORT_FUEL_TRIM_2', 'LONG_FUEL_TRIM_1', 'LONG_FUEL_TRIM_2']
|
| 77 |
+
|
| 78 |
+
# Only process columns that exist in the dataframe
|
| 79 |
+
numeric_cols = [col for col in all_numeric_cols if col in df.columns]
|
| 80 |
+
required_cols = ['SPEED', 'RPM', 'THROTTLE_POS'] # Essential for driving style analysis
|
| 81 |
+
|
| 82 |
+
# Ensure required columns exist
|
| 83 |
+
for col in required_cols:
|
| 84 |
+
if col not in df.columns:
|
| 85 |
+
print(f"Warning: Required column {col} not found. It will be filled with NaN.")
|
| 86 |
df[col] = np.nan
|
| 87 |
+
|
| 88 |
+
# Convert all numeric columns to numeric type
|
| 89 |
+
for col in numeric_cols:
|
| 90 |
+
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 91 |
+
|
| 92 |
+
# Fill missing values for all numeric columns
|
| 93 |
+
df[numeric_cols] = df[numeric_cols].ffill().fillna(0)
|
| 94 |
|
| 95 |
if 'SPEED' in df.columns:
|
| 96 |
df['SPEED_mps'] = df['SPEED'] * KPH_TO_MPS
|
|
|
|
| 130 |
return df
|
| 131 |
|
| 132 |
def classify_driving_style_stateful(df):
|
| 133 |
+
if df.empty or not all(col in df.columns for col in ['RPM', 'THROTTLE_POS', 'SPEED', 'acceleration_g', 'RPM_roc', 'THROTTLE_roc']):
|
| 134 |
+
print("Warning: Missing required columns for stateful classification.")
|
| 135 |
return pd.Series([DRIVING_STYLE_UNKNOWN] * len(df), index=df.index, dtype=str)
|
| 136 |
|
| 137 |
driving_styles = [DRIVING_STYLE_UNKNOWN] * len(df)
|
|
|
|
| 145 |
rpm_roc = df.loc[i, 'RPM_roc']
|
| 146 |
throttle_roc = df.loc[i, 'THROTTLE_roc']
|
| 147 |
|
| 148 |
+
row_style = DRIVING_STYLE_PASSIVE # Default for this row
|
| 149 |
is_moving = speed_kph > MIN_MOVING_SPEED_KPH
|
| 150 |
|
| 151 |
+
# --- Define Aggressive Triggers for this specific row ---
|
| 152 |
+
# 1. Absolute very high RPM
|
| 153 |
+
trigger_very_high_rpm = (rpm > VERY_HIGH_RPM_AGGRESSIVE_THRESHOLD and is_moving)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
+
# 2. High RPM + High Throttle (user's primary combo)
|
| 156 |
+
trigger_high_rpm_throttle = (rpm > AGGRESSIVE_RPM_ENTRY_THRESHOLD and
|
| 157 |
+
throttle > AGGRESSIVE_THROTTLE_ENTRY_THRESHOLD and
|
| 158 |
+
is_moving)
|
| 159 |
|
| 160 |
+
# 3. RoC-based (RPM or Throttle) during active acceleration, with RPM already elevated
|
| 161 |
+
is_actively_accelerating = accel_g > POSITIVE_ACCEL_FOR_ROC_CHECK_G
|
| 162 |
+
trigger_high_roc = (is_moving and is_actively_accelerating and
|
| 163 |
+
rpm > HIGH_RPM_FOR_ROC_AGGRESSIVE_THRESHOLD and
|
| 164 |
+
(rpm_roc > AGGRESSIVE_RPM_ROC_THRESHOLD or
|
| 165 |
+
throttle_roc > AGGRESSIVE_THROTTLE_ROC_THRESHOLD))
|
| 166 |
+
|
| 167 |
+
# 4. Holding gear aggressively (high RPM, moving, but low change in speed)
|
| 168 |
+
trigger_holding_gear = (rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and # Using hold RPM as base for this check
|
| 169 |
+
is_moving and
|
| 170 |
+
speed_kph > MIN_SPEED_FOR_HOLDING_GEAR_CHECK_KPH and
|
| 171 |
+
abs(accel_g) < LOW_G_FOR_HOLDING_GEAR)
|
| 172 |
+
|
| 173 |
+
# 5. Hard braking
|
| 174 |
+
trigger_hard_braking = (accel_g < HARSH_BRAKING_THRESHOLD_G and is_moving)
|
| 175 |
+
|
| 176 |
+
# Combine all triggers for the current row
|
| 177 |
+
is_currently_aggressive_event = (trigger_very_high_rpm or
|
| 178 |
+
trigger_high_rpm_throttle or
|
| 179 |
+
trigger_high_roc or
|
| 180 |
+
trigger_holding_gear or
|
| 181 |
+
trigger_hard_braking)
|
| 182 |
+
|
| 183 |
+
# --- Stateful Logic ---
|
| 184 |
if current_style == DRIVING_STYLE_AGGRESSIVE:
|
| 185 |
+
if is_currently_aggressive_event: # Re-triggered by a new event this row
|
| 186 |
row_style = DRIVING_STYLE_AGGRESSIVE
|
| 187 |
+
elif rpm > AGGRESSIVE_RPM_HOLD_THRESHOLD and is_moving: # Maintain based on RPM hold
|
| 188 |
row_style = DRIVING_STYLE_AGGRESSIVE
|
| 189 |
+
else: # Conditions to stay aggressive not met, transition out
|
| 190 |
if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
|
| 191 |
row_style = DRIVING_STYLE_MODERATE
|
| 192 |
else:
|
| 193 |
row_style = DRIVING_STYLE_PASSIVE
|
| 194 |
+
else: # current_style is Passive or Moderate
|
| 195 |
if is_currently_aggressive_event:
|
| 196 |
+
row_style = DRIVING_STYLE_AGGRESSIVE # Enter aggressive state
|
| 197 |
+
else: # Not an aggressive event, classify as Moderate or Passive
|
| 198 |
if (rpm > MODERATE_RPM_THRESHOLD or throttle > MODERATE_THROTTLE_THRESHOLD) and is_moving:
|
| 199 |
row_style = DRIVING_STYLE_MODERATE
|
| 200 |
else:
|
| 201 |
row_style = DRIVING_STYLE_PASSIVE
|
| 202 |
|
| 203 |
driving_styles[i] = row_style
|
| 204 |
+
current_style = row_style # Update the overall state for the next iteration
|
| 205 |
|
| 206 |
print("Stateful driving style classification complete.")
|
| 207 |
return pd.Series(driving_styles, index=df.index)
|
|
|
|
| 239 |
print(f"Error saving output CSV to {args.output_csv}: {e}")
|
| 240 |
else:
|
| 241 |
print("\n--- First 20 Rows of Analyzed Data (showing key fields) ---")
|
| 242 |
+
display_cols = ['timestamp', 'SPEED', 'RPM', 'THROTTLE_POS', 'acceleration_g', 'RPM_roc', 'THROTTLE_roc', 'driving_style_analyzed']
|
| 243 |
display_cols = [col for col in display_cols if col in df.columns]
|
| 244 |
if display_cols: print(df[display_cols].head(20))
|
| 245 |
else: print("Key display columns not found in DataFrame.")
|
OBD/obd_logger.py
CHANGED
|
@@ -3,80 +3,54 @@ import time
|
|
| 3 |
import datetime
|
| 4 |
import csv
|
| 5 |
import os
|
| 6 |
-
from collections import deque
|
| 7 |
-
import numpy as np
|
| 8 |
import shutil
|
| 9 |
import subprocess
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
MIN_SAMPLES_FOR_ROC_CHECK = SHORT_ROC_WINDOW_SIZE
|
| 33 |
-
ROC_THROTTLE_AGGRESSIVE_THRESHOLD = 25.0
|
| 34 |
-
ROC_RPM_AGGRESSIVE_THRESHOLD = 700.0
|
| 35 |
-
ROC_SPEED_AGGRESSIVE_THRESHOLD = 8.0
|
| 36 |
-
MIN_RPM_FOR_AGGRESSIVE_TRIGGER = 1000.0
|
| 37 |
-
AGGRESSIVE_EVENT_COOLDOWN_SAMPLES = 15
|
| 38 |
-
|
| 39 |
-
HIGH_FREQUENCY_PIDS = [
|
| 40 |
-
obd.commands.RPM,
|
| 41 |
-
obd.commands.THROTTLE_POS,
|
| 42 |
-
obd.commands.SPEED,
|
| 43 |
]
|
| 44 |
|
| 45 |
-
|
| 46 |
-
obd.commands.
|
| 47 |
-
obd.commands.
|
| 48 |
-
obd.commands.
|
| 49 |
-
obd.commands.INTAKE_TEMP,
|
| 50 |
-
obd.commands.TIMING_ADVANCE,
|
| 51 |
-
obd.commands.MAF,
|
| 52 |
-
obd.commands.INTAKE_PRESSURE,
|
| 53 |
-
obd.commands.SHORT_FUEL_TRIM_1,
|
| 54 |
-
obd.commands.LONG_FUEL_TRIM_1,
|
| 55 |
-
obd.commands.SHORT_FUEL_TRIM_2,
|
| 56 |
obd.commands.LONG_FUEL_TRIM_2,
|
| 57 |
-
obd.commands.COMMANDED_EQUIV_RATIO,
|
| 58 |
-
obd.commands.O2_B1S2,
|
| 59 |
-
obd.commands.O2_B2S2,
|
| 60 |
-
obd.commands.O2_S1_WR_VOLTAGE,
|
| 61 |
-
obd.commands.COMMANDED_EGR,
|
| 62 |
]
|
| 63 |
|
|
|
|
|
|
|
|
|
|
| 64 |
ALL_PIDS_TO_LOG = HIGH_FREQUENCY_PIDS + LOW_FREQUENCY_PIDS_POOL
|
| 65 |
|
| 66 |
CSV_FILENAME_BASE = "obd_data_log"
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
DUPLICATE_CSV_DIR = os.path.join(LOGS_BASE_DIR, "DuplicateCSV")
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
WIFI_PROTOCOL = "6"
|
| 76 |
-
USE_WIFI_SETTINGS = False # using socat to mimic serial connection
|
| 77 |
|
| 78 |
def get_pid_value(connection, pid_command):
|
| 79 |
-
"""Queries a PID and returns its value
|
| 80 |
try:
|
| 81 |
response = connection.query(pid_command, force=True)
|
| 82 |
if response.is_null() or response.value is None:
|
|
@@ -87,47 +61,98 @@ def get_pid_value(connection, pid_command):
|
|
| 87 |
except Exception as e:
|
| 88 |
print(f"Error querying {pid_command.name}: {e}")
|
| 89 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
def perform_logging_session():
|
| 92 |
-
|
| 93 |
-
print("
|
| 94 |
-
print("
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
initial_driving_style = ""
|
| 98 |
-
initial_road_type = ""
|
| 99 |
-
initial_traffic_condition = ""
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
# Prepare Low-Frequency PID groups
|
| 106 |
-
low_frequency_pid_groups = []
|
| 107 |
-
if LOW_FREQUENCY_PIDS_POOL:
|
| 108 |
-
chunk_size = (len(LOW_FREQUENCY_PIDS_POOL) + NUM_LOW_FREQUENCY_GROUPS - 1) // NUM_LOW_FREQUENCY_GROUPS
|
| 109 |
-
for i in range(0, len(LOW_FREQUENCY_PIDS_POOL), chunk_size):
|
| 110 |
-
low_frequency_pid_groups.append(LOW_FREQUENCY_PIDS_POOL[i:i + chunk_size])
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
current_low_frequency_group_index = 0
|
| 118 |
|
| 119 |
current_pid_values = {pid.name: '' for pid in ALL_PIDS_TO_LOG}
|
| 120 |
|
| 121 |
-
|
| 122 |
-
for dir_path in [ORIGINAL_CSV_DIR, DUPLICATE_CSV_DIR]: # Add ANALYZED_OUTPUT_DIR if used
|
| 123 |
try:
|
| 124 |
os.makedirs(dir_path, exist_ok=True)
|
| 125 |
print(f"Ensured directory exists: {dir_path}")
|
| 126 |
except OSError as e:
|
| 127 |
print(f"Error creating directory {dir_path}: {e}. Attempting to use current directory.")
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
print("Cannot create original log directory. Exiting.")
|
| 131 |
return None
|
| 132 |
|
| 133 |
current_session_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
@@ -135,81 +160,72 @@ def perform_logging_session():
|
|
| 135 |
original_csv_filepath = os.path.join(ORIGINAL_CSV_DIR, csv_file_name_only)
|
| 136 |
|
| 137 |
try:
|
| 138 |
-
if
|
| 139 |
-
print(
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
fast=False,
|
| 144 |
-
timeout=30)
|
| 145 |
-
else:
|
| 146 |
-
print("Attempting to connect via socat PTY /dev/ttys011...")
|
| 147 |
-
connection = obd.OBD("/dev/ttys086", fast=True, timeout=30) # Auto-scan for USB/Bluetooth
|
| 148 |
-
|
| 149 |
-
if not connection.is_connected():
|
| 150 |
-
print("Failed to connect to OBD-II adapter.")
|
| 151 |
-
print(f"Connection status: {connection.status()}")
|
| 152 |
-
return None
|
| 153 |
-
|
| 154 |
-
print(f"Successfully connected to OBD-II adapter: {connection.port_name()}")
|
| 155 |
-
print(f"Adapter status: {connection.status()}")
|
| 156 |
-
print(f"Supported PIDs (sample):")
|
| 157 |
-
supported_commands = connection.supported_commands
|
| 158 |
-
for i, cmd in enumerate(supported_commands):
|
| 159 |
-
print(f" - {cmd.name}")
|
| 160 |
-
if not supported_commands:
|
| 161 |
-
print("No commands")
|
| 162 |
|
| 163 |
# Creating initial full PID sample to have fully populated rows from beginning
|
| 164 |
print("\nPerforming initial full PID sample...")
|
| 165 |
initial_log_entry = {
|
| 166 |
-
'timestamp': datetime.datetime.now().isoformat()
|
| 167 |
-
'driving_style': initial_driving_style,
|
| 168 |
-
'road_type': initial_road_type,
|
| 169 |
-
'traffic_condition': initial_traffic_condition
|
| 170 |
}
|
| 171 |
|
| 172 |
-
print("Polling initial
|
| 173 |
-
for pid_command in
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
for pid_obj in ALL_PIDS_TO_LOG:
|
| 189 |
if pid_obj.name not in initial_log_entry:
|
| 190 |
-
initial_log_entry[pid_obj.name] = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
except Exception as e:
|
| 193 |
print(f"An error occurred during connection or initial PID sample: {e}")
|
| 194 |
if connection and connection.is_connected():
|
| 195 |
connection.close()
|
| 196 |
-
return None
|
| 197 |
|
| 198 |
file_exists = os.path.isfile(original_csv_filepath)
|
| 199 |
try:
|
| 200 |
with open(original_csv_filepath, 'a', newline='') as csvfile:
|
| 201 |
-
|
| 202 |
-
header_names = ['timestamp',
|
| 203 |
-
'driving_style', 'road_type', 'traffic_condition', # Original placeholder columns
|
| 204 |
-
'driving_style_analyzed', 'road_type_analyzed', 'traffic_condition_analyzed' # For analyzer
|
| 205 |
-
] + [pid.name for pid in ALL_PIDS_TO_LOG]
|
| 206 |
-
|
| 207 |
-
# Remove duplicates if any PID name is already in the first part
|
| 208 |
-
processed_headers = []
|
| 209 |
-
for item in header_names:
|
| 210 |
-
if item not in processed_headers:
|
| 211 |
-
processed_headers.append(item)
|
| 212 |
-
header_names = processed_headers
|
| 213 |
|
| 214 |
writer = csv.DictWriter(csvfile, fieldnames=header_names)
|
| 215 |
|
|
@@ -218,74 +234,106 @@ def perform_logging_session():
|
|
| 218 |
print(f"Created new CSV file: {original_csv_filepath} with headers: {header_names}")
|
| 219 |
|
| 220 |
if initial_log_entry:
|
| 221 |
-
# Add placeholder columns for analyzer to the initial entry
|
| 222 |
-
initial_log_entry['driving_style_analyzed'] = ''
|
| 223 |
-
initial_log_entry['road_type_analyzed'] = ''
|
| 224 |
-
initial_log_entry['traffic_condition_analyzed'] = ''
|
| 225 |
writer.writerow(initial_log_entry)
|
| 226 |
csvfile.flush()
|
| 227 |
-
print(f"Logged initial full sample
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
|
| 232 |
-
print(f"
|
| 233 |
-
print(f"Polling one group of low-frequency PIDs every {LOW_FREQUENCY_GROUP_POLL_INTERVAL} second(s).")
|
| 234 |
-
print(f"Low-frequency PIDs divided into {len(low_frequency_pid_groups)} groups.")
|
| 235 |
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
loop_start_time = time.monotonic()
|
| 239 |
current_datetime = datetime.datetime.now()
|
| 240 |
timestamp_iso = current_datetime.isoformat()
|
| 241 |
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 246 |
-
if value is not None:
|
| 247 |
-
hf_reads += 1
|
| 248 |
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
| 256 |
value = get_pid_value(connection, pid_command)
|
| 257 |
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 258 |
if value is not None:
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
final_log_entry = {
|
| 268 |
-
'timestamp': timestamp_iso
|
| 269 |
-
'driving_style': initial_driving_style,
|
| 270 |
-
'road_type': initial_road_type,
|
| 271 |
-
'traffic_condition': initial_traffic_condition,
|
| 272 |
-
'driving_style_analyzed': '',
|
| 273 |
-
'road_type_analyzed': '',
|
| 274 |
-
'traffic_condition_analyzed': ''
|
| 275 |
}
|
| 276 |
-
# Add all PID values for this cycle from current_pid_values
|
| 277 |
for pid_obj in ALL_PIDS_TO_LOG:
|
| 278 |
final_log_entry[pid_obj.name] = current_pid_values.get(pid_obj.name, '')
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
writer.writerow(final_log_entry)
|
| 281 |
csvfile.flush()
|
| 282 |
|
| 283 |
log_count += 1
|
| 284 |
if log_count % 10 == 0:
|
| 285 |
-
status_msg = f"
|
| 286 |
-
if
|
| 287 |
-
|
| 288 |
-
|
|
|
|
|
|
|
| 289 |
|
| 290 |
elapsed_time_in_loop = time.monotonic() - loop_start_time
|
| 291 |
sleep_duration = max(0, BASE_LOG_INTERVAL - elapsed_time_in_loop)
|
|
@@ -296,79 +344,188 @@ def perform_logging_session():
|
|
| 296 |
except Exception as e:
|
| 297 |
print(f"An error occurred during logging: {e}")
|
| 298 |
finally:
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
connection.close()
|
| 302 |
-
print(f"Data logging stopped. Original CSV file '{original_csv_filepath}' saved.")
|
| 303 |
|
| 304 |
-
return original_csv_filepath
|
| 305 |
|
| 306 |
-
def
|
| 307 |
-
if not
|
| 308 |
-
print(
|
| 309 |
return None
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
|
|
|
| 313 |
|
| 314 |
-
|
| 315 |
-
original_filename = os.path.basename(original_filepath)
|
| 316 |
-
base, ext = os.path.splitext(original_filename)
|
| 317 |
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
duplicate_filepath = os.path.join(DUPLICATE_CSV_DIR, duplicate_filename)
|
| 321 |
|
| 322 |
try:
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
except Exception as e:
|
| 327 |
-
print(f"Error
|
|
|
|
|
|
|
| 328 |
return None
|
| 329 |
|
| 330 |
-
def run_analyzer_on_csv(csv_to_analyze_path):
|
| 331 |
-
if not csv_to_analyze_path or not os.path.exists(csv_to_analyze_path):
|
| 332 |
-
print(f"Error: Analyzer input CSV not found: {csv_to_analyze_path}")
|
| 333 |
-
return
|
| 334 |
|
| 335 |
-
|
| 336 |
-
|
|
|
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
-
analyzed_file_basename = os.path.basename(csv_to_analyze_path).replace("_to_analyze.csv", "_final_analyzed.csv")
|
| 343 |
-
final_output_path = os.path.join(DUPLICATE_CSV_DIR, analyzed_file_basename)
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
-
print(f"Running analyzer: {' '.join(command)}")
|
| 354 |
try:
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
-
if __name__ == "__main__":
|
| 365 |
-
original_log_file = perform_logging_session()
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
if duplicated_log_file:
|
| 371 |
-
run_analyzer_on_csv(duplicated_log_file)
|
| 372 |
-
print(f"Process complete. Original log: {original_log_file}, Analyzed log copy: {duplicated_log_file}")
|
| 373 |
-
else:
|
| 374 |
-
print("OBD logging did not produce a valid CSV file. Skipping analysis.")
|
|
|
|
| 3 |
import datetime
|
| 4 |
import csv
|
| 5 |
import os
|
|
|
|
|
|
|
| 6 |
import shutil
|
| 7 |
import subprocess
|
| 8 |
+
import sys
|
| 9 |
+
import select
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
from logging_wrapper import auto_score_on_completion
|
| 13 |
+
SCORING_AVAILABLE = True
|
| 14 |
+
print("Auto-scoring module loaded")
|
| 15 |
+
except ImportError:
|
| 16 |
+
SCORING_AVAILABLE = False
|
| 17 |
+
print("Auto-scoring module not found - scoring will be skipped")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
CRITICAL_FUEL_PIDS = [
|
| 21 |
+
obd.commands.RPM,
|
| 22 |
+
obd.commands.SPEED,
|
| 23 |
+
obd.commands.THROTTLE_POS,
|
| 24 |
+
obd.commands.MAF,
|
| 25 |
+
]
|
| 26 |
+
|
| 27 |
+
SECONDARY_FUEL_PIDS = [
|
| 28 |
+
obd.commands.ENGINE_LOAD,
|
| 29 |
+
obd.commands.INTAKE_PRESSURE,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
]
|
| 31 |
|
| 32 |
+
TERTIARY_FUEL_PIDS = [
|
| 33 |
+
obd.commands.SHORT_FUEL_TRIM_1,
|
| 34 |
+
obd.commands.SHORT_FUEL_TRIM_2,
|
| 35 |
+
obd.commands.LONG_FUEL_TRIM_1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
obd.commands.LONG_FUEL_TRIM_2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
]
|
| 38 |
|
| 39 |
+
HIGH_FREQUENCY_PIDS = CRITICAL_FUEL_PIDS
|
| 40 |
+
LOW_FREQUENCY_PIDS_POOL = SECONDARY_FUEL_PIDS + TERTIARY_FUEL_PIDS
|
| 41 |
+
|
| 42 |
ALL_PIDS_TO_LOG = HIGH_FREQUENCY_PIDS + LOW_FREQUENCY_PIDS_POOL
|
| 43 |
|
| 44 |
CSV_FILENAME_BASE = "obd_data_log"
|
| 45 |
+
LOGS_BASE_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "logs")
|
| 46 |
+
FUEL_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "FuelLogs")
|
| 47 |
+
ANALYSED_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "analysedLogsAutomated")
|
|
|
|
| 48 |
|
| 49 |
+
SCORED_LOGS_DIR = os.path.join(LOGS_BASE_DIR, "ScoredLogs")
|
| 50 |
+
ORIGINAL_CSV_DIR = FUEL_LOGS_DIR
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def get_pid_value(connection, pid_command):
|
| 53 |
+
"""Queries a PID and returns its value"""
|
| 54 |
try:
|
| 55 |
response = connection.query(pid_command, force=True)
|
| 56 |
if response.is_null() or response.value is None:
|
|
|
|
| 61 |
except Exception as e:
|
| 62 |
print(f"Error querying {pid_command.name}: {e}")
|
| 63 |
return None
|
| 64 |
+
|
| 65 |
+
def calculate_fuel_metrics(csv_path):
    """Calculate fuel consumption and efficiency from MAF and SPEED data.

    Reads the CSV at ``csv_path``, derives cumulative ``Fuel_Used`` (L),
    ``Distance`` (km) and ``Fuel_efficiency (L/100km)`` columns from the
    logged MAF (g/s) and SPEED (km/h) samples, writes the result back to
    the same file, and prints a short summary.

    Args:
        csv_path: Path to a logged drive CSV with 'timestamp', 'MAF' and
            'SPEED' columns.

    Returns:
        ``csv_path`` on success, ``None`` on failure.
    """
    try:
        df = pd.read_csv(csv_path)

        # Constants
        AFR = 14.7          # Stoichiometric Air-Fuel Ratio for petrol
        FUEL_DENSITY = 737  # g/L for petrol

        # Calculate time delta between rows (in seconds).
        # fillna(0) covers the first row (diff() yields NaN there) without
        # assuming a default RangeIndex like `df.loc[0, ...]` would.
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['time_delta'] = df['timestamp'].diff().dt.total_seconds().fillna(0)

        # The logger writes '' for failed PID reads, so MAF/SPEED may contain
        # blanks; coerce to numeric and treat missing samples as 0 so a bad
        # row doesn't poison the cumulative sums.
        maf = pd.to_numeric(df['MAF'], errors='coerce').fillna(0)
        speed = pd.to_numeric(df['SPEED'], errors='coerce').fillna(0)

        # Instantaneous fuel rate (L/hr) from MAF:
        # fuel grams/s = MAF / AFR; litres = grams / density.
        df['fuel_rate_L_per_hr'] = (maf * 3600) / (AFR * FUEL_DENSITY)

        # Fuel used in this time interval (L)
        df['fuel_used_interval'] = (df['fuel_rate_L_per_hr'] / 3600) * df['time_delta']

        # Distance traveled in this interval (km); SPEED is km/h
        df['distance_interval'] = (speed / 3600) * df['time_delta']

        # Calculate cumulative values
        df['Fuel_Used'] = df['fuel_used_interval'].cumsum()
        df['Distance'] = df['distance_interval'].cumsum()

        # Fuel efficiency (L/100km); 0 until some distance has accumulated
        df['Fuel_efficiency (L/100km)'] = np.where(
            df['Distance'] > 0,
            (df['Fuel_Used'] / df['Distance']) * 100,
            0
        )

        df['Fuel_Used'] = df['Fuel_Used'].round(3)
        df['Distance'] = df['Distance'].round(2)
        df['Fuel_efficiency (L/100km)'] = df['Fuel_efficiency (L/100km)'].round(2)

        # Drop intermediate calculation columns
        df = df.drop(columns=['time_delta', 'fuel_rate_L_per_hr',
                              'fuel_used_interval', 'distance_interval'])

        # Save back to CSV
        df.to_csv(csv_path, index=False)

        # Guard the summary against a header-only (empty) log file, which
        # would otherwise raise on iloc[-1].
        if len(df) == 0:
            print("No data rows in log; skipping summary.")
            return csv_path

        # Print summary
        total_fuel = df['Fuel_Used'].iloc[-1]
        total_distance = df['Distance'].iloc[-1]
        avg_efficiency = df['Fuel_efficiency (L/100km)'].iloc[-1]

        print(f"Total Fuel Used: {total_fuel:.3f} L")
        print(f"Total Distance: {total_distance:.2f} km")
        print(f"Average Efficiency: {avg_efficiency:.2f} L/100km")

        return csv_path

    except Exception as e:
        print(f"Error calculating fuel metrics: {e}")
        import traceback
        traceback.print_exc()
        return None
|
| 126 |
+
|
| 127 |
|
| 128 |
+
def perform_logging_session(connection):
|
| 129 |
+
"""Perform a single logging session with an existing OBD connection."""
|
| 130 |
+
print(f"\nStarting new fuel efficiency logging session")
|
| 131 |
+
print("Commands:")
|
| 132 |
+
print(" - Type 'next' and press Enter to finish this drive and start a new one")
|
| 133 |
+
print(" - Type 'quit' and press Enter to stop all logging")
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
CRITICAL_PID_INTERVAL = 0.65
|
| 137 |
+
SECONDARY_PID_INTERVAL = 2.0
|
| 138 |
+
TERTIARY_PID_INTERVAL = 5.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
+
last_critical_poll_time = time.monotonic() - CRITICAL_PID_INTERVAL
|
| 141 |
+
last_secondary_poll_time = time.monotonic() - SECONDARY_PID_INTERVAL
|
| 142 |
+
last_tertiary_poll_time = time.monotonic() - TERTIARY_PID_INTERVAL
|
| 143 |
+
|
| 144 |
+
BASE_LOG_INTERVAL = CRITICAL_PID_INTERVAL
|
|
|
|
| 145 |
|
| 146 |
current_pid_values = {pid.name: '' for pid in ALL_PIDS_TO_LOG}
|
| 147 |
|
| 148 |
+
for dir_path in [FUEL_LOGS_DIR, ANALYSED_LOGS_DIR, SCORED_LOGS_DIR]:
|
|
|
|
| 149 |
try:
|
| 150 |
os.makedirs(dir_path, exist_ok=True)
|
| 151 |
print(f"Ensured directory exists: {dir_path}")
|
| 152 |
except OSError as e:
|
| 153 |
print(f"Error creating directory {dir_path}: {e}. Attempting to use current directory.")
|
| 154 |
+
if dir_path == FUEL_LOGS_DIR:
|
| 155 |
+
print("Cannot create fuel log directory. Exiting.")
|
|
|
|
| 156 |
return None
|
| 157 |
|
| 158 |
current_session_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
|
| 160 |
original_csv_filepath = os.path.join(ORIGINAL_CSV_DIR, csv_file_name_only)
|
| 161 |
|
| 162 |
try:
|
| 163 |
+
if not connection or not connection.is_connected():
|
| 164 |
+
print("OBD connection not available")
|
| 165 |
+
return None, "quit"
|
| 166 |
+
|
| 167 |
+
print(f"Using existing OBD connection: {connection.port_name()}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
# Creating initial full PID sample to have fully populated rows from beginning
|
| 170 |
print("\nPerforming initial full PID sample...")
|
| 171 |
initial_log_entry = {
|
| 172 |
+
'timestamp': datetime.datetime.now().isoformat()
|
|
|
|
|
|
|
|
|
|
| 173 |
}
|
| 174 |
|
| 175 |
+
print("Polling initial Critical Fuel PIDs...")
|
| 176 |
+
for pid_command in CRITICAL_FUEL_PIDS:
|
| 177 |
+
try:
|
| 178 |
+
value = get_pid_value(connection, pid_command)
|
| 179 |
+
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 180 |
+
initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
|
| 181 |
+
except Exception as e:
|
| 182 |
+
print(f"Warning: Failed to get {pid_command.name}: {e}")
|
| 183 |
+
current_pid_values[pid_command.name] = ''
|
| 184 |
+
initial_log_entry[pid_command.name] = ''
|
| 185 |
+
|
| 186 |
+
print("Polling initial Secondary Fuel PIDs...")
|
| 187 |
+
for pid_command in SECONDARY_FUEL_PIDS:
|
| 188 |
+
try:
|
| 189 |
+
value = get_pid_value(connection, pid_command)
|
| 190 |
+
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 191 |
+
initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
|
| 192 |
+
except Exception as e:
|
| 193 |
+
print(f"Warning: Failed to get {pid_command.name}: {e}")
|
| 194 |
+
current_pid_values[pid_command.name] = ''
|
| 195 |
+
initial_log_entry[pid_command.name] = ''
|
| 196 |
+
|
| 197 |
+
print("Polling initial Tertiary Fuel PIDs...")
|
| 198 |
+
for pid_command in TERTIARY_FUEL_PIDS:
|
| 199 |
+
try:
|
| 200 |
+
value = get_pid_value(connection, pid_command)
|
| 201 |
+
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 202 |
+
initial_log_entry[pid_command.name] = current_pid_values[pid_command.name]
|
| 203 |
+
except Exception as e:
|
| 204 |
+
print(f"Warning: Failed to get {pid_command.name}: {e}")
|
| 205 |
+
current_pid_values[pid_command.name] = ''
|
| 206 |
+
initial_log_entry[pid_command.name] = ''
|
| 207 |
|
| 208 |
for pid_obj in ALL_PIDS_TO_LOG:
|
| 209 |
if pid_obj.name not in initial_log_entry:
|
| 210 |
+
initial_log_entry[pid_obj.name] = ''
|
| 211 |
+
|
| 212 |
+
# Empty driving style and fuel columns
|
| 213 |
+
initial_log_entry['Driving_style'] = ''
|
| 214 |
+
initial_log_entry['Fuel_efficiency (L/100km)'] = ''
|
| 215 |
+
initial_log_entry['Distance'] = ''
|
| 216 |
+
initial_log_entry['Fuel_Used'] = ''
|
| 217 |
+
initial_log_entry['Route'] = ''
|
| 218 |
|
| 219 |
except Exception as e:
|
| 220 |
print(f"An error occurred during connection or initial PID sample: {e}")
|
| 221 |
if connection and connection.is_connected():
|
| 222 |
connection.close()
|
| 223 |
+
return None, "quit"
|
| 224 |
|
| 225 |
file_exists = os.path.isfile(original_csv_filepath)
|
| 226 |
try:
|
| 227 |
with open(original_csv_filepath, 'a', newline='') as csvfile:
|
| 228 |
+
header_names = ['timestamp'] + [pid.name for pid in ALL_PIDS_TO_LOG] + ['Driving_style', 'Fuel_efficiency (L/100km)', 'Distance', 'Fuel_Used', 'Route']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
writer = csv.DictWriter(csvfile, fieldnames=header_names)
|
| 231 |
|
|
|
|
| 234 |
print(f"Created new CSV file: {original_csv_filepath} with headers: {header_names}")
|
| 235 |
|
| 236 |
if initial_log_entry:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
writer.writerow(initial_log_entry)
|
| 238 |
csvfile.flush()
|
| 239 |
+
print(f"Logged initial full sample with all fuel efficiency PIDs.")
|
| 240 |
|
| 241 |
+
log_count = 0
|
| 242 |
+
user_stop_requested = False
|
| 243 |
|
| 244 |
+
print(f"Started logging")
|
|
|
|
|
|
|
| 245 |
|
| 246 |
+
while not user_stop_requested:
|
| 247 |
+
if log_count % 100 == 0 and log_count > 0:
|
| 248 |
+
print(f"Debug: Main loop running, iteration {log_count}")
|
| 249 |
+
|
| 250 |
+
# Check for non-blocking input
|
| 251 |
+
if select.select([sys.stdin], [], [], 0.0)[0]:
|
| 252 |
+
user_command = sys.stdin.readline().strip().lower()
|
| 253 |
+
if user_command == "next":
|
| 254 |
+
print("\nUser typed 'next'. Finishing current drive...")
|
| 255 |
+
user_stop_requested = True
|
| 256 |
+
break
|
| 257 |
+
elif user_command == "quit":
|
| 258 |
+
print("\nUser typed 'quit'. Stopping all logging...")
|
| 259 |
+
user_stop_requested = True
|
| 260 |
+
return original_csv_filepath, "quit"
|
| 261 |
+
else:
|
| 262 |
+
print(f"Input detected: '{user_command}'. Type 'next' or 'quit'.", end='\r')
|
| 263 |
+
|
| 264 |
loop_start_time = time.monotonic()
|
| 265 |
current_datetime = datetime.datetime.now()
|
| 266 |
timestamp_iso = current_datetime.isoformat()
|
| 267 |
|
| 268 |
+
critical_reads = 0
|
| 269 |
+
secondary_reads = 0
|
| 270 |
+
tertiary_reads = 0
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
+
# Always poll critical PIDs (highest frequency)
|
| 273 |
+
if (time.monotonic() - last_critical_poll_time) >= CRITICAL_PID_INTERVAL:
|
| 274 |
+
if not connection or not connection.is_connected():
|
| 275 |
+
print("\nOBD connection lost during logging. Ending session.")
|
| 276 |
+
user_stop_requested = True
|
| 277 |
+
break
|
| 278 |
+
|
| 279 |
+
for pid_command in CRITICAL_FUEL_PIDS:
|
| 280 |
value = get_pid_value(connection, pid_command)
|
| 281 |
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 282 |
if value is not None:
|
| 283 |
+
critical_reads += 1
|
| 284 |
+
last_critical_poll_time = time.monotonic()
|
| 285 |
+
|
| 286 |
+
# Poll secondary PIDs at medium frequency
|
| 287 |
+
if (time.monotonic() - last_secondary_poll_time) >= SECONDARY_PID_INTERVAL:
|
| 288 |
+
if not connection or not connection.is_connected():
|
| 289 |
+
print("\nOBD connection lost during logging. Ending session.")
|
| 290 |
+
user_stop_requested = True
|
| 291 |
+
break
|
| 292 |
|
| 293 |
+
for pid_command in SECONDARY_FUEL_PIDS:
|
| 294 |
+
value = get_pid_value(connection, pid_command)
|
| 295 |
+
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 296 |
+
if value is not None:
|
| 297 |
+
secondary_reads += 1
|
| 298 |
+
last_secondary_poll_time = time.monotonic()
|
| 299 |
+
|
| 300 |
+
# Poll tertiary PIDs at low frequency
|
| 301 |
+
if (time.monotonic() - last_tertiary_poll_time) >= TERTIARY_PID_INTERVAL:
|
| 302 |
+
if not connection or not connection.is_connected():
|
| 303 |
+
print("\nOBD connection lost during logging. Ending session.")
|
| 304 |
+
user_stop_requested = True
|
| 305 |
+
break
|
| 306 |
+
|
| 307 |
+
for pid_command in TERTIARY_FUEL_PIDS:
|
| 308 |
+
value = get_pid_value(connection, pid_command)
|
| 309 |
+
current_pid_values[pid_command.name] = value if value is not None else ''
|
| 310 |
+
if value is not None:
|
| 311 |
+
tertiary_reads += 1
|
| 312 |
+
last_tertiary_poll_time = time.monotonic()
|
| 313 |
|
| 314 |
final_log_entry = {
|
| 315 |
+
'timestamp': timestamp_iso
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
}
|
|
|
|
| 317 |
for pid_obj in ALL_PIDS_TO_LOG:
|
| 318 |
final_log_entry[pid_obj.name] = current_pid_values.get(pid_obj.name, '')
|
| 319 |
|
| 320 |
+
final_log_entry['Driving_style'] = ''
|
| 321 |
+
final_log_entry['Fuel_efficiency (L/100km)'] = ''
|
| 322 |
+
final_log_entry['Distance'] = ''
|
| 323 |
+
final_log_entry['Fuel_Used'] = ''
|
| 324 |
+
final_log_entry['Route'] = ''
|
| 325 |
+
|
| 326 |
writer.writerow(final_log_entry)
|
| 327 |
csvfile.flush()
|
| 328 |
|
| 329 |
log_count += 1
|
| 330 |
if log_count % 10 == 0:
|
| 331 |
+
status_msg = f"Entry {log_count} - Critical: {critical_reads}/{len(CRITICAL_FUEL_PIDS)}"
|
| 332 |
+
if secondary_reads > 0:
|
| 333 |
+
status_msg += f" Secondary: {secondary_reads}/{len(SECONDARY_FUEL_PIDS)}"
|
| 334 |
+
if tertiary_reads > 0:
|
| 335 |
+
status_msg += f" Tertiary: {tertiary_reads}/{len(TERTIARY_FUEL_PIDS)}"
|
| 336 |
+
print(status_msg + " " * 20, end='\r')
|
| 337 |
|
| 338 |
elapsed_time_in_loop = time.monotonic() - loop_start_time
|
| 339 |
sleep_duration = max(0, BASE_LOG_INTERVAL - elapsed_time_in_loop)
|
|
|
|
| 344 |
except Exception as e:
|
| 345 |
print(f"An error occurred during logging: {e}")
|
| 346 |
finally:
|
| 347 |
+
print(" " * 100, end='\r')
|
| 348 |
+
print(f"Drive completed - data saved to: {os.path.basename(original_csv_filepath)}")
|
|
|
|
|
|
|
| 349 |
|
| 350 |
+
return original_csv_filepath, "next"
|
| 351 |
|
| 352 |
+
def run_scorer_on_csv(original_csv_path):
    """Score a logged drive for aggressiveness and persist the results.

    Writes a scored copy of the CSV, a JSON score summary, and (best effort)
    a visualization PNG into ``SCORED_LOGS_DIR``.

    Args:
        original_csv_path: Path to the raw drive CSV to score.

    Returns:
        Path of the scored CSV on success, ``None`` when scoring is
        unavailable or fails.
    """
    if not SCORING_AVAILABLE:
        print("Scoring module not available, skipping aggressiveness scoring")
        return None

    if not original_csv_path or not os.path.exists(original_csv_path):
        print(f"Error: Original CSV not found for scoring: {original_csv_path}")
        return None

    print(f"\nRunning aggressiveness scorer...")

    stem, suffix = os.path.splitext(os.path.basename(original_csv_path))

    try:
        # Import the scorer lazily so the logger still runs when the
        # scoring package is absent.
        from driving_aggressiveness_scorer import DrivingAggressivenessScorer
        import json

        # Normalisation bounds live next to the logs and are refined on
        # every analyzed drive (update_bounds=True below).
        scorer = DrivingAggressivenessScorer(
            bounds_file=os.path.join(LOGS_BASE_DIR, 'obd_bounds.json'))

        df_scored, results = scorer.analyze_drive(str(original_csv_path), update_bounds=True)

        # Stamp the aggregate drive score onto every row.
        df_scored['drive_score'] = results['final_score']

        # Persist the per-sample scores.
        scored_csv_path = os.path.join(SCORED_LOGS_DIR, f"{stem}_scored{suffix}")
        df_scored.to_csv(scored_csv_path, index=False)
        print(f"Scored CSV saved: {os.path.basename(scored_csv_path)}")

        # Persist the aggregate results as a JSON summary.
        summary_json_path = os.path.join(SCORED_LOGS_DIR, f"{stem}_score_summary.json")
        with open(summary_json_path, 'w') as fh:
            json.dump({
                'timestamp': datetime.datetime.now().isoformat(),
                'original_file': str(original_csv_path),
                'scored_file': str(scored_csv_path),
                'results': results,
            }, fh, indent=2)
        print(f"Score summary saved: {os.path.basename(summary_json_path)}")

        # Visualization is best effort - a plotting failure must not lose
        # the scored data already written above.
        try:
            from visualiseScorer import visualize_drive
            visualization_path = os.path.join(SCORED_LOGS_DIR, f"{stem}_visualization.png")
            visualize_drive(df_scored, results, save_path=visualization_path)
            print(f"Visualization saved: {os.path.basename(visualization_path)}")
        except Exception as viz_error:
            print(f"Warning: Could not generate visualization: {viz_error}")

        print(f"Drive Score: {results['final_score']:.1f}/100")

        return scored_csv_path

    except Exception as e:
        print(f"Error running scorer: {e}")
        import traceback
        traceback.print_exc()
        return None
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
+
def initialize_obd_connection():
    """Initialize the OBD-II connection once so it can be reused across sessions.

    Returns:
        A connected ``obd.OBD`` instance on success, ``None`` on failure.
    """
    # Single source of truth for the adapter device: previously the log
    # message said /dev/ttys006 while the code actually opened /dev/ttys002,
    # which made connection failures confusing to diagnose.
    port = "/dev/ttys002"
    connection = None

    try:
        print(f"Attempting to connect via socat PTY {port}...")
        # fast=True skips some ELM327 init commands; the 30 s timeout
        # tolerates slow adapter handshakes.
        connection = obd.OBD(port, fast=True, timeout=30)

        if not connection.is_connected():
            print("Failed to connect to OBD-II adapter.")
            print(f"Connection status: {connection.status()}")
            return None

        print(f"Successfully connected to OBD-II adapter: {connection.port_name()}")
        print(f"Adapter status: {connection.status()}")
        return connection

    except Exception as e:
        print(f"An error occurred during OBD connection: {e}")
        return None
|
| 439 |
|
|
|
|
|
|
|
| 440 |
|
| 441 |
+
def main():
    """Entry point: run repeated fuel-efficiency logging sessions over one
    shared OBD connection, then fuel-metric calculation and aggressiveness
    scoring after each drive.

    Sessions continue until the user types 'quit' during a session or hits
    Ctrl+C; 'next' ends one drive and starts another.
    """
    print("Fuel Efficiency OBD Logger - Multi-Session Mode")
    if SCORING_AVAILABLE:
        print("Aggressiveness scoring enabled")
    print("=" * 50)

    # Initialize OBD connection once; it is reused for every session below.
    connection = initialize_obd_connection()
    if not connection:
        print("Could not establish OBD connection. Exiting.")
        return

    session_count = 0
    logged_files = []  # CSV paths of drives that produced at least one data row

    try:
        while True:
            session_count += 1
            print(f"\n📊 Session {session_count} ready to start")

            # Check if connection is still available before starting new session;
            # reconnect once, and give up if that fails.
            if not connection or not connection.is_connected():
                print("OBD connection not available. Attempting to reconnect...")
                connection = initialize_obd_connection()
                if not connection:
                    print("Could not re-establish OBD connection. Exiting.")
                    break

            result = perform_logging_session(connection)

            # The session normally returns (csv_path, command); tolerate a
            # bare value by treating it as a quit request.
            if isinstance(result, tuple):
                csv_file, command = result
            else:
                csv_file, command = result, "quit"

            # Handle the result
            if csv_file and os.path.exists(csv_file):
                try:
                    with open(csv_file, 'r') as f:
                        lines = f.readlines()
                        if len(lines) > 1:  # More than just the header
                            logged_files.append(csv_file)
                            print(f"Drive {session_count} saved: {os.path.basename(csv_file)}")

                            # Derive Fuel_Used / Distance / efficiency columns in place.
                            calculate_fuel_metrics(csv_file)

                            print(f"\nStarting aggressiveness scoring for drive {session_count}...")
                            scored_file = run_scorer_on_csv(csv_file)
                            if scored_file:
                                print(f"Aggressiveness scoring complete for drive {session_count}")
                            else:
                                print(f"Aggressiveness scoring failed for drive {session_count}, but drive data is still saved")

                        else:
                            # Header-only file: the drive captured no samples.
                            print(f"⚠️ Drive {session_count} had no data, skipping analysis")
                            os.remove(csv_file)
                except Exception as e:
                    print(f"Error checking file {csv_file}: {e}")

            # Check if user wants to quit
            if command == "quit":
                print("\nStopping all logging as requested")
                break

            # Otherwise continue to next session
            print(f"\n Ready for next drive (Session {session_count + 1})")

    except KeyboardInterrupt:
        print("\n Logging stopped by user (Ctrl+C)")

    finally:
        # Always release the adapter, then print a session summary.
        if connection and connection.is_connected():
            print("Closing OBD-II connection...")
            connection.close()

        print("\n" + "=" * 50)
        print(f"📈 LOGGING SUMMARY")
        print(f"Total drives logged: {len(logged_files)}")
        if logged_files:
            print("📁 Files saved to:")
            print(" - Raw logs: logs/FuelLogs/")
            if SCORING_AVAILABLE:
                print(" - Scored logs: logs/ScoredLogs/")
            print("\n📝 Files created:")
            for file in logged_files:
                print(f" - {os.path.basename(file)}")
        print("=" * 50)


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
OBD/scorerConfig/scorerConfig.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
weights:
|
| 3 |
+
RPM: 0.25
|
| 4 |
+
THROTTLE_POS: 0.25
|
| 5 |
+
ENGINE_LOAD: 0.25
|
| 6 |
+
MAF: 0.25
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Spike Detection Thresholds
|
| 10 |
+
spike_thresholds:
|
| 11 |
+
moderate_spike: 65
|
| 12 |
+
extreme_spike: 85
|
| 13 |
+
spike_percentage_threshold: 3
|
| 14 |
+
extreme_percentage_threshold: 1
|
| 15 |
+
|
| 16 |
+
# Penalty Multipliers
|
| 17 |
+
penalty_multipliers:
|
| 18 |
+
p95_multiplier: 0.3
|
| 19 |
+
p99_multiplier: 0.5
|
| 20 |
+
spike_freq_multiplier: 2.0
|
| 21 |
+
extreme_freq_multiplier: 3.0
|
| 22 |
+
|
| 23 |
+
# Aggregate Score Calculation
|
| 24 |
+
aggregate_weights:
|
| 25 |
+
mean_weight: 0.7
|
| 26 |
+
p75_weight: 0.3
|
| 27 |
+
|
| 28 |
+
style_categories:
|
| 29 |
+
very_calm: [0, 20]
|
| 30 |
+
calm: [20, 40]
|
| 31 |
+
moderate: [40, 55]
|
| 32 |
+
aggressive: [55, 70]
|
| 33 |
+
very_aggressive: [70, 100]
|
| 34 |
+
|
| 35 |
+
bounds:
|
| 36 |
+
file: "obd_bounds.json"
|
| 37 |
+
auto_update: true # Automatically update bounds with new data
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
theoretical_maxes:
|
| 41 |
+
RPM: 6000
|
| 42 |
+
THROTTLE_POS: 100
|
| 43 |
+
ENGINE_LOAD: 100
|
| 44 |
+
MAF: 300
|
| 45 |
+
SPEED: 250
|
| 46 |
+
INTAKE_PRESSURE: 250
|
| 47 |
+
|
| 48 |
+
theoretical_mins:
|
| 49 |
+
RPM: 0
|
| 50 |
+
THROTTLE_POS: 0
|
| 51 |
+
ENGINE_LOAD: 0
|
| 52 |
+
MAF: 0
|
| 53 |
+
SPEED: 0
|
| 54 |
+
INTAKE_PRESSURE: 0
|
| 55 |
+
|
| 56 |
+
output:
|
| 57 |
+
save_scored_csv: true
|
| 58 |
+
visualization: true
|
| 59 |
+
verbose: true
|
OBD/visualiseScorer.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): this commit adds the scorer as OBD/DrivingAggressivenessScorer.py,
# so the module name must match that file (the snake_case name does not exist).
from DrivingAggressivenessScorer import DrivingAggressivenessScorer
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def visualize_drive(df_scored: pd.DataFrame, results: dict, save_path: str = None):
    """
    Create comprehensive visualization of drive analysis.

    Renders a 3x2 panel figure: score timeline, score histogram, two
    score-colored scatter plots (RPM/throttle and speed/load), a combined
    metrics timeline, and a text summary of the aggregate statistics.

    Args:
        df_scored: DataFrame with aggressiveness scores. Must contain the
            columns ``aggressiveness_score``, ``RPM``, ``THROTTLE_POS``,
            ``SPEED`` and ``ENGINE_LOAD``.
        results: Aggregate results dictionary (``final_score``, ``mean_score``,
            ``median_score``, ``std_score``, percentile keys, spike keys).
        save_path: Optional path to save figure. When omitted the figure is
            shown interactively instead of being written to disk.
    """
    fig, axes = plt.subplots(3, 2, figsize=(15, 12))
    fig.suptitle(f"Drive Analysis - Score: {results['final_score']:.1f}/100",
                 fontsize=16, fontweight='bold')

    # 1. Aggressiveness Score Over Time
    # The 70/85 reference lines mirror the spike / extreme thresholds used
    # in the aggregate spike analysis below.
    ax = axes[0, 0]
    ax.plot(df_scored['aggressiveness_score'], linewidth=1, color='#2E86AB')
    ax.axhline(y=results['mean_score'], color='green', linestyle='--',
               label=f"Mean: {results['mean_score']:.1f}")
    ax.axhline(y=70, color='orange', linestyle='--', alpha=0.5, label='Spike Threshold')
    ax.axhline(y=85, color='red', linestyle='--', alpha=0.5, label='Extreme Threshold')
    ax.set_title('Aggressiveness Score Timeline')
    ax.set_ylabel('Score (0-100)')
    ax.set_xlabel('Sample Number')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 2. Score Distribution
    ax = axes[0, 1]
    ax.hist(df_scored['aggressiveness_score'], bins=50, color='#A23B72', alpha=0.7, edgecolor='black')
    ax.axvline(x=results['mean_score'], color='green', linestyle='--', linewidth=2, label='Mean')
    ax.axvline(x=results['median_score'], color='blue', linestyle='--', linewidth=2, label='Median')
    ax.set_title('Score Distribution')
    ax.set_xlabel('Aggressiveness Score')
    ax.set_ylabel('Frequency')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 3. RPM vs Throttle Position (colored by score)
    # 'RdYlGn_r' is the reversed red-yellow-green map: high (aggressive)
    # scores render red, calm samples render green.
    ax = axes[1, 0]
    scatter = ax.scatter(df_scored['THROTTLE_POS'], df_scored['RPM'],
                         c=df_scored['aggressiveness_score'], cmap='RdYlGn_r',
                         s=10, alpha=0.6)
    ax.set_title('RPM vs Throttle Position')
    ax.set_xlabel('Throttle Position (%)')
    ax.set_ylabel('RPM')
    plt.colorbar(scatter, ax=ax, label='Aggressiveness')
    ax.grid(True, alpha=0.3)

    # 4. Speed vs Engine Load (colored by score)
    ax = axes[1, 1]
    scatter = ax.scatter(df_scored['SPEED'], df_scored['ENGINE_LOAD'],
                         c=df_scored['aggressiveness_score'], cmap='RdYlGn_r',
                         s=10, alpha=0.6)
    ax.set_title('Speed vs Engine Load')
    ax.set_xlabel('Speed (km/h)')
    ax.set_ylabel('Engine Load (%)')
    plt.colorbar(scatter, ax=ax, label='Aggressiveness')
    ax.grid(True, alpha=0.3)

    # 5. Key Metrics Over Time
    # RPM is divided by 100 so it shares a readable axis with throttle %;
    # speed gets its own right-hand axis via twinx().
    ax = axes[2, 0]
    ax2 = ax.twinx()

    ln1 = ax.plot(df_scored['RPM'] / 100, label='RPM/100', color='#E63946', linewidth=0.8)
    ln2 = ax.plot(df_scored['THROTTLE_POS'], label='Throttle %', color='#F77F00', linewidth=0.8)
    ln3 = ax2.plot(df_scored['SPEED'], label='Speed', color='#06FFA5', linewidth=0.8)

    ax.set_title('Key Metrics Timeline')
    ax.set_xlabel('Sample Number')
    ax.set_ylabel('RPM/100 & Throttle %')
    ax2.set_ylabel('Speed (km/h)')

    # Combine legends (twin axes keep separate legend handles otherwise)
    lns = ln1 + ln2 + ln3
    labs = [l.get_label() for l in lns]
    ax.legend(lns, labs, loc='upper left')
    ax.grid(True, alpha=0.3)

    # 6. Score Statistics Summary (text-only panel)
    ax = axes[2, 1]
    ax.axis('off')

    stats_text = f"""
    AGGREGATE SCORE BREAKDOWN
    {'─' * 40}

    Final Score: {results['final_score']:.1f} / 100

    SCORE STATISTICS
    Mean: {results['mean_score']:.1f}
    Median: {results['median_score']:.1f}
    Std Dev: {results['std_score']:.1f}

    PERCENTILES
    75th: {results['p75_score']:.1f}
    90th: {results['p90_score']:.1f}
    95th: {results['p95_score']:.1f}
    99th: {results['p99_score']:.1f}
    Max: {results['max_score']:.1f}

    SPIKE ANALYSIS
    Spikes (>70): {results['spike_percentage']:.1f}%
    Extreme (>85): {results['extreme_percentage']:.1f}%
    Spike Penalty: +{results['spike_penalty']:.1f}
    """

    ax.text(0.1, 0.95, stats_text, transform=ax.transAxes,
            fontfamily='monospace', fontsize=10, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"✓ Visualization saved to {save_path}")
        plt.close()
    else:
        plt.show()
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def compare_drives(scorer: DrivingAggressivenessScorer, csv_paths: list):
    """Compare multiple drives side-by-side and print a summary table.

    Args:
        scorer: DrivingAggressivenessScorer instance used to score each drive.
        csv_paths: List of CSV file paths to compare.

    Returns:
        pandas.DataFrame with one row of aggregate results per drive.
    """
    per_drive = []
    for path in csv_paths:
        # Bounds are updated cumulatively so later drives are scored against
        # everything seen so far.
        _, drive_stats = scorer.analyze_drive(path, update_bounds=True)
        drive_stats['file'] = path
        per_drive.append(drive_stats)

    comparison_df = pd.DataFrame(per_drive)

    divider = "=" * 80
    print("\n" + divider)
    print("DRIVE COMPARISON")
    print(divider)
    headline_cols = ['file', 'final_score', 'mean_score',
                     'spike_percentage', 'spike_penalty']
    print(comparison_df[headline_cols].to_string(index=False))
    print(divider + "\n")

    return comparison_df
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def batch_analyze_folder(folder_path: str, pattern: str = "*.csv"):
    """Score every matching CSV in *folder_path* and write a summary report.

    Each drive is scored with one shared scorer (bounds update cumulatively
    across files), an individual ``<name>_scored.csv`` is written next to each
    input, and an aggregate ``drive_summary_report.csv`` is written into the
    folder.

    Args:
        folder_path: Directory to scan for drive logs.
        pattern: Glob pattern for the input files (default ``*.csv``).

    Returns:
        pandas.DataFrame of per-drive aggregate results, or ``None`` when no
        files were found or none could be processed.
    """
    from pathlib import Path

    scorer = DrivingAggressivenessScorer()
    csv_files = list(Path(folder_path).glob(pattern))

    if not csv_files:
        print(f"No CSV files found in {folder_path}")
        return

    print(f"Found {len(csv_files)} CSV files")

    all_results = []
    for csv_file in csv_files:
        try:
            df_scored, results = scorer.analyze_drive(str(csv_file), update_bounds=True)
            results['filename'] = csv_file.name
            all_results.append(results)

            # Save individual scored file next to the input
            output_path = csv_file.parent / f"{csv_file.stem}_scored.csv"
            df_scored.to_csv(output_path, index=False)

        except Exception as e:
            print(f"Error processing {csv_file}: {e}")
            continue

    # Fix: previously an empty summary CSV was written when every file failed.
    if not all_results:
        print("No drives were processed successfully; skipping summary report")
        return

    summary_df = pd.DataFrame(all_results)
    summary_path = Path(folder_path) / "drive_summary_report.csv"
    summary_df.to_csv(summary_path, index=False)
    print(f"\n✓ Summary report saved to {summary_path}")

    return summary_df
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def export_bounds_report(scorer: DrivingAggressivenessScorer, output_path: str = "bounds_report.txt"):
    """Write (and echo) a plain-text report of the scorer's weights and bounds.

    Args:
        scorer: Scorer whose ``weights`` mapping and ``get_current_bounds()``
            output are reported.
        output_path: Destination text file (default ``bounds_report.txt``).
    """
    bounds = scorer.get_current_bounds()

    report = []
    report.append("=" * 60)
    report.append("DRIVING AGGRESSIVENESS SCORER - BOUNDS REPORT")
    report.append("=" * 60)
    report.append(f"\nGenerated: {pd.Timestamp.now()}\n")

    report.append("PARAMETER WEIGHTS:")
    report.append("-" * 60)
    for param, weight in scorer.weights.items():
        report.append(f"{param:20s}: {weight:.3f} ({weight*100:.1f}%)")

    report.append("\n\nCURRENT BOUNDS:")
    report.append("-" * 60)
    report.append(f"{'Parameter':<20s} {'Min':>12s} {'Max':>12s} {'Range':>12s}")
    report.append("-" * 60)

    for param in scorer.weights.keys():
        min_val = bounds[param]['min']
        max_val = bounds[param]['max']
        range_val = max_val - min_val
        report.append(f"{param:<20s} {min_val:>12.2f} {max_val:>12.2f} {range_val:>12.2f}")

    report.append("=" * 60)

    report_text = "\n".join(report)

    # Fix: the report contains non-ASCII characters ('─' comes from callers'
    # output, '✓' below); without an explicit encoding this raises
    # UnicodeEncodeError on platforms whose default codec is not UTF-8.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(report_text)
    print(f"\n✓ Report saved to {output_path}")
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# Example usage: score one recorded drive log end-to-end.
if __name__ == "__main__":
    scorer = DrivingAggressivenessScorer()

    # Analyze a single OBD log and render the 6-panel analysis figure.
    csv_path = 'obd_data_log_20251012_121810.csv'
    df_scored, results = scorer.analyze_drive(csv_path)
    visualize_drive(df_scored, results, save_path='drive_analysis.png')


    # Export bounds report
    export_bounds_report(scorer)
|
README.md
CHANGED
|
@@ -11,7 +11,7 @@ short_description: OBD-logging FastAPI server with data processing pipelines
|
|
| 11 |
|
| 12 |
# OBD Logger
|
| 13 |
|
| 14 |
-
A comprehensive OBD-II data logging and processing system built with FastAPI, featuring advanced data cleaning, Google Drive integration, MongoDB storage capabilities,
|
| 15 |
|
| 16 |

|
| 17 |
|
|
@@ -24,6 +24,7 @@ A comprehensive OBD-II data logging and processing system built with FastAPI, fe
|
|
| 24 |
- Firebase for structured data storage and querying
|
| 25 |
- MongoDB Atlas for structured data storage and querying
|
| 26 |
- **Driver Behavior Classification**: XGBoost-based ML model for driving style prediction
|
|
|
|
| 27 |
- **RLHF Training System**: Continuous model improvement through human feedback
|
| 28 |
- **Data Visualization**: Automatic generation of correlation heatmaps and trend plots
|
| 29 |
- **RESTful API**: Comprehensive endpoints for data management and retrieval
|
|
@@ -45,6 +46,9 @@ The application is structured into modular components:
|
|
| 45 |
- **`rlhf.py`**: Main RLHF training pipeline for continuous model improvement
|
| 46 |
- **`OBD/`**: OBD-specific modules for data analysis and logging
|
| 47 |
- **`utils/`**: Utility modules for model management and data processing
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
## Quick Start
|
| 50 |
|
|
@@ -58,8 +62,10 @@ The application is structured into modular components:
|
|
| 58 |
- `FIREBASE_SERVICE_ACCOUNT_JSON`: Firebase connection string
|
| 59 |
- `FIREBASE_ADMIN_JSON`: Firebase Admin SDK credentials
|
| 60 |
- `HF_TOKEN`: Hugging Face authentication token
|
| 61 |
-
- `HF_MODEL_REPO`:
|
| 62 |
-
- `
|
|
|
|
|
|
|
| 63 |
|
| 64 |
3. **Run the Application**:
|
| 65 |
```bash
|
|
|
|
| 11 |
|
| 12 |
# OBD Logger
|
| 13 |
|
| 14 |
+
A comprehensive OBD-II data logging and processing system built with FastAPI, featuring advanced data cleaning, Google Drive integration, MongoDB storage capabilities, **Reinforcement Learning from Human Feedback (RLHF)** for driver behavior classification, and **fuel efficiency scoring** using machine learning models.
|
| 15 |
|
| 16 |

|
| 17 |
|
|
|
|
| 24 |
- Firebase for structured data storage and querying
|
| 25 |
- MongoDB Atlas for structured data storage and querying
|
| 26 |
- **Driver Behavior Classification**: XGBoost-based ML model for driving style prediction
|
| 27 |
+
- **Fuel Efficiency Scoring**: ML model for drive-level fuel efficiency prediction (0-100%)
|
| 28 |
- **RLHF Training System**: Continuous model improvement through human feedback
|
| 29 |
- **Data Visualization**: Automatic generation of correlation heatmaps and trend plots
|
| 30 |
- **RESTful API**: Comprehensive endpoints for data management and retrieval
|
|
|
|
| 46 |
- **`rlhf.py`**: Main RLHF training pipeline for continuous model improvement
|
| 47 |
- **`OBD/`**: OBD-specific modules for data analysis and logging
|
| 48 |
- **`utils/`**: Utility modules for model management and data processing
|
| 49 |
+
- **`efficiency/`**: Fuel efficiency model training and evaluation
|
| 50 |
+
- **`retrain.py`**: Train and upload fuel efficiency models to Hugging Face
|
| 51 |
+
- **`eval.py`**: Evaluate fuel efficiency on OBD data
|
| 52 |
|
| 53 |
## Quick Start
|
| 54 |
|
|
|
|
| 62 |
- `FIREBASE_SERVICE_ACCOUNT_JSON`: Firebase connection string
|
| 63 |
- `FIREBASE_ADMIN_JSON`: Firebase Admin SDK credentials
|
| 64 |
- `HF_TOKEN`: Hugging Face authentication token
|
| 65 |
+
- `HF_MODEL_REPO`: Driver behavior model repository (default: `BinKhoaLe1812/Driver_Behavior_OBD`)
|
| 66 |
+
- `HF_EFFICIENCY_MODEL_REPO`: Fuel efficiency model repository (default: `BinKhoaLe1812/Fuel_Efficiency_OBD`)
|
| 67 |
+
- `MODEL_DIR`: Driver behavior model directory (default: `/app/models/ul`)
|
| 68 |
+
- `EFFICIENCY_MODEL_DIR`: Fuel efficiency model directory (default: `/app/models/efficiency`)
|
| 69 |
|
| 70 |
3. **Run the Application**:
|
| 71 |
```bash
|
app.py
CHANGED
|
@@ -15,8 +15,8 @@ import numpy as np
|
|
| 15 |
import matplotlib.pyplot as plt
|
| 16 |
import seaborn as sns
|
| 17 |
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
| 18 |
-
from sklearn.impute import KNNImputer
|
| 19 |
# Utils
|
|
|
|
| 20 |
import os, datetime, json, logging, re
|
| 21 |
from datetime import timedelta
|
| 22 |
import pathlib
|
|
@@ -29,7 +29,10 @@ from data.mongo_saver import MongoSaver, save_csv_to_mongo, save_dataframe_to_mo
|
|
| 29 |
from data.firebase_saver import FirebaseSaver, save_csv_increment, save_dataframe_increment
|
| 30 |
|
| 31 |
# UL Model
|
| 32 |
-
from utils.
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# RLHF Training
|
| 35 |
from train import RLHFTrainer
|
|
@@ -58,6 +61,7 @@ os.makedirs(CLEANED_DIR, exist_ok=True)
|
|
| 58 |
os.makedirs(PLOT_DIR, exist_ok=True)
|
| 59 |
|
| 60 |
DRIVE_STYLE = [] # latest UL predictions (string labels) — overwritten each run
|
|
|
|
| 61 |
|
| 62 |
# Init temp empty file
|
| 63 |
if not os.path.exists(RAW_CSV):
|
|
@@ -78,7 +82,7 @@ async def startup_event():
|
|
| 78 |
"""Download models on app startup"""
|
| 79 |
try:
|
| 80 |
logger.info("🚀 Starting model download...")
|
| 81 |
-
from utils.
|
| 82 |
|
| 83 |
# Load .env file if it exists
|
| 84 |
env_path = pathlib.Path(".env")
|
|
@@ -96,7 +100,20 @@ async def startup_event():
|
|
| 96 |
if success:
|
| 97 |
logger.info("✅ Models downloaded successfully on startup")
|
| 98 |
else:
|
| 99 |
-
logger.warning("⚠️
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
except Exception as e:
|
| 102 |
logger.error(f"❌ Startup model download failed: {e}")
|
|
@@ -457,6 +474,26 @@ def _process_and_save(df, norm_ts):
|
|
| 457 |
logger.info(f"✅ UL labels generated ({len(DRIVE_STYLE)}) → {labeled_path}")
|
| 458 |
except Exception as e:
|
| 459 |
logger.error(f"❌ UL labeling failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
# 10) Plots
|
| 461 |
_plot_corr(df, norm_ts)
|
| 462 |
_plot_trend(df, norm_ts)
|
|
@@ -528,28 +565,56 @@ def health():
|
|
| 528 |
def models_status():
|
| 529 |
"""Check if models are loaded and available"""
|
| 530 |
try:
|
| 531 |
-
|
| 532 |
-
|
|
|
|
| 533 |
|
| 534 |
-
|
| 535 |
-
|
| 536 |
|
| 537 |
-
for file in
|
| 538 |
-
file_path =
|
| 539 |
if file_path.exists():
|
| 540 |
-
|
| 541 |
else:
|
| 542 |
-
|
| 543 |
|
| 544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
|
| 546 |
return {
|
| 547 |
-
"
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
}
|
| 554 |
except Exception as e:
|
| 555 |
return {
|
|
@@ -564,6 +629,17 @@ def models_status():
|
|
| 564 |
def get_events():
|
| 565 |
return PIPELINE_EVENTS
|
| 566 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 567 |
|
| 568 |
# ────── Delete event from dashboard ──────────────
|
| 569 |
@app.delete("/events/remove/{timestamp}")
|
|
@@ -845,7 +921,7 @@ async def get_latest_model_version():
|
|
| 845 |
Get the latest model version information for the UI.
|
| 846 |
"""
|
| 847 |
try:
|
| 848 |
-
from utils.
|
| 849 |
|
| 850 |
# Get the latest version from Hugging Face
|
| 851 |
latest_version = get_latest_version()
|
|
@@ -872,4 +948,4 @@ async def get_latest_model_version():
|
|
| 872 |
raise HTTPException(
|
| 873 |
status_code=500,
|
| 874 |
detail=f"Failed to get latest model version: {str(e)}"
|
| 875 |
-
)
|
|
|
|
| 15 |
import matplotlib.pyplot as plt
|
| 16 |
import seaborn as sns
|
| 17 |
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
|
|
| 18 |
# Utils
|
| 19 |
+
from sklearn.impute import KNNImputer
|
| 20 |
import os, datetime, json, logging, re
|
| 21 |
from datetime import timedelta
|
| 22 |
import pathlib
|
|
|
|
| 29 |
from data.firebase_saver import FirebaseSaver, save_csv_increment, save_dataframe_increment
|
| 30 |
|
| 31 |
# UL Model
|
| 32 |
+
from utils.dbehavior_labeler import ULLabeler
|
| 33 |
+
|
| 34 |
+
# Fuel Efficiency Model
|
| 35 |
+
from utils.efficiency_labeler import EfficiencyLabeler
|
| 36 |
|
| 37 |
# RLHF Training
|
| 38 |
from train import RLHFTrainer
|
|
|
|
| 61 |
os.makedirs(PLOT_DIR, exist_ok=True)
|
| 62 |
|
| 63 |
DRIVE_STYLE = [] # latest UL predictions (string labels) — overwritten each run
|
| 64 |
+
FUEL_EFFICIENCY = [] # latest fuel efficiency predictions (0-100%) — overwritten each run
|
| 65 |
|
| 66 |
# Init temp empty file
|
| 67 |
if not os.path.exists(RAW_CSV):
|
|
|
|
| 82 |
"""Download models on app startup"""
|
| 83 |
try:
|
| 84 |
logger.info("🚀 Starting model download...")
|
| 85 |
+
from utils.dbehavior_download import download_latest_models
|
| 86 |
|
| 87 |
# Load .env file if it exists
|
| 88 |
env_path = pathlib.Path(".env")
|
|
|
|
| 100 |
if success:
|
| 101 |
logger.info("✅ Models downloaded successfully on startup")
|
| 102 |
else:
|
| 103 |
+
logger.warning("⚠️ Driver behavior model download failed - some features may not work")
|
| 104 |
+
|
| 105 |
+
# Download fuel efficiency models
|
| 106 |
+
from utils.efficiency_download import download_latest_efficiency_models
|
| 107 |
+
success_efficiency = download_latest_efficiency_models()
|
| 108 |
+
if success_efficiency:
|
| 109 |
+
logger.info("✅ Fuel efficiency models downloaded successfully")
|
| 110 |
+
else:
|
| 111 |
+
logger.warning("⚠️ Fuel efficiency model download failed - some features may not work")
|
| 112 |
+
|
| 113 |
+
if success_ul or success_efficiency:
|
| 114 |
+
logger.info("✅ At least one model type downloaded successfully")
|
| 115 |
+
else:
|
| 116 |
+
logger.warning("⚠️ All model downloads failed - some features may not work")
|
| 117 |
|
| 118 |
except Exception as e:
|
| 119 |
logger.error(f"❌ Startup model download failed: {e}")
|
|
|
|
| 474 |
logger.info(f"✅ UL labels generated ({len(DRIVE_STYLE)}) → {labeled_path}")
|
| 475 |
except Exception as e:
|
| 476 |
logger.error(f"❌ UL labeling failed: {e}")
|
| 477 |
+
|
| 478 |
+
# 9.5) Fuel efficiency predictions
|
| 479 |
+
efficiency_path = None
|
| 480 |
+
try:
|
| 481 |
+
efficiency_labeler = EfficiencyLabeler.get()
|
| 482 |
+
efficiency_preds = efficiency_labeler.predict_df(df)
|
| 483 |
+
# update global FUEL_EFFICIENCY (overwrite if already exists)
|
| 484 |
+
global FUEL_EFFICIENCY
|
| 485 |
+
FUEL_EFFICIENCY = [float(p) for p in efficiency_preds]
|
| 486 |
+
# write efficiency CSV (fuel_efficiency column)
|
| 487 |
+
df_efficiency = df_for_persist.copy()
|
| 488 |
+
df_efficiency["fuel_efficiency"] = FUEL_EFFICIENCY
|
| 489 |
+
efficiency_path = os.path.join(CLEANED_DIR, f"cleaned_{norm_ts}_efficiency.csv")
|
| 490 |
+
df_efficiency.to_csv(efficiency_path, index=False)
|
| 491 |
+
df_for_persist = df_efficiency
|
| 492 |
+
# Update the global FUEL_EFFICIENCY list
|
| 493 |
+
logger.info(f"✅ Fuel efficiency scores generated ({len(FUEL_EFFICIENCY)}) → {efficiency_path}")
|
| 494 |
+
logger.info(f"📊 Drive efficiency: {FUEL_EFFICIENCY[0]:.1f}%" if FUEL_EFFICIENCY else "No efficiency score")
|
| 495 |
+
except Exception as e:
|
| 496 |
+
logger.error(f"❌ Fuel efficiency scoring failed: {e}")
|
| 497 |
# 10) Plots
|
| 498 |
_plot_corr(df, norm_ts)
|
| 499 |
_plot_trend(df, norm_ts)
|
|
|
|
| 565 |
def models_status():
|
| 566 |
"""Check if models are loaded and available"""
|
| 567 |
try:
|
| 568 |
+
# Driver behavior model status
|
| 569 |
+
ul_model_dir = pathlib.Path(os.getenv("MODEL_DIR", "/app/models/ul"))
|
| 570 |
+
ul_required_files = ["label_encoder_ul.pkl", "scaler_ul.pkl", "xgb_drivestyle_ul.pkl"]
|
| 571 |
|
| 572 |
+
ul_available_files = []
|
| 573 |
+
ul_missing_files = []
|
| 574 |
|
| 575 |
+
for file in ul_required_files:
|
| 576 |
+
file_path = ul_model_dir / file
|
| 577 |
if file_path.exists():
|
| 578 |
+
ul_available_files.append(file)
|
| 579 |
else:
|
| 580 |
+
ul_missing_files.append(file)
|
| 581 |
|
| 582 |
+
ul_status = "ready" if len(ul_available_files) == len(ul_required_files) else "loading"
|
| 583 |
+
|
| 584 |
+
# Fuel efficiency model status
|
| 585 |
+
efficiency_model_dir = pathlib.Path(os.getenv("EFFICIENCY_MODEL_DIR", "/app/models/efficiency"))
|
| 586 |
+
efficiency_required_files = ["efficiency_model.joblib"]
|
| 587 |
+
|
| 588 |
+
efficiency_available_files = []
|
| 589 |
+
efficiency_missing_files = []
|
| 590 |
+
|
| 591 |
+
for file in efficiency_required_files:
|
| 592 |
+
file_path = efficiency_model_dir / file
|
| 593 |
+
if file_path.exists():
|
| 594 |
+
efficiency_available_files.append(file)
|
| 595 |
+
else:
|
| 596 |
+
efficiency_missing_files.append(file)
|
| 597 |
+
|
| 598 |
+
efficiency_status = "ready" if len(efficiency_available_files) == len(efficiency_required_files) else "loading"
|
| 599 |
|
| 600 |
return {
|
| 601 |
+
"driver_behavior": {
|
| 602 |
+
"status": ul_status,
|
| 603 |
+
"model_directory": str(ul_model_dir),
|
| 604 |
+
"available_files": ul_available_files,
|
| 605 |
+
"missing_files": ul_missing_files,
|
| 606 |
+
"total_files": len(ul_required_files),
|
| 607 |
+
"loaded_files": len(ul_available_files)
|
| 608 |
+
},
|
| 609 |
+
"fuel_efficiency": {
|
| 610 |
+
"status": efficiency_status,
|
| 611 |
+
"model_directory": str(efficiency_model_dir),
|
| 612 |
+
"available_files": efficiency_available_files,
|
| 613 |
+
"missing_files": efficiency_missing_files,
|
| 614 |
+
"total_files": len(efficiency_required_files),
|
| 615 |
+
"loaded_files": len(efficiency_available_files)
|
| 616 |
+
},
|
| 617 |
+
"overall_status": "ready" if (ul_status == "ready" and efficiency_status == "ready") else "loading"
|
| 618 |
}
|
| 619 |
except Exception as e:
|
| 620 |
return {
|
|
|
|
| 629 |
def get_events():
|
| 630 |
return PIPELINE_EVENTS
|
| 631 |
|
| 632 |
+
@app.get("/predictions/latest")
|
| 633 |
+
def get_latest_predictions():
|
| 634 |
+
"""Get the latest driver behavior and fuel efficiency predictions"""
|
| 635 |
+
return {
|
| 636 |
+
"driver_behavior": DRIVE_STYLE,
|
| 637 |
+
"fuel_efficiency": FUEL_EFFICIENCY,
|
| 638 |
+
"timestamp": datetime.datetime.now().isoformat(),
|
| 639 |
+
"driver_behavior_count": len(DRIVE_STYLE),
|
| 640 |
+
"fuel_efficiency_count": len(FUEL_EFFICIENCY)
|
| 641 |
+
}
|
| 642 |
+
|
| 643 |
|
| 644 |
# ────── Delete event from dashboard ──────────────
|
| 645 |
@app.delete("/events/remove/{timestamp}")
|
|
|
|
| 921 |
Get the latest model version information for the UI.
|
| 922 |
"""
|
| 923 |
try:
|
| 924 |
+
from utils.dbehavior_download import get_latest_version
|
| 925 |
|
| 926 |
# Get the latest version from Hugging Face
|
| 927 |
latest_version = get_latest_version()
|
|
|
|
| 948 |
raise HTTPException(
|
| 949 |
status_code=500,
|
| 950 |
detail=f"Failed to get latest model version: {str(e)}"
|
| 951 |
+
)
|
bulk_mongo_upload.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
Bulk MongoDB Upload Script for Fuel Efficiency Data
Processes all pending CSV files and uploads them to MongoDB when WiFi is available.
"""

import os
import sys
import glob
from datetime import datetime
from pathlib import Path

# Load environment variables from .env file
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("⚠️ python-dotenv not installed. Using system environment variables only.")
    print("   Install with: pip install python-dotenv")

# Add this script's directory to the path so the saver can be imported
current_dir = os.path.dirname(__file__)
sys.path.append(current_dir)

# Fix: the saver lives at data/mongo_saver.py in this repository, so a bare
# `from mongo_saver import ...` fails when the script runs from the repo root.
# Try the package path first and fall back to a flat layout.
try:
    from data.mongo_saver import save_csv_to_mongo
except ImportError:
    from mongo_saver import save_csv_to_mongo
|
| 26 |
+
|
| 27 |
+
def check_mongodb_config():
    """Check if MongoDB configuration is available.

    Returns:
        True when the ``MONGO_URI`` environment variable is set, else False.
    """
    mongo_uri = os.getenv("MONGO_URI")
    if not mongo_uri:
        print("Error: MONGO_URI not found in .env file")
        return False

    # Fix: the original message was an f-string with no placeholder and a
    # stray ')' inside the string ("MongoDB URI configured)").
    print("MongoDB URI configured")
    return True
|
| 36 |
+
|
| 37 |
+
def find_pending_csv_files(logs_dir):
    """Find all OBD CSV files that haven't been uploaded yet.

    Looks inside ``<logs_dir>/FuelLogs`` for files matching the logger's
    ``obd_data_log_*.csv`` naming pattern and returns them newest-first.
    """
    fuel_logs_dir = os.path.join(logs_dir, "FuelLogs")

    if not os.path.exists(fuel_logs_dir):
        print(f"FuelLogs directory not found: {fuel_logs_dir}")
        return []

    # Collect matching files, ordered by modification time (newest first).
    search_glob = os.path.join(fuel_logs_dir, "obd_data_log_*.csv")
    pending = sorted(glob.glob(search_glob), key=os.path.getmtime, reverse=True)

    print(f"Found {len(pending)} fuel efficiency CSV files to process")
    return pending
|
| 54 |
+
|
| 55 |
+
def create_session_id_from_filename(csv_filepath):
    """Generate a session ID from the CSV filename.

    Example: ``obd_data_log_20231201_120000.csv`` ->
    ``fuel_efficiency_20231201_120000``.
    """
    base_name = os.path.basename(csv_filepath)
    renamed = base_name.replace('obd_data_log_', 'fuel_efficiency_')
    return renamed.replace('.csv', '')
|
| 61 |
+
|
| 62 |
+
def upload_csv_files_to_mongo(csv_files, max_uploads=None):
    """Upload each CSV session to MongoDB and move successful files aside.

    Args:
        csv_files: CSV paths to upload (typically newest first).
        max_uploads: Optional cap on how many files to process in this batch.
    """
    if not csv_files:
        print("No CSV files to upload")
        return

    # Limit uploads if specified
    if max_uploads:
        csv_files = csv_files[:max_uploads]
        print(f"Limiting upload to {max_uploads} files for this batch")

    upload_stats = {
        'successful': 0,
        'failed': 0,
        'total': len(csv_files)
    }

    print(f"Starting bulk upload of {len(csv_files)} fuel efficiency sessions...")
    print("=" * 60)

    for i, csv_file in enumerate(csv_files, 1):
        try:
            # Generate session ID
            session_id = create_session_id_from_filename(csv_file)
            filename = os.path.basename(csv_file)

            # Fix: the committed code printed a literal placeholder here and
            # left `filename` unused — log the actual file name.
            print(f"[{i}/{len(csv_files)}] Processing: {filename}")
            print(f"   Session ID: {session_id}")

            success = save_csv_to_mongo(csv_file, session_id)

            if success:
                upload_stats['successful'] += 1
                print("Upload successful")
                # Keep the local folder tidy so re-runs skip this file.
                move_to_processed_folder(csv_file)
            else:
                upload_stats['failed'] += 1
                print("Upload failed")

        except Exception as e:
            upload_stats['failed'] += 1
            print(f"Error processing {os.path.basename(csv_file)}: {e}")

        print("-" * 40)

    # Print summary
    print("=" * 60)
    print("BULK UPLOAD SUMMARY")
    print(f"Successful uploads: {upload_stats['successful']}")
    print(f"Failed uploads: {upload_stats['failed']}")
    print(f"Total processed: {upload_stats['total']}")

    success_rate = (upload_stats['successful'] / upload_stats['total']) * 100 if upload_stats['total'] > 0 else 0
    print(f"Success rate: {success_rate:.1f}%")
|
| 117 |
+
|
| 118 |
+
def move_to_processed_folder(csv_file):
    """Move successfully uploaded CSV to a 'processed' folder.

    Failures are reported but never raised — the upload itself already
    succeeded, so a move problem must not abort the batch.
    """
    try:
        # Create processed folder if it doesn't exist
        processed_dir = os.path.join(os.path.dirname(csv_file), "processed")
        os.makedirs(processed_dir, exist_ok=True)

        # Move file. os.replace (unlike os.rename) overwrites a stale copy
        # instead of raising on Windows when the destination already exists.
        filename = os.path.basename(csv_file)
        new_path = os.path.join(processed_dir, filename)
        os.replace(csv_file, new_path)
        # Fix: the committed code printed a literal placeholder here.
        print(f"Moved to processed folder: {new_path}")

    except Exception as e:
        print(f"Could not move file to processed folder: {e}")
|
| 133 |
+
|
| 134 |
+
def main():
    """Run the interactive bulk upload: discover, confirm, upload.

    Steps: verify MONGO_URI is configured, scan ``../logs`` for pending
    fuel-efficiency CSVs, preview up to 10 of them, ask the operator to
    confirm, then upload the batch.
    """
    print("Fuel Efficiency Data - Bulk MongoDB Upload")
    print(f"Started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)

    # Check MongoDB configuration first
    if not check_mongodb_config():
        return

    # Find logs directory (relative to script location)
    logs_dir = os.path.join(current_dir, "..", "logs")
    logs_dir = os.path.abspath(logs_dir)

    print(f"Searching for CSV files in: {logs_dir}")

    # Find pending CSV files
    csv_files = find_pending_csv_files(logs_dir)

    if not csv_files:
        print("No pending CSV files to upload - all caught up!")
        return

    # Show files to be processed (first 10 only)
    print("\nFiles to upload:")
    for i, csv_file in enumerate(csv_files[:10], 1):
        filename = os.path.basename(csv_file)
        mod_time = datetime.fromtimestamp(os.path.getmtime(csv_file))
        # Fix: the committed code printed a literal placeholder instead of
        # the file name computed above.
        print(f"  {i}. {filename} (modified: {mod_time.strftime('%Y-%m-%d %H:%M')})")

    if len(csv_files) > 10:
        print(f"  ... and {len(csv_files) - 10} more files")

    # Confirm upload
    print(f"\nUpload {len(csv_files)} fuel efficiency sessions to MongoDB? (y/n): ", end="")
    response = input().strip().lower()

    if response not in ['y', 'yes']:
        print("Upload cancelled by user")
        return

    # Perform bulk upload
    upload_csv_files_to_mongo(csv_files)

    print(f"\nBulk upload completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
| 179 |
+
|
| 180 |
+
# Script entry point — run the interactive bulk upload when executed directly.
if __name__ == "__main__":
    main()
|
data/mongo_saver.py
CHANGED
|
@@ -53,7 +53,8 @@ class MongoSaver:
|
|
| 53 |
self.mongo_uri,
|
| 54 |
serverSelectionTimeoutMS=5000, # 5 second timeout
|
| 55 |
connectTimeoutMS=10000, # 10 second connection timeout
|
| 56 |
-
socketTimeoutMS=10000
|
|
|
|
| 57 |
)
|
| 58 |
|
| 59 |
# Test connection
|
|
|
|
| 53 |
self.mongo_uri,
|
| 54 |
serverSelectionTimeoutMS=5000, # 5 second timeout
|
| 55 |
connectTimeoutMS=10000, # 10 second connection timeout
|
| 56 |
+
socketTimeoutMS=10000, # 10 second socket timeout
|
| 57 |
+
tlsAllowInvalidCertificates=True # Fix for SSL certificate issues on macOS
|
| 58 |
)
|
| 59 |
|
| 60 |
# Test connection
|
efficiency/eval.py
ADDED
|
@@ -0,0 +1,458 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fuel Efficiency Model Evaluation Script
|
| 3 |
+
Integration-ready evaluation script for fuel efficiency scoring in the main pipeline
|
| 4 |
+
Based on the original eval.py but reformatted for system integration
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import glob
|
| 9 |
+
import joblib
|
| 10 |
+
import logging
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pandas as pd
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import List, Dict, Any, Optional, Tuple
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
|
| 17 |
+
# Module-level logger for the efficiency evaluator.
logger = logging.getLogger("efficiency-eval")
logger.setLevel(logging.INFO)
# Attach a stream handler only once so repeated imports of this module
# don't duplicate every log line.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
    logger.addHandler(handler)

# Constants
KMH_TO_MS = 1000.0/3600.0  # km/h -> m/s conversion factor
|
| 26 |
+
|
| 27 |
+
class EfficiencyEvaluator:
|
| 28 |
+
"""
|
| 29 |
+
Fuel efficiency evaluator for OBD data using trained model.
|
| 30 |
+
Provides drive-level efficiency scoring for integration into main pipeline.
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def __init__(self, model_path: Optional[str] = None):
|
| 34 |
+
"""
|
| 35 |
+
Initialize the evaluator.
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
model_path: Path to the trained model. If None, will try to load from default location.
|
| 39 |
+
"""
|
| 40 |
+
self.model_path = model_path or self._find_model_path()
|
| 41 |
+
self.model_artifacts = None
|
| 42 |
+
self.metadata = None
|
| 43 |
+
self._load_model()
|
| 44 |
+
|
| 45 |
+
def _find_model_path(self) -> str:
|
| 46 |
+
"""Find the model path from various possible locations"""
|
| 47 |
+
possible_paths = [
|
| 48 |
+
"./efficiency_export/efficiency_model.joblib",
|
| 49 |
+
"/app/models/efficiency/efficiency_model.joblib",
|
| 50 |
+
"./efficiency_model.joblib"
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
for path in possible_paths:
|
| 54 |
+
if os.path.exists(path):
|
| 55 |
+
logger.info(f"📁 Found model at: {path}")
|
| 56 |
+
return path
|
| 57 |
+
|
| 58 |
+
# Try to download from Hugging Face
|
| 59 |
+
logger.warning("⚠️ Model not found locally, attempting download...")
|
| 60 |
+
try:
|
| 61 |
+
from utils.efficiency_download import download_latest_efficiency_models
|
| 62 |
+
success = download_latest_efficiency_models()
|
| 63 |
+
if success:
|
| 64 |
+
return "/app/models/efficiency/efficiency_model.joblib"
|
| 65 |
+
except Exception as e:
|
| 66 |
+
logger.error(f"❌ Failed to download model: {e}")
|
| 67 |
+
|
| 68 |
+
raise FileNotFoundError("Could not find or download efficiency model")
|
| 69 |
+
|
| 70 |
+
    def _load_model(self):
        """Load the efficiency model artifacts and optional metadata sidecar.

        Populates self.model_artifacts (joblib dict) and, when an
        `efficiency_meta.json` file sits next to the model, self.metadata.
        Re-raises any loading error after logging it.
        """
        try:
            logger.info(f"📥 Loading efficiency model from: {self.model_path}")

            # Load model artifacts.
            # NOTE(review): joblib.load unpickles arbitrary code — only load
            # model files from trusted sources.
            self.model_artifacts = joblib.load(self.model_path)

            # Load metadata if available (sidecar derived from the model path).
            meta_path = self.model_path.replace("efficiency_model.joblib", "efficiency_meta.json")
            if os.path.exists(meta_path):
                import json
                with open(meta_path, 'r') as f:
                    self.metadata = json.load(f)

            logger.info(f"✅ Model loaded | kind: {self.model_artifacts.get('model_kind', 'unknown')}")
            logger.info(f"📊 Features: {len(self.model_artifacts.get('feature_names', []))}")

            # metadata stays None when no sidecar exists; this branch is skipped then.
            if self.metadata:
                logger.info(f"📅 Training date: {self.metadata.get('training_date', 'unknown')}")
                logger.info(f"📈 OOF MAE: {self.metadata.get('oof_stats', {}).get('oof_mae_qmap', 'unknown')}")

        except Exception as e:
            logger.error(f"❌ Error loading model: {e}")
            raise
|
| 95 |
+
|
| 96 |
+
def _ensure_dt(self, s):
|
| 97 |
+
"""Ensure datetime conversion"""
|
| 98 |
+
return pd.to_datetime(s, errors="coerce")
|
| 99 |
+
|
| 100 |
+
def _infer_base_interval_seconds(self, ts, fallback=1.0):
|
| 101 |
+
"""Infer base interval from timestamps"""
|
| 102 |
+
ts = pd.to_datetime(ts, errors="coerce")
|
| 103 |
+
dt = ts.diff().dt.total_seconds().dropna()
|
| 104 |
+
med = float(np.nanmedian(dt)) if len(dt) else fallback
|
| 105 |
+
return fallback if (not np.isfinite(med) or med <= 0) else med
|
| 106 |
+
|
| 107 |
+
def _rows_for(self, seconds, base_sec):
|
| 108 |
+
"""Calculate number of rows for given time window"""
|
| 109 |
+
return max(3, int(round(seconds / max(1e-3, base_sec))))
|
| 110 |
+
|
| 111 |
+
    def _add_basic_derivatives(self, d):
        """Add derived kinematic columns to a raw OBD sample frame.

        Adds SPEED_ms (m/s), ACCEL (m/s^2), JERK (m/s^3) and dist_m (per-row
        distance in metres). Rows with unparseable timestamps are dropped and
        the frame is re-sorted by timestamp. Operates on a copy; the caller's
        frame is not mutated.
        """
        d = d.copy()
        d["timestamp"] = self._ensure_dt(d["timestamp"])
        d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
        # Median sample spacing; used as the dt for finite differences below.
        base = self._infer_base_interval_seconds(d["timestamp"], 1.0)

        # Convert numeric columns (non-numeric junk becomes NaN).
        for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
            if c in d.columns:
                d[c] = pd.to_numeric(d[c], errors="coerce")

        # Convert speed to m/s (SPEED is assumed km/h — standard OBD-II PID 0x0D).
        if "SPEED_ms" not in d.columns:
            d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan

        # Finite-difference derivatives using the inferred base interval.
        d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
        d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)

        # Per-row distance = speed * actual gap; gaps longer than 10x the base
        # interval are clipped so logging dropouts don't inflate the distance.
        dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        d["dist_m"] = d["SPEED_ms"] * dt

        return d
|
| 136 |
+
|
| 137 |
+
    def _idle_rule(self, d, thr):
        """Return a boolean Series marking idle samples.

        A sample is idle when speed, throttle, engine load, MAF and |accel|
        are all at/below their calibrated low thresholds from *thr*. When a
        column is missing its condition defaults to the scalar True, which is
        an identity under `&`. The raw mask is smoothed with a centered
        5-sample rolling median to suppress single-sample flicker.
        """
        speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
        thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
        load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
        maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
        accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))

        mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
        k = 5  # smoothing window; skipped for drives shorter than k samples
        return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
                if len(mask) >= k else mask.astype(bool))
|
| 149 |
+
|
| 150 |
+
def _sharp_mask_from_thresholds(self, d, thr):
|
| 151 |
+
"""Detect sharp acceleration/deceleration events"""
|
| 152 |
+
thr_a = thr.get("ACCEL_HIGH_Q85",
|
| 153 |
+
np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
|
| 154 |
+
thr_j = thr.get("JERK_HIGH_Q90",
|
| 155 |
+
np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
|
| 156 |
+
return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)
|
| 157 |
+
|
| 158 |
+
def _q(self, s, p):
|
| 159 |
+
"""Quantile helper function"""
|
| 160 |
+
s = pd.to_numeric(s, errors="coerce")
|
| 161 |
+
return float(np.nanquantile(s, p)) if s.notna().any() else 0.0
|
| 162 |
+
|
| 163 |
+
    def _agg_for_ml_drive(self, g, thr):
        """Collapse one drive's per-sample OBD rows into one feature vector.

        Args:
            g: Per-sample DataFrame for a single drive (timestamp, SPEED,
               optionally RPM/MAF/ENGINE_LOAD/THROTTLE_POS).
            thr: Calibrated threshold dict stored with the model artifacts.

        Returns:
            Dict of drive-level features matching the model's training schema.
        """
        g = self._add_basic_derivatives(g.copy())
        base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
        g["IDLE_RULE"] = self._idle_rule(g, thr)

        # Total driven time with gap-capped deltas; mins is floored at ~0 to
        # keep the per-minute rates below finite.
        dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        T = float(dt.sum())
        mins = max(1e-6, T/60)

        # Sharp accel/brake episode rate: each contiguous True run in the mask
        # contributes one rising and one falling edge, hence the //2.
        sharp = self._sharp_mask_from_thresholds(g, thr).values
        edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
        sharp_freq_pm = (len(edges)//2)/mins

        # Fraction of samples at/above the calibrated 90th-percentile RPM/MAF.
        rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
        frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
        frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0

        # Speed coefficient of variation over a ~10-second rolling window.
        W10 = self._rows_for(10, base)
        speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())

        return {
            "duration_min": max(1e-6, T/60),
            "distance_km": g["dist_m"].sum()/1000.0,
            "speed_mean": float(g["SPEED_ms"].mean()),
            "speed_q90": self._q(g["SPEED_ms"], 0.90),
            "speed_cv": speed_cv,
            "accel_q90": self._q(g["ACCEL"].abs(), 0.90),
            "jerk_q90": self._q(g["JERK"].abs(), 0.90),
            "sharp_freq_pm": sharp_freq_pm,
            "idle_frac": float(g["IDLE_RULE"].mean()),
            # Idle events per minute — same edge-pair counting trick as above.
            "idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
            "rpm_q90": self._q(g["RPM"], 0.90) if "RPM" in g else 0.0,
            "maf_q90": self._q(g["MAF"], 0.90) if "MAF" in g else 0.0,
            "load_q85": self._q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
            "thr_q85": self._q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
            "frac_rpm90": frac_rpm90,
            "frac_maf90": frac_maf90,
            # High-demand fuel-burn proxy: product of the RPM and MAF q90s.
            "fuel_intensity": (self._q(g["RPM"], 0.90)*self._q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
        }
|
| 203 |
+
|
| 204 |
+
def _align_to_schema(self, feats, art):
|
| 205 |
+
"""Align features to model schema"""
|
| 206 |
+
x = pd.DataFrame([feats])
|
| 207 |
+
for c in art["feature_names"]:
|
| 208 |
+
if c not in x.columns:
|
| 209 |
+
x[c] = 0.0
|
| 210 |
+
x = x[art["feature_names"]]
|
| 211 |
+
if len(art["num_cols"]):
|
| 212 |
+
x.loc[:, art["num_cols"]] = art["scaler"].transform(x[art["num_cols"]])
|
| 213 |
+
return x
|
| 214 |
+
|
| 215 |
+
    def _predict_drive(self, df_drive):
        """Score a single drive with the loaded model.

        Aggregates drive-level features, runs the stored regressor, then maps
        the raw output onto the 0-100 efficiency scale.

        Returns:
            (calibrated_prediction_0_100, raw_model_output, feature_dict)
        """
        art = self.model_artifacts
        thr = art["thr"]

        feats = self._agg_for_ml_drive(df_drive, thr)
        x = self._align_to_schema(feats, art)

        # Get model (random forest or gradient boosting, per training choice).
        mdl = art["rf"] if art.get("model_kind") == "rf" else art["gbm"]
        raw = float(mdl.predict(x)[0])

        # Apply quantile-mapping calibration when the artifacts carry one;
        # otherwise just clamp the raw output to [0, 100].
        if art.get("calib", {}).get("type") == "qmap":
            rq = np.array(art["calib"]["rq"])
            yq = np.array(art["calib"]["yq"])

            # np.interp requires monotonically increasing x-points; nudge any
            # ties upward so the interpolation stays stable.
            for i in range(1, len(rq)):
                if rq[i] <= rq[i-1]:
                    rq[i] = rq[i-1] + 1e-6

            pred = float(np.clip(np.interp(raw, rq, yq), 0, 100))
        else:
            pred = float(np.clip(raw, 0, 100))

        return pred, raw, feats
|
| 242 |
+
|
| 243 |
+
    def predict_single_drive(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Predict fuel efficiency for a single drive.

        Args:
            df: DataFrame with OBD data. Must contain 'timestamp' and 'SPEED';
                RPM/MAF/ENGINE_LOAD/THROTTLE_POS are used when present.

        Returns:
            Dict with efficiency_score (0-100), raw_score, duration_min,
            distance_km and feature details. Drives shorter than 5 samples get
            a zeroed dict with note="too short"; any internal failure yields a
            zeroed dict with an "error" message (this method never raises).
        """
        try:
            if self.model_artifacts is None:
                raise RuntimeError("Efficiency model not loaded")

            # Too few samples to compute stable derivatives/quantiles.
            if len(df) < 5:
                logger.warning("⚠️ Drive too short for efficiency prediction")
                return {
                    "efficiency_score": 0.0,
                    "raw_score": 0.0,
                    "duration_min": 0.0,
                    "distance_km": 0.0,
                    "note": "too short",
                    "features": {}
                }

            # Calculate basic drive metrics on a slim copy; df.get returns
            # None for absent columns, which assign tolerates.
            g2 = self._add_basic_derivatives(df[["timestamp","SPEED"]].assign(
                RPM=df.get("RPM"), MAF=df.get("MAF"),
                ENGINE_LOAD=df.get("ENGINE_LOAD"), THROTTLE_POS=df.get("THROTTLE_POS")))

            dt = g2["timestamp"].diff().dt.total_seconds().fillna(0)
            mins = float(dt.sum())/60.0
            dist_km = float(pd.to_numeric(g2["dist_m"], errors="coerce").fillna(0).sum())/1000.0

            # Predict efficiency (recomputes derivatives internally on df).
            efficiency_score, raw_score, features = self._predict_drive(df)

            logger.info(f"📊 Drive efficiency: {efficiency_score:.1f}% (raw: {raw_score:.3f})")

            return {
                "efficiency_score": round(efficiency_score, 1),
                "raw_score": round(raw_score, 3),
                "duration_min": round(mins, 2),
                "distance_km": round(dist_km, 3),
                "features": features,
                "timestamp": datetime.now().isoformat()
            }

        except Exception as e:
            logger.error(f"❌ Error predicting efficiency: {e}")
            # NOTE(review): unlike the short-drive branch, this dict omits the
            # "features" key — callers indexing it must use .get(); confirm.
            return {
                "efficiency_score": 0.0,
                "raw_score": 0.0,
                "duration_min": 0.0,
                "distance_km": 0.0,
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            }
|
| 301 |
+
|
| 302 |
+
    def predict_batch(self, csv_files: List[str]) -> pd.DataFrame:
        """
        Predict efficiency for multiple CSV files (batch processing).

        Args:
            csv_files: List of CSV file paths

        Returns:
            DataFrame with one row per input file, sorted by drive_id.
            Unreadable or too-short files still produce a row (with NaN
            predictions and a "note"/"error" field) so the output aligns
            with the input list.
        """
        logger.info(f"📊 Processing {len(csv_files)} CSV files...")

        rows = []
        for i, csv_path in enumerate(csv_files, start=1):
            try:
                # Load CSV and normalize timestamps before scoring.
                df = pd.read_csv(csv_path)
                df["source_file"] = os.path.basename(csv_path)
                df["drive_id"] = i
                df["timestamp"] = self._ensure_dt(df["timestamp"])
                df = df.dropna(subset=["timestamp"]).sort_values("timestamp")

                if len(df) < 5:
                    rows.append({
                        "source_file": os.path.basename(csv_path),
                        "drive_id": i,
                        "duration_min": np.nan,
                        "distance_km": np.nan,
                        "pred_efficiency_ml": np.nan,
                        "raw": np.nan,
                        "note": "too short"
                    })
                    continue

                # Predict efficiency (predict_single_drive never raises; its
                # error dict still carries numeric fields used below).
                result = self.predict_single_drive(df)

                rows.append({
                    "source_file": os.path.basename(csv_path),
                    "drive_id": i,
                    "duration_min": result["duration_min"],
                    "distance_km": result["distance_km"],
                    "pred_efficiency_ml": result["efficiency_score"],
                    "raw": result["raw_score"]
                })

            except Exception as e:
                # Per-file failures (bad CSV, missing columns) are recorded,
                # not propagated, so one broken log can't abort the batch.
                logger.error(f"❌ Error processing {csv_path}: {e}")
                rows.append({
                    "source_file": os.path.basename(csv_path),
                    "drive_id": i,
                    "duration_min": np.nan,
                    "distance_km": np.nan,
                    "pred_efficiency_ml": np.nan,
                    "raw": np.nan,
                    "error": str(e)
                })

        pred_df = pd.DataFrame(rows).sort_values("drive_id").reset_index(drop=True)

        # Log summary statistics over the successful predictions only.
        valid_preds = pred_df["pred_efficiency_ml"].dropna()
        if len(valid_preds) > 0:
            logger.info(f"📊 Batch results: {len(valid_preds)} valid predictions")
            logger.info(f"📈 Efficiency range: {valid_preds.min():.1f}% - {valid_preds.max():.1f}%")
            logger.info(f"📊 Mean efficiency: {valid_preds.mean():.1f}%")
            logger.info(f"📊 Std efficiency: {valid_preds.std():.1f}%")

        return pred_df
|
| 371 |
+
|
| 372 |
+
def get_model_info(self) -> Dict[str, Any]:
|
| 373 |
+
"""Get information about the loaded model"""
|
| 374 |
+
if self.model_artifacts is None:
|
| 375 |
+
return {"error": "Model not loaded"}
|
| 376 |
+
|
| 377 |
+
return {
|
| 378 |
+
"model_kind": self.model_artifacts.get("model_kind", "unknown"),
|
| 379 |
+
"feature_count": len(self.model_artifacts.get("feature_names", [])),
|
| 380 |
+
"features": self.model_artifacts.get("feature_names", []),
|
| 381 |
+
"calibration_type": self.model_artifacts.get("calib", {}).get("type", "none"),
|
| 382 |
+
"oof_stats": self.model_artifacts.get("oof_stats", {}),
|
| 383 |
+
"metadata": self.metadata,
|
| 384 |
+
"model_path": self.model_path
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
def evaluate_csv_files(csv_directory: str = "./", model_path: Optional[str] = None) -> pd.DataFrame:
    """
    Evaluate all CSV files in a directory with the efficiency model.

    Args:
        csv_directory: Directory containing CSV files.
        model_path: Optional explicit model file to load; defaults to the
            evaluator's standard search/download behavior.

    Returns:
        DataFrame with one efficiency prediction per CSV file, or an empty
        DataFrame when no CSV files are found.
    """
    # Two locations are probed; when they overlap (e.g. running with
    # csv_directory inside /content on Colab) the same file would match both
    # patterns, so collect into a set to avoid scoring a drive twice.
    csv_patterns = [
        os.path.join(csv_directory, "*.csv"),
        os.path.join("/content", "*.csv")  # For Colab compatibility
    ]

    found = set()
    for pattern in csv_patterns:
        found.update(glob.glob(pattern))

    csv_files = sorted(p for p in found if os.path.isfile(p))

    if not csv_files:
        logger.warning("⚠️ No CSV files found")
        return pd.DataFrame()

    # Initialize evaluator and process files
    evaluator = EfficiencyEvaluator(model_path=model_path)
    return evaluator.predict_batch(csv_files)
|
| 416 |
+
|
| 417 |
+
def main():
    """CLI entry point: load the model, score CSVs, optionally save results.

    Returns:
        0 on success, 1 when no CSVs were found or evaluation failed
        (suitable for use as a process exit code).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Evaluate fuel efficiency model")
    parser.add_argument("--csv-dir", default="./", help="Directory containing CSV files")
    parser.add_argument("--model-path", help="Path to efficiency model file")
    parser.add_argument("--output", help="Output CSV file path")

    args = parser.parse_args()

    try:
        # Initialize evaluator (also validates that --model-path loads).
        evaluator = EfficiencyEvaluator(model_path=args.model_path)

        # Print model info
        info = evaluator.get_model_info()
        print(f"📊 Model info: {info}")

        # Evaluate CSV files.
        # NOTE(review): evaluate_csv_files builds its own evaluator internally,
        # so the --model-path loaded above may not be the model actually used
        # for scoring here — confirm intended behavior.
        results_df = evaluate_csv_files(args.csv_dir)

        if len(results_df) > 0:
            print("\n=== Batch Efficiency Scores (per CSV / drive) ===")
            print(results_df.to_string(index=False))

            # Save results if output path specified
            if args.output:
                results_df.to_csv(args.output, index=False)
                print(f"\n💾 Results saved to: {args.output}")
        else:
            print("❌ No valid CSV files found for evaluation")
            return 1

        return 0

    except Exception as e:
        print(f"❌ Evaluation failed: {e}")
        return 1
|
| 456 |
+
|
| 457 |
+
if __name__ == "__main__":
    # Propagate main()'s status code to the shell. `raise SystemExit(...)` is
    # preferred over the site-provided exit() helper, which is absent when
    # Python runs without site initialization (python -S).
    raise SystemExit(main())
|
efficiency/retrain.py
ADDED
|
@@ -0,0 +1,698 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fuel Efficiency Model Retraining Script
|
| 3 |
+
Reproducible training script for fuel efficiency model with Hugging Face integration
|
| 4 |
+
Based on the original retrain.py but reformatted for system integration
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import glob
|
| 9 |
+
import json
|
| 10 |
+
import math
|
| 11 |
+
import joblib
|
| 12 |
+
import warnings
|
| 13 |
+
import logging
|
| 14 |
+
import numpy as np
|
| 15 |
+
import pandas as pd
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 18 |
+
from datetime import datetime
|
| 19 |
+
|
| 20 |
+
# ML imports
|
| 21 |
+
from sklearn.preprocessing import StandardScaler
|
| 22 |
+
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor
|
| 23 |
+
from sklearn.model_selection import GroupKFold
|
| 24 |
+
from sklearn.metrics import mean_absolute_error
|
| 25 |
+
from sklearn.linear_model import Ridge
|
| 26 |
+
|
| 27 |
+
# Hugging Face integration
|
| 28 |
+
from huggingface_hub import HfApi, Repository
|
| 29 |
+
|
| 30 |
+
# Suppress warnings
|
| 31 |
+
# Suppress noisy library warnings during training runs.
warnings.filterwarnings("ignore", category=UserWarning)

# Setup logging
logger = logging.getLogger("efficiency-retrain")
logger.setLevel(logging.INFO)
# Attach a handler only once so repeated imports don't duplicate log lines.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
    logger.addHandler(handler)

# Constants
SEED = 42  # global RNG seed for reproducible training runs
KMH_TO_MS = 1000.0/3600.0  # km/h -> m/s conversion factor
np.random.seed(SEED)
|
| 45 |
+
|
| 46 |
+
class EfficiencyModelTrainer:
|
| 47 |
+
"""
|
| 48 |
+
Fuel efficiency model trainer with Hugging Face integration.
|
| 49 |
+
Handles data loading, feature engineering, model training, and model upload.
|
| 50 |
+
"""
|
| 51 |
+
|
| 52 |
+
def __init__(self,
|
| 53 |
+
csv_directory: str = "./",
|
| 54 |
+
export_directory: str = "./efficiency_export",
|
| 55 |
+
repo_id: str = "BinKhoaLe1812/Fuel_Efficiency_OBD"):
|
| 56 |
+
"""
|
| 57 |
+
Initialize the trainer.
|
| 58 |
+
|
| 59 |
+
Args:
|
| 60 |
+
csv_directory: Directory containing CSV files for training
|
| 61 |
+
export_directory: Directory to save trained model artifacts
|
| 62 |
+
repo_id: Hugging Face repository ID for model upload
|
| 63 |
+
"""
|
| 64 |
+
self.csv_directory = csv_directory
|
| 65 |
+
self.export_directory = Path(export_directory)
|
| 66 |
+
self.repo_id = repo_id
|
| 67 |
+
self.hf_token = os.getenv("HF_TOKEN")
|
| 68 |
+
|
| 69 |
+
# Create export directory
|
| 70 |
+
self.export_directory.mkdir(parents=True, exist_ok=True)
|
| 71 |
+
|
| 72 |
+
# Initialize HF API if token available
|
| 73 |
+
self.hf_api = None
|
| 74 |
+
if self.hf_token:
|
| 75 |
+
self.hf_api = HfApi(token=self.hf_token)
|
| 76 |
+
logger.info(f"✅ Hugging Face API initialized for {repo_id}")
|
| 77 |
+
else:
|
| 78 |
+
logger.warning("⚠️ HF_TOKEN not set - model will not be uploaded to Hugging Face")
|
| 79 |
+
|
| 80 |
+
    def load_training_data(self) -> pd.DataFrame:
        """Load and preprocess training data from CSV files.

        Returns:
            Combined DataFrame across all readable CSVs, tagged with
            source_file and a 1-based drive_id, sorted by (drive_id,
            timestamp), with derivative columns added.

        Raises:
            RuntimeError: when no CSV files exist or none can be parsed.
        """
        logger.info("📊 Loading training data...")

        # Find CSV files.
        # NOTE(review): the two patterns can match the same files when the
        # working directory is /content — duplicates would then be loaded
        # twice as separate drives; confirm whether that is intended.
        csv_patterns = [
            os.path.join(self.csv_directory, "*.csv"),
            os.path.join("/content", "*.csv")  # For Colab compatibility
        ]

        csvs = []
        for pattern in csv_patterns:
            csvs.extend(glob.glob(pattern))

        csvs = sorted([p for p in csvs if os.path.isfile(p)])

        if not csvs:
            raise RuntimeError("No CSV logs found for training")

        logger.info(f"📁 Found {len(csvs)} CSV files")

        # Load and combine CSV files; unreadable files are skipped with a warning.
        frames = []
        for i, p in enumerate(csvs, start=1):
            try:
                d = pd.read_csv(p)
                d["source_file"] = os.path.basename(p)
                d["drive_id"] = i  # 1-based id used later for group-aware CV
                frames.append(d)
                logger.info(f"✅ Loaded {os.path.basename(p)} ({len(d)} rows)")
            except Exception as e:
                logger.warning(f"⚠️ Failed to load {p}: {e}")

        if not frames:
            raise RuntimeError("No valid CSV files could be loaded")

        # Combine all data.
        # NOTE(review): _add_basic_derivatives re-sorts by timestamp alone and
        # computes diffs over the whole frame, so ACCEL/JERK can span drive
        # boundaries when drives interleave in time — verify downstream code
        # recomputes derivatives per drive where that matters.
        df = pd.concat(frames, ignore_index=True)
        df["timestamp"] = self._ensure_dt(df["timestamp"])
        df = df.dropna(subset=["timestamp"]).sort_values(["drive_id", "timestamp"]).reset_index(drop=True)
        df = self._add_basic_derivatives(df)

        logger.info(f"📊 Combined dataset: {len(df)} rows, {df['drive_id'].nunique()} drives")
        return df
|
| 124 |
+
|
| 125 |
+
def _ensure_dt(self, s):
|
| 126 |
+
"""Ensure datetime conversion"""
|
| 127 |
+
return pd.to_datetime(s, errors="coerce")
|
| 128 |
+
|
| 129 |
+
def _infer_base_interval_seconds(self, ts, fallback=1.0):
|
| 130 |
+
"""Infer base interval from timestamps"""
|
| 131 |
+
ts = pd.to_datetime(ts, errors="coerce")
|
| 132 |
+
dt = ts.diff().dt.total_seconds().dropna()
|
| 133 |
+
med = float(np.nanmedian(dt)) if len(dt) else fallback
|
| 134 |
+
return fallback if (not np.isfinite(med) or med <= 0) else med
|
| 135 |
+
|
| 136 |
+
def _rows_for(self, seconds, base_sec):
|
| 137 |
+
"""Calculate number of rows for given time window"""
|
| 138 |
+
return max(3, int(round(seconds / max(1e-3, base_sec))))
|
| 139 |
+
|
| 140 |
+
def _add_basic_derivatives(self, d):
|
| 141 |
+
"""Add basic derivatives (acceleration, jerk, distance)"""
|
| 142 |
+
d = d.copy()
|
| 143 |
+
d["timestamp"] = self._ensure_dt(d["timestamp"])
|
| 144 |
+
d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
|
| 145 |
+
base = self._infer_base_interval_seconds(d["timestamp"], 1.0)
|
| 146 |
+
|
| 147 |
+
# Convert numeric columns
|
| 148 |
+
for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
|
| 149 |
+
if c in d.columns:
|
| 150 |
+
d[c] = pd.to_numeric(d[c], errors="coerce")
|
| 151 |
+
|
| 152 |
+
# Convert speed to m/s
|
| 153 |
+
if "SPEED_ms" not in d.columns:
|
| 154 |
+
d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan
|
| 155 |
+
|
| 156 |
+
# Calculate derivatives
|
| 157 |
+
d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
|
| 158 |
+
d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)
|
| 159 |
+
|
| 160 |
+
# Calculate distance
|
| 161 |
+
dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
|
| 162 |
+
d["dist_m"] = d["SPEED_ms"] * dt
|
| 163 |
+
|
| 164 |
+
return d
|
| 165 |
+
|
| 166 |
+
def _idle_rule(self, d, thr):
|
| 167 |
+
"""Apply idle detection rule"""
|
| 168 |
+
speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
|
| 169 |
+
thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
|
| 170 |
+
load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
|
| 171 |
+
maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
|
| 172 |
+
accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))
|
| 173 |
+
|
| 174 |
+
mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
|
| 175 |
+
k = 5
|
| 176 |
+
return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
|
| 177 |
+
if len(mask) >= k else mask.astype(bool))
|
| 178 |
+
|
| 179 |
+
def _sharp_mask_from_thresholds(self, d, thr):
|
| 180 |
+
"""Detect sharp acceleration/deceleration events"""
|
| 181 |
+
thr_a = thr.get("ACCEL_HIGH_Q85",
|
| 182 |
+
np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
|
| 183 |
+
thr_j = thr.get("JERK_HIGH_Q90",
|
| 184 |
+
np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
|
| 185 |
+
return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)
|
| 186 |
+
|
| 187 |
+
def _run_lengths(self, mask):
|
| 188 |
+
"""Calculate run lengths from boolean mask"""
|
| 189 |
+
m = np.asarray(mask, dtype=bool)
|
| 190 |
+
if m.size == 0:
|
| 191 |
+
return np.array([], int), np.array([], int)
|
| 192 |
+
dm = np.diff(np.r_[False, m, False].astype(int))
|
| 193 |
+
starts = np.where(dm == 1)[0]
|
| 194 |
+
ends = np.where(dm == -1)[0]
|
| 195 |
+
return starts, (ends - starts)
|
| 196 |
+
|
| 197 |
+
def _penalty(self, series):
|
| 198 |
+
"""Calculate penalty function for efficiency scoring"""
|
| 199 |
+
arr = pd.to_numeric(series, errors="coerce").fillna(0).values
|
| 200 |
+
if arr.size == 0:
|
| 201 |
+
return pd.Series([], dtype=float, index=series.index)
|
| 202 |
+
q25, q50, q75 = np.quantile(arr, [0.25, 0.50, 0.75])
|
| 203 |
+
s = (q75-q25)/1.349 if (q75 > q25) else (np.std(arr) if np.std(arr) > 0 else 1.0)
|
| 204 |
+
return pd.Series(1/(1+np.exp(-(arr - q50)/max(1e-6, s))), index=series.index)
|
| 205 |
+
|
| 206 |
+
def compute_fleet_thresholds(self, df: pd.DataFrame) -> Dict[str, float]:
|
| 207 |
+
"""Compute fleet-wide thresholds for feature engineering"""
|
| 208 |
+
logger.info("🔧 Computing fleet thresholds...")
|
| 209 |
+
|
| 210 |
+
thr = {}
|
| 211 |
+
|
| 212 |
+
# RPM threshold
|
| 213 |
+
if "RPM" in df and df["RPM"].notna().any():
|
| 214 |
+
thr["RPM90"] = float(np.nanquantile(df["RPM"], 0.90))
|
| 215 |
+
|
| 216 |
+
# MAF threshold
|
| 217 |
+
if "MAF" in df and df["MAF"].notna().any():
|
| 218 |
+
thr["MAF90"] = float(np.nanquantile(df["MAF"], 0.90))
|
| 219 |
+
|
| 220 |
+
# Throttle position thresholds
|
| 221 |
+
if "THROTTLE_POS" in df and df["THROTTLE_POS"].notna().any():
|
| 222 |
+
thr["THR_LOW_Q10"] = float(np.nanquantile(df["THROTTLE_POS"], 0.10))
|
| 223 |
+
thr["THR_Q85"] = float(np.nanquantile(df["THROTTLE_POS"], 0.85))
|
| 224 |
+
|
| 225 |
+
# Engine load thresholds
|
| 226 |
+
if "ENGINE_LOAD" in df and df["ENGINE_LOAD"].notna().any():
|
| 227 |
+
thr["LOAD_LOW_Q15"] = float(np.nanquantile(df["ENGINE_LOAD"], 0.15))
|
| 228 |
+
thr["LOAD_Q85"] = float(np.nanquantile(df["ENGINE_LOAD"], 0.85))
|
| 229 |
+
|
| 230 |
+
# Acceleration and jerk thresholds
|
| 231 |
+
tmpd = self._add_basic_derivatives(df[["timestamp","SPEED"]].assign(
|
| 232 |
+
RPM=df.get("RPM"), MAF=df.get("MAF"),
|
| 233 |
+
THROTTLE_POS=df.get("THROTTLE_POS"), ENGINE_LOAD=df.get("ENGINE_LOAD")))
|
| 234 |
+
|
| 235 |
+
thr["ACCEL_LOW_Q20"] = float(np.nanquantile(tmpd["ACCEL"].abs().dropna(), 0.20)) if tmpd["ACCEL"].notna().any() else 0.05
|
| 236 |
+
thr["ACCEL_HIGH_Q85"] = float(np.nanquantile(tmpd["ACCEL"].abs().dropna(), 0.85)) if tmpd["ACCEL"].notna().any() else 0.3
|
| 237 |
+
thr["JERK_HIGH_Q90"] = float(np.nanquantile(tmpd["JERK"].abs().dropna(), 0.90)) if tmpd["JERK"].notna().any() else 0.5
|
| 238 |
+
thr["SPEED_IDLE_MPS"] = 0.6
|
| 239 |
+
|
| 240 |
+
logger.info(f"✅ Computed {len(thr)} fleet thresholds")
|
| 241 |
+
return thr
|
| 242 |
+
|
| 243 |
+
def create_algorithmic_teacher(self, df: pd.DataFrame, thr: Dict[str, float]) -> pd.DataFrame:
|
| 244 |
+
"""Create algorithmic teacher labels for training"""
|
| 245 |
+
logger.info("🎯 Creating algorithmic teacher labels...")
|
| 246 |
+
|
| 247 |
+
# Apply idle rule to all drives
|
| 248 |
+
df["IDLE_RULE"] = False
|
| 249 |
+
for gid, g in df.groupby("drive_id", sort=True):
|
| 250 |
+
df.loc[g.index, "IDLE_RULE"] = self._idle_rule(g, thr)
|
| 251 |
+
|
| 252 |
+
# Extract thresholds
|
| 253 |
+
thr_accel, thr_jerk = thr["ACCEL_HIGH_Q85"], thr["JERK_HIGH_Q90"]
|
| 254 |
+
thr_rpm90, thr_maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
|
| 255 |
+
|
| 256 |
+
# Process each drive
|
| 257 |
+
drv = []
|
| 258 |
+
for gid, g in df.groupby("drive_id", sort=True):
|
| 259 |
+
if len(g) < 5:
|
| 260 |
+
continue
|
| 261 |
+
|
| 262 |
+
base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
|
| 263 |
+
dt_s = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
|
| 264 |
+
T = float(dt_s.sum())
|
| 265 |
+
mins = max(1e-6, T/60)
|
| 266 |
+
|
| 267 |
+
# Sharp acceleration analysis
|
| 268 |
+
sharp = self._sharp_mask_from_thresholds(g, thr).values
|
| 269 |
+
st, ln = self._run_lengths(sharp)
|
| 270 |
+
freq_pm = len(ln)/mins
|
| 271 |
+
dur_frac = (ln.sum()*base)/max(1e-6, T)
|
| 272 |
+
|
| 273 |
+
# Peak analysis
|
| 274 |
+
peaks = []
|
| 275 |
+
for a, b in zip(st, ln):
|
| 276 |
+
seg = g.iloc[a:a+b]
|
| 277 |
+
pa = float(np.nanmax(np.abs(seg["ACCEL"])))
|
| 278 |
+
pj = float(np.nanmax(np.abs(seg["JERK"])))
|
| 279 |
+
over_a = max(0.0, (pa-thr_accel)/max(1e-6, thr_accel))
|
| 280 |
+
over_j = max(0.0, (pj-thr_jerk)/max(1e-6, thr_jerk))
|
| 281 |
+
peaks.append(min(1.5, 0.7*over_a + 0.3*over_j))
|
| 282 |
+
|
| 283 |
+
sharp_mag = float(np.mean(peaks)) if peaks else 0.0
|
| 284 |
+
|
| 285 |
+
# Idle analysis
|
| 286 |
+
idle_frac = float(g["IDLE_RULE"].mean())
|
| 287 |
+
sti, lni = self._run_lengths(g["IDLE_RULE"].values)
|
| 288 |
+
idle_med_s = float(np.median(lni)*base if len(lni) else 0.0)
|
| 289 |
+
idle_epm = len(lni)/mins
|
| 290 |
+
|
| 291 |
+
# Speed variability
|
| 292 |
+
W10 = self._rows_for(10, base)
|
| 293 |
+
speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())
|
| 294 |
+
|
| 295 |
+
# High-load fractions
|
| 296 |
+
frac_rpm90 = float((g["RPM"] >= thr_rpm90).mean()) if ("RPM" in g and np.isfinite(thr_rpm90)) else 0.0
|
| 297 |
+
frac_maf90 = float((g["MAF"] >= thr_maf90).mean()) if ("MAF" in g and np.isfinite(thr_maf90)) else 0.0
|
| 298 |
+
frac_load85 = float((g["ENGINE_LOAD"] >= thr.get("LOAD_Q85", np.inf)).mean()) if "ENGINE_LOAD" in g else 0.0
|
| 299 |
+
frac_thr85 = float((g["THROTTLE_POS"] >= thr.get("THR_Q85", np.inf)).mean()) if "THROTTLE_POS" in g else 0.0
|
| 300 |
+
|
| 301 |
+
# Efficiency proxy
|
| 302 |
+
proxy = (0.80*frac_rpm90 + 0.60*frac_maf90 + 0.15*frac_load85 + 0.10*frac_thr85 + 0.10*idle_frac)
|
| 303 |
+
|
| 304 |
+
drv.append(dict(
|
| 305 |
+
drive_id=gid, duration_min=mins, distance_km=g["dist_m"].sum()/1000.0,
|
| 306 |
+
freq_pm=freq_pm, dur_frac=dur_frac, sharp_mag=sharp_mag,
|
| 307 |
+
idle_frac=idle_frac, idle_med_s=idle_med_s, idle_epm=idle_epm,
|
| 308 |
+
speed_cv=speed_cv, frac_rpm90=frac_rpm90, frac_maf90=frac_maf90, proxy=proxy
|
| 309 |
+
))
|
| 310 |
+
|
| 311 |
+
dfeat = pd.DataFrame(drv).set_index("drive_id")
|
| 312 |
+
|
| 313 |
+
# Calculate penalty-based features
|
| 314 |
+
P = pd.DataFrame({
|
| 315 |
+
"p_freq": self._penalty(dfeat["freq_pm"]),
|
| 316 |
+
"p_dur": self._penalty(dfeat["dur_frac"]),
|
| 317 |
+
"p_mag": self._penalty(dfeat["sharp_mag"]),
|
| 318 |
+
"p_idle": 0.7*self._penalty(dfeat["idle_frac"]) + 0.3*self._penalty(dfeat["idle_med_s"]),
|
| 319 |
+
"p_cv": self._penalty(dfeat["speed_cv"]),
|
| 320 |
+
"p_rpm": self._penalty(dfeat["frac_rpm90"]),
|
| 321 |
+
"p_maf": self._penalty(dfeat["frac_maf90"]),
|
| 322 |
+
}, index=dfeat.index)
|
| 323 |
+
|
| 324 |
+
# Calculate efficiency scores
|
| 325 |
+
proxy = dfeat["proxy"].clip(0, 1-1e-6)
|
| 326 |
+
target_lin = -np.log(1 - proxy)
|
| 327 |
+
w = np.linalg.lstsq(P.values, target_lin.values, rcond=None)[0]
|
| 328 |
+
dfeat["ineff_model"] = 1 - np.exp(-P.values @ w)
|
| 329 |
+
dfeat["efficiency_algo"] = 100*(1 - dfeat["ineff_model"])
|
| 330 |
+
|
| 331 |
+
logger.info(f"✅ Teacher range: {dfeat['efficiency_algo'].min():.1f} → {dfeat['efficiency_algo'].max():.1f}")
|
| 332 |
+
return dfeat
|
| 333 |
+
|
| 334 |
+
def _q(self, s, p):
|
| 335 |
+
"""Quantile helper function"""
|
| 336 |
+
s = pd.to_numeric(s, errors="coerce")
|
| 337 |
+
return float(np.nanquantile(s, p)) if s.notna().any() else 0.0
|
| 338 |
+
|
| 339 |
+
def _agg_for_ml_drive(self, g, thr):
|
| 340 |
+
"""Aggregate drive-level features for ML model"""
|
| 341 |
+
g = self._add_basic_derivatives(g.copy())
|
| 342 |
+
base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
|
| 343 |
+
g["IDLE_RULE"] = self._idle_rule(g, thr)
|
| 344 |
+
|
| 345 |
+
dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
|
| 346 |
+
T = float(dt.sum())
|
| 347 |
+
mins = max(1e-6, T/60)
|
| 348 |
+
|
| 349 |
+
sharp = self._sharp_mask_from_thresholds(g, thr).values
|
| 350 |
+
edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
|
| 351 |
+
sharp_freq_pm = (len(edges)//2)/mins
|
| 352 |
+
|
| 353 |
+
rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
|
| 354 |
+
frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
|
| 355 |
+
frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0
|
| 356 |
+
|
| 357 |
+
W10 = self._rows_for(10, base)
|
| 358 |
+
speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())
|
| 359 |
+
|
| 360 |
+
return {
|
| 361 |
+
"duration_min": max(1e-6, T/60),
|
| 362 |
+
"distance_km": g["dist_m"].sum()/1000.0,
|
| 363 |
+
"speed_mean": float(g["SPEED_ms"].mean()),
|
| 364 |
+
"speed_q90": self._q(g["SPEED_ms"], 0.90),
|
| 365 |
+
"speed_cv": speed_cv,
|
| 366 |
+
"accel_q90": self._q(g["ACCEL"].abs(), 0.90),
|
| 367 |
+
"jerk_q90": self._q(g["JERK"].abs(), 0.90),
|
| 368 |
+
"sharp_freq_pm": sharp_freq_pm,
|
| 369 |
+
"idle_frac": float(g["IDLE_RULE"].mean()),
|
| 370 |
+
"idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
|
| 371 |
+
"rpm_q90": self._q(g["RPM"], 0.90) if "RPM" in g else 0.0,
|
| 372 |
+
"maf_q90": self._q(g["MAF"], 0.90) if "MAF" in g else 0.0,
|
| 373 |
+
"load_q85": self._q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
|
| 374 |
+
"thr_q85": self._q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
|
| 375 |
+
"frac_rpm90": frac_rpm90,
|
| 376 |
+
"frac_maf90": frac_maf90,
|
| 377 |
+
"fuel_intensity": (self._q(g["RPM"], 0.90)*self._q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
|
| 378 |
+
}
|
| 379 |
+
|
| 380 |
+
def prepare_ml_data(self, df: pd.DataFrame, dfeat: pd.DataFrame, thr: Dict[str, float]) -> Tuple[pd.DataFrame, np.ndarray, np.ndarray]:
|
| 381 |
+
"""Prepare data for machine learning training"""
|
| 382 |
+
logger.info("🔧 Preparing ML training data...")
|
| 383 |
+
|
| 384 |
+
rows, y, groups = [], [], []
|
| 385 |
+
for gid, g in df.groupby("drive_id", sort=True):
|
| 386 |
+
if len(g) < 5:
|
| 387 |
+
continue
|
| 388 |
+
rows.append(self._agg_for_ml_drive(g, thr))
|
| 389 |
+
y.append(float(dfeat.loc[gid, "efficiency_algo"]))
|
| 390 |
+
groups.append(g["source_file"].iloc[0] if "source_file" in g.columns else gid)
|
| 391 |
+
|
| 392 |
+
X = pd.DataFrame(rows)
|
| 393 |
+
y = np.asarray(y, float)
|
| 394 |
+
groups = np.asarray(groups)
|
| 395 |
+
|
| 396 |
+
# Remove zero-variance features
|
| 397 |
+
zv = X.std(numeric_only=True).fillna(0.0)
|
| 398 |
+
drop_cols = list(zv[zv <= 1e-10].index)
|
| 399 |
+
if drop_cols:
|
| 400 |
+
X = X.drop(columns=drop_cols)
|
| 401 |
+
logger.info(f"🗑️ Dropped zero-variance features: {drop_cols}")
|
| 402 |
+
|
| 403 |
+
# Scale features
|
| 404 |
+
holdout_cols = ["duration_min", "distance_km"]
|
| 405 |
+
num_cols = [c for c in X.columns if c not in holdout_cols]
|
| 406 |
+
sc = StandardScaler().fit(X[num_cols])
|
| 407 |
+
X[num_cols] = sc.transform(X[num_cols])
|
| 408 |
+
|
| 409 |
+
logger.info(f"✅ Prepared ML data: {X.shape[0]} samples, {X.shape[1]} features")
|
| 410 |
+
return X, y, groups, sc, num_cols, holdout_cols
|
| 411 |
+
|
| 412 |
+
def train_model(self, X: pd.DataFrame, y: np.ndarray, groups: np.ndarray) -> Tuple[Any, str, Dict[str, Any]]:
|
| 413 |
+
"""Train the efficiency model with cross-validation"""
|
| 414 |
+
logger.info("🤖 Training efficiency model...")
|
| 415 |
+
|
| 416 |
+
# Out-of-fold predictions for calibration
|
| 417 |
+
gkf = GroupKFold(n_splits=min(5, max(2, len(np.unique(groups)))))
|
| 418 |
+
oof_raw = np.zeros_like(y)
|
| 419 |
+
|
| 420 |
+
for tr, va in gkf.split(X, y, groups):
|
| 421 |
+
gbm_fold = HistGradientBoostingRegressor(
|
| 422 |
+
loss="squared_error", max_depth=6, learning_rate=0.08, max_bins=255,
|
| 423 |
+
early_stopping=True, random_state=SEED
|
| 424 |
+
)
|
| 425 |
+
wtr = np.clip(X.iloc[tr]["duration_min"].values, 0.5, None)
|
| 426 |
+
gbm_fold.fit(X.iloc[tr], y[tr], sample_weight=wtr)
|
| 427 |
+
pred = gbm_fold.predict(X.iloc[va])
|
| 428 |
+
|
| 429 |
+
if np.std(pred) < 1e-6:
|
| 430 |
+
# Ridge rescue to enforce variability
|
| 431 |
+
ridge = Ridge(alpha=1.0, random_state=SEED).fit(X.iloc[tr][X.columns[2:]], y[tr])
|
| 432 |
+
pred = ridge.predict(X.iloc[va][X.columns[2:]])
|
| 433 |
+
|
| 434 |
+
oof_raw[va] = pred
|
| 435 |
+
|
| 436 |
+
# Calculate OOF statistics
|
| 437 |
+
raw_std = float(np.std(oof_raw))
|
| 438 |
+
y_std = float(np.std(y))
|
| 439 |
+
corr = float(np.corrcoef(oof_raw, y)[0,1]) if len(y) > 1 else 1.0
|
| 440 |
+
|
| 441 |
+
logger.info(f"📊 OOF: corr={corr:.3f} | raw_std={raw_std:.3f} | y_std={y_std:.3f}")
|
| 442 |
+
|
| 443 |
+
# Quantile-mapping calibration
|
| 444 |
+
qs = np.linspace(0.05, 0.95, 19)
|
| 445 |
+
rq = np.quantile(oof_raw, qs)
|
| 446 |
+
yq = np.quantile(y, qs)
|
| 447 |
+
|
| 448 |
+
# Ensure strictly increasing rq for stable interpolation
|
| 449 |
+
for i in range(1, len(rq)):
|
| 450 |
+
if rq[i] <= rq[i-1]:
|
| 451 |
+
rq[i] = rq[i-1] + 1e-6
|
| 452 |
+
|
| 453 |
+
calib = {"type": "qmap", "rq": rq.tolist(), "yq": yq.tolist()}
|
| 454 |
+
|
| 455 |
+
def apply_calib_qmap(raw):
|
| 456 |
+
return float(np.clip(np.interp(raw, rq, yq), 0, 100))
|
| 457 |
+
|
| 458 |
+
oof_cal = np.array([apply_calib_qmap(r) for r in oof_raw], float)
|
| 459 |
+
oof_mae = float(mean_absolute_error(y, oof_cal))
|
| 460 |
+
|
| 461 |
+
logger.info(f"📊 OOF MAE (qmap): {oof_mae:.2f}")
|
| 462 |
+
|
| 463 |
+
# Final model training
|
| 464 |
+
gbm = HistGradientBoostingRegressor(
|
| 465 |
+
loss="squared_error", max_depth=6, learning_rate=0.08, max_bins=255,
|
| 466 |
+
early_stopping=False, max_iter=400, random_state=SEED
|
| 467 |
+
)
|
| 468 |
+
w_all = np.clip(X["duration_min"].values, 0.5, None)
|
| 469 |
+
gbm.fit(X, y, sample_weight=w_all)
|
| 470 |
+
raw_all = gbm.predict(X)
|
| 471 |
+
|
| 472 |
+
if np.std(raw_all) < 1e-6:
|
| 473 |
+
logger.warning("⚠️ Final GBM raw constant — switching to RandomForest")
|
| 474 |
+
rf = RandomForestRegressor(n_estimators=600, min_samples_leaf=2, random_state=SEED, n_jobs=-1)
|
| 475 |
+
rf.fit(X, y)
|
| 476 |
+
model_kind, model = "rf", rf
|
| 477 |
+
else:
|
| 478 |
+
model_kind, model = "gbm", gbm
|
| 479 |
+
|
| 480 |
+
oof_stats = {
|
| 481 |
+
"oof_mae_qmap": oof_mae,
|
| 482 |
+
"oof_corr": corr,
|
| 483 |
+
"raw_std": raw_std,
|
| 484 |
+
"y_std": y_std
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
logger.info(f"✅ Model training complete | kind: {model_kind}")
|
| 488 |
+
return model, model_kind, calib, oof_stats
|
| 489 |
+
|
| 490 |
+
def save_model(self, model, model_kind: str, scaler, feature_names: List[str],
|
| 491 |
+
num_cols: List[str], holdout_cols: List[str], thr: Dict[str, float],
|
| 492 |
+
calib: Dict[str, Any], oof_stats: Dict[str, Any]) -> str:
|
| 493 |
+
"""Save the trained model and artifacts"""
|
| 494 |
+
logger.info("💾 Saving model artifacts...")
|
| 495 |
+
|
| 496 |
+
# Prepare artifacts
|
| 497 |
+
artifacts = {
|
| 498 |
+
"scaler": scaler,
|
| 499 |
+
"model_kind": model_kind,
|
| 500 |
+
"gbm": model if model_kind == "gbm" else None,
|
| 501 |
+
"rf": model if model_kind == "rf" else None,
|
| 502 |
+
"feature_names": feature_names,
|
| 503 |
+
"num_cols": num_cols,
|
| 504 |
+
"holdout_cols": holdout_cols,
|
| 505 |
+
"windowing": {"size_s": 120, "step_s": 60}, # For future use
|
| 506 |
+
"thr": thr,
|
| 507 |
+
"seed": SEED,
|
| 508 |
+
"calib": calib,
|
| 509 |
+
"oof_stats": oof_stats,
|
| 510 |
+
"training_timestamp": datetime.now().isoformat(),
|
| 511 |
+
"version": "1.0" # Will be updated based on HF versioning
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
# Save model
|
| 515 |
+
model_path = self.export_directory / "efficiency_model.joblib"
|
| 516 |
+
joblib.dump(artifacts, model_path)
|
| 517 |
+
|
| 518 |
+
# Save metadata
|
| 519 |
+
metadata = {
|
| 520 |
+
"model_type": "fuel_efficiency",
|
| 521 |
+
"version": "1.0",
|
| 522 |
+
"training_date": datetime.now().isoformat(),
|
| 523 |
+
"model_kind": model_kind,
|
| 524 |
+
"feature_count": len(feature_names),
|
| 525 |
+
"oof_stats": oof_stats,
|
| 526 |
+
"calibration_type": calib.get("type", "none")
|
| 527 |
+
}
|
| 528 |
+
|
| 529 |
+
meta_path = self.export_directory / "efficiency_meta.json"
|
| 530 |
+
with open(meta_path, 'w') as f:
|
| 531 |
+
json.dump(metadata, f, indent=2)
|
| 532 |
+
|
| 533 |
+
logger.info(f"✅ Model saved to {model_path}")
|
| 534 |
+
logger.info(f"✅ Metadata saved to {meta_path}")
|
| 535 |
+
|
| 536 |
+
return str(model_path)
|
| 537 |
+
|
| 538 |
+
def upload_to_huggingface(self, version: str = None) -> bool:
|
| 539 |
+
"""Upload the trained model to Hugging Face Hub"""
|
| 540 |
+
if not self.hf_api:
|
| 541 |
+
logger.warning("⚠️ Hugging Face API not available - skipping upload")
|
| 542 |
+
return False
|
| 543 |
+
|
| 544 |
+
try:
|
| 545 |
+
if version is None:
|
| 546 |
+
version = self._get_next_version()
|
| 547 |
+
|
| 548 |
+
logger.info(f"📤 Uploading model version {version} to Hugging Face...")
|
| 549 |
+
|
| 550 |
+
# Upload model file
|
| 551 |
+
model_path = self.export_directory / "efficiency_model.joblib"
|
| 552 |
+
meta_path = self.export_directory / "efficiency_meta.json"
|
| 553 |
+
|
| 554 |
+
if not model_path.exists():
|
| 555 |
+
logger.error(f"❌ Model file not found: {model_path}")
|
| 556 |
+
return False
|
| 557 |
+
|
| 558 |
+
# Upload files
|
| 559 |
+
self.hf_api.upload_file(
|
| 560 |
+
path_or_fileobj=str(model_path),
|
| 561 |
+
path_in_repo=f"{version}/efficiency_model.joblib",
|
| 562 |
+
repo_id=self.repo_id,
|
| 563 |
+
repo_type="model"
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
if meta_path.exists():
|
| 567 |
+
self.hf_api.upload_file(
|
| 568 |
+
path_or_fileobj=str(meta_path),
|
| 569 |
+
path_in_repo=f"{version}/efficiency_meta.json",
|
| 570 |
+
repo_id=self.repo_id,
|
| 571 |
+
repo_type="model"
|
| 572 |
+
)
|
| 573 |
+
|
| 574 |
+
logger.info(f"✅ Model {version} uploaded successfully to {self.repo_id}")
|
| 575 |
+
return True
|
| 576 |
+
|
| 577 |
+
except Exception as e:
|
| 578 |
+
logger.error(f"❌ Error uploading to Hugging Face: {e}")
|
| 579 |
+
return False
|
| 580 |
+
|
| 581 |
+
def _get_next_version(self) -> str:
|
| 582 |
+
"""Get the next version number (1.0, 1.1, 1.2, ..., 1.9, 2.0, etc.)"""
|
| 583 |
+
try:
|
| 584 |
+
repo_files = self.hf_api.list_repo_files(
|
| 585 |
+
repo_id=self.repo_id,
|
| 586 |
+
repo_type="model"
|
| 587 |
+
)
|
| 588 |
+
|
| 589 |
+
# Find existing versions
|
| 590 |
+
versions = []
|
| 591 |
+
for f in repo_files:
|
| 592 |
+
if f.startswith('v') and '/' not in f:
|
| 593 |
+
try:
|
| 594 |
+
version_str = f[1:] # Remove 'v' prefix
|
| 595 |
+
major, minor = map(int, version_str.split('.'))
|
| 596 |
+
versions.append((major, minor))
|
| 597 |
+
except ValueError:
|
| 598 |
+
continue
|
| 599 |
+
|
| 600 |
+
if not versions:
|
| 601 |
+
return "v1.0"
|
| 602 |
+
|
| 603 |
+
# Sort and get next version
|
| 604 |
+
versions.sort(key=lambda x: (x[0], x[1]))
|
| 605 |
+
latest_major, latest_minor = versions[-1]
|
| 606 |
+
|
| 607 |
+
if latest_minor < 9:
|
| 608 |
+
return f"v{latest_major}.{latest_minor + 1}"
|
| 609 |
+
else:
|
| 610 |
+
return f"v{latest_major + 1}.0"
|
| 611 |
+
|
| 612 |
+
except Exception as e:
|
| 613 |
+
logger.warning(f"⚠️ Could not determine next version: {e}")
|
| 614 |
+
return "v1.0"
|
| 615 |
+
|
| 616 |
+
def train_and_upload(self, upload_to_hf: bool = True) -> Dict[str, Any]:
|
| 617 |
+
"""Complete training pipeline"""
|
| 618 |
+
try:
|
| 619 |
+
logger.info("🚀 Starting fuel efficiency model training pipeline...")
|
| 620 |
+
|
| 621 |
+
# Load data
|
| 622 |
+
df = self.load_training_data()
|
| 623 |
+
|
| 624 |
+
# Compute thresholds
|
| 625 |
+
thr = self.compute_fleet_thresholds(df)
|
| 626 |
+
|
| 627 |
+
# Create teacher labels
|
| 628 |
+
dfeat = self.create_algorithmic_teacher(df, thr)
|
| 629 |
+
|
| 630 |
+
# Prepare ML data
|
| 631 |
+
X, y, groups, scaler, num_cols, holdout_cols = self.prepare_ml_data(df, dfeat, thr)
|
| 632 |
+
|
| 633 |
+
# Train model
|
| 634 |
+
model, model_kind, calib, oof_stats = self.train_model(X, y, groups)
|
| 635 |
+
|
| 636 |
+
# Save model
|
| 637 |
+
model_path = self.save_model(
|
| 638 |
+
model, model_kind, scaler, list(X.columns),
|
| 639 |
+
num_cols, holdout_cols, thr, calib, oof_stats
|
| 640 |
+
)
|
| 641 |
+
|
| 642 |
+
# Upload to Hugging Face
|
| 643 |
+
upload_success = False
|
| 644 |
+
if upload_to_hf:
|
| 645 |
+
upload_success = self.upload_to_huggingface()
|
| 646 |
+
|
| 647 |
+
result = {
|
| 648 |
+
"success": True,
|
| 649 |
+
"model_path": model_path,
|
| 650 |
+
"model_kind": model_kind,
|
| 651 |
+
"oof_stats": oof_stats,
|
| 652 |
+
"upload_success": upload_success,
|
| 653 |
+
"training_samples": len(X),
|
| 654 |
+
"feature_count": len(X.columns)
|
| 655 |
+
}
|
| 656 |
+
|
| 657 |
+
logger.info("✅ Training pipeline completed successfully")
|
| 658 |
+
return result
|
| 659 |
+
|
| 660 |
+
except Exception as e:
|
| 661 |
+
logger.error(f"❌ Training pipeline failed: {e}")
|
| 662 |
+
return {"success": False, "error": str(e)}
|
| 663 |
+
|
| 664 |
+
def main():
|
| 665 |
+
"""Main function for command-line usage"""
|
| 666 |
+
import argparse
|
| 667 |
+
|
| 668 |
+
parser = argparse.ArgumentParser(description="Train fuel efficiency model")
|
| 669 |
+
parser.add_argument("--csv-dir", default="./", help="Directory containing CSV files")
|
| 670 |
+
parser.add_argument("--export-dir", default="./efficiency_export", help="Export directory")
|
| 671 |
+
parser.add_argument("--repo-id", default="BinKhoaLe1812/Fuel_Efficiency_OBD", help="Hugging Face repo ID")
|
| 672 |
+
parser.add_argument("--no-upload", action="store_true", help="Skip Hugging Face upload")
|
| 673 |
+
|
| 674 |
+
args = parser.parse_args()
|
| 675 |
+
|
| 676 |
+
# Initialize trainer
|
| 677 |
+
trainer = EfficiencyModelTrainer(
|
| 678 |
+
csv_directory=args.csv_dir,
|
| 679 |
+
export_directory=args.export_dir,
|
| 680 |
+
repo_id=args.repo_id
|
| 681 |
+
)
|
| 682 |
+
|
| 683 |
+
# Train and upload
|
| 684 |
+
result = trainer.train_and_upload(upload_to_hf=not args.no_upload)
|
| 685 |
+
|
| 686 |
+
if result["success"]:
|
| 687 |
+
print("✅ Training completed successfully!")
|
| 688 |
+
print(f"📊 Model: {result['model_kind']}")
|
| 689 |
+
print(f"📈 OOF MAE: {result['oof_stats']['oof_mae_qmap']:.2f}")
|
| 690 |
+
print(f"📤 Upload: {'✅' if result['upload_success'] else '❌'}")
|
| 691 |
+
else:
|
| 692 |
+
print(f"❌ Training failed: {result['error']}")
|
| 693 |
+
return 1
|
| 694 |
+
|
| 695 |
+
return 0
|
| 696 |
+
|
| 697 |
+
if __name__ == "__main__":
|
| 698 |
+
exit(main())
|
train/rlhf.py
CHANGED
|
@@ -168,7 +168,7 @@ class RLHFTrainer:
|
|
| 168 |
# First, try to download the latest model
|
| 169 |
logger.info("🔄 Checking for latest model version...")
|
| 170 |
try:
|
| 171 |
-
from utils.
|
| 172 |
download_latest_models()
|
| 173 |
except Exception as e:
|
| 174 |
logger.warning(f"⚠️ Failed to download latest models: {e}")
|
|
|
|
| 168 |
# First, try to download the latest model
|
| 169 |
logger.info("🔄 Checking for latest model version...")
|
| 170 |
try:
|
| 171 |
+
from utils.dbehavior_download import download_latest_models
|
| 172 |
download_latest_models()
|
| 173 |
except Exception as e:
|
| 174 |
logger.warning(f"⚠️ Failed to download latest models: {e}")
|
train/saver.py
CHANGED
|
@@ -102,7 +102,7 @@ class ModelSaver:
|
|
| 102 |
"performance_metrics": performance_metrics,
|
| 103 |
"framework": "xgboost",
|
| 104 |
"task": "driver_behavior_classification",
|
| 105 |
-
"labels": ["aggressive", "normal", "conservative"], # Based on
|
| 106 |
"features": "obd_sensor_data",
|
| 107 |
"rlhf_metadata": rlhf_metadata or {}
|
| 108 |
}
|
|
|
|
| 102 |
"performance_metrics": performance_metrics,
|
| 103 |
"framework": "xgboost",
|
| 104 |
"task": "driver_behavior_classification",
|
| 105 |
+
"labels": ["aggressive", "normal", "conservative"], # Based on dbehavior_labeler.py
|
| 106 |
"features": "obd_sensor_data",
|
| 107 |
"rlhf_metadata": rlhf_metadata or {}
|
| 108 |
}
|
utils/{download.py → dbehavior_download.py}
RENAMED
|
File without changes
|
utils/{ul_label.py → dbehavior_labeler.py}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
#
|
| 2 |
# Load UL models and predict driving style
|
| 3 |
import os, logging, pickle
|
| 4 |
import warnings
|
|
|
|
| 1 |
+
# dbehavior_labeler.py
|
| 2 |
# Load UL models and predict driving style
|
| 3 |
import os, logging, pickle
|
| 4 |
import warnings
|
utils/efficiency_download.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fuel Efficiency Model Downloader
|
| 3 |
+
Downloads the latest fuel efficiency model from Hugging Face Hub
|
| 4 |
+
Similar to utils/download.py but for fuel efficiency models
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import pathlib
|
| 9 |
+
import logging
|
| 10 |
+
from typing import Optional, List
|
| 11 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 12 |
+
import joblib
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger("efficiency-downloader")
|
| 15 |
+
logger.setLevel(logging.INFO)
|
| 16 |
+
if not logger.handlers:
|
| 17 |
+
handler = logging.StreamHandler()
|
| 18 |
+
handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
|
| 19 |
+
logger.addHandler(handler)
|
| 20 |
+
|
| 21 |
+
def load_env_file():
|
| 22 |
+
"""Load .env file if it exists"""
|
| 23 |
+
env_path = pathlib.Path(".env")
|
| 24 |
+
if env_path.exists():
|
| 25 |
+
logger.info("📄 Loading .env file...")
|
| 26 |
+
with open(env_path, 'r') as f:
|
| 27 |
+
for line in f:
|
| 28 |
+
line = line.strip()
|
| 29 |
+
if line and not line.startswith('#') and '=' in line:
|
| 30 |
+
key, value = line.split('=', 1)
|
| 31 |
+
os.environ[key] = value
|
| 32 |
+
return True
|
| 33 |
+
return False
|
| 34 |
+
|
| 35 |
+
# Load .env file first before setting any environment variables
|
| 36 |
+
load_env_file()
|
| 37 |
+
|
| 38 |
+
# Configuration
|
| 39 |
+
EFFICIENCY_REPO_ID = os.getenv("HF_EFFICIENCY_MODEL_REPO", "BinKhoaLe1812/Fuel_Efficiency_OBD")
|
| 40 |
+
EFFICIENCY_MODEL_DIR = pathlib.Path(os.getenv("EFFICIENCY_MODEL_DIR", "/app/models/efficiency")).resolve()
|
| 41 |
+
EFFICIENCY_FILES = ["efficiency_model.joblib", "efficiency_meta.json"]
|
| 42 |
+
|
| 43 |
+
EFFICIENCY_MODEL_DIR.mkdir(parents=True, exist_ok=True)
|
| 44 |
+
|
| 45 |
+
def get_latest_efficiency_version():
    """Return the newest fuel-efficiency model version directory name from HF Hub.

    Version directories are named ``v<major>.<minor>`` (any number of dotted
    numeric components is accepted, e.g. "v2" or "v1.0.3").

    Returns:
        The latest version string (e.g. "v1.2"), or None when HF_TOKEN is
        missing, no parseable version directory exists, or the listing fails.
    """
    try:
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            logger.warning("⚠️ HF_TOKEN not set, using default efficiency model files")
            return None

        api = HfApi(token=hf_token)
        repo_files = api.list_repo_files(
            repo_id=EFFICIENCY_REPO_ID,
            repo_type="model"
        )

        logger.info("🔍 Checking efficiency repository files...")
        logger.info(f"📁 Found {len(repo_files)} files in efficiency repository")

        # Candidate version names: bare top-level 'v*' entries plus any 'v*/'
        # directory that actually contains files. A set avoids the duplicate
        # entries the previous list+extend approach could produce.
        candidates = {f for f in repo_files if f.startswith('v') and '/' not in f}
        candidates |= {f.split('/')[0] for f in repo_files
                       if f.startswith('v') and '/' in f}
        logger.info(f"📦 Found efficiency version directories: {sorted(candidates)}")

        versions = []
        for v_dir in candidates:
            try:
                # "v1.0" -> (1, 0); arbitrary depth supported so "v1.0.3"
                # no longer raises during unpacking.
                parts = tuple(int(p) for p in v_dir[1:].split('.'))
                versions.append((parts, v_dir))
            except ValueError:
                logger.warning(f"⚠️ Could not parse version: {v_dir}")
                continue

        if not versions:
            logger.warning("⚠️ No valid efficiency versions found")
            return None

        # Tuples compare lexicographically, so (1, 10) > (1, 9) as intended.
        versions.sort(key=lambda x: x[0])
        latest_version = versions[-1][1]

        logger.info(f"✅ Latest efficiency model version: {latest_version}")
        return latest_version

    except Exception as e:
        logger.error(f"❌ Error getting latest efficiency version: {e}")
        return None
|
| 104 |
+
|
| 105 |
+
def download_efficiency_model(version: Optional[str] = None) -> bool:
    """Download all files of one efficiency-model version from the HF Hub.

    Args:
        version: Version directory name (e.g. "v1.0"). When None, the latest
            version is resolved via get_latest_efficiency_version().

    Returns:
        True when every file in EFFICIENCY_FILES was downloaded, else False.
    """
    try:
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            logger.error("❌ HF_TOKEN not set")
            return False

        if version is None:
            version = get_latest_efficiency_version()
            if version is None:
                logger.error("❌ Could not determine latest efficiency version")
                return False

        logger.info(f"📥 Downloading efficiency model version: {version}")

        # Download each required file from the version directory into the
        # local model dir (real files, not symlinks into the HF cache).
        for filename in EFFICIENCY_FILES:
            try:
                hf_hub_download(
                    repo_id=EFFICIENCY_REPO_ID,
                    # Restored f-string: the repo stores files under "<version>/<filename>".
                    filename=f"{version}/{filename}",
                    token=hf_token,
                    local_dir=EFFICIENCY_MODEL_DIR,
                    local_dir_use_symlinks=False
                )
                logger.info(f"✅ Downloaded: {filename}")

            except Exception as e:
                logger.error(f"❌ Failed to download {filename}: {e}")
                return False

        logger.info(f"✅ Efficiency model {version} downloaded successfully")
        return True

    except Exception as e:
        logger.error(f"❌ Error downloading efficiency model: {e}")
        return False
|
| 143 |
+
|
| 144 |
+
def download_latest_efficiency_models() -> bool:
    """Resolve the newest efficiency model version and download its files.

    Returns:
        True on a complete download, False on any failure.
    """
    try:
        logger.info("🚀 Starting efficiency model download...")

        # Resolve the newest published version first.
        latest_version = get_latest_efficiency_version()
        if latest_version is None:
            logger.error("❌ Could not determine latest efficiency version")
            return False

        # Fetch every file of that version; report the outcome either way.
        if not download_efficiency_model(latest_version):
            logger.error("❌ Failed to download latest efficiency model")
            return False

        logger.info("✅ Latest efficiency model downloaded successfully")
        return True

    except Exception as e:
        logger.error(f"❌ Error in download_latest_efficiency_models: {e}")
        return False
|
| 167 |
+
|
| 168 |
+
def load_efficiency_model():
    """Load the efficiency model artifacts and optional metadata from disk.

    Returns:
        (model_artifacts, metadata) — ``metadata`` is None when
        efficiency_meta.json is absent; both are None on any failure.
    """
    try:
        model_path = EFFICIENCY_MODEL_DIR / "efficiency_model.joblib"
        meta_path = EFFICIENCY_MODEL_DIR / "efficiency_meta.json"

        if not model_path.exists():
            logger.error(f"❌ Efficiency model not found at {model_path}")
            return None, None

        # Deserialize the trained artifacts bundle.
        artifacts = joblib.load(model_path)

        # Metadata is optional; missing file is not an error.
        meta = None
        if meta_path.exists():
            import json
            with open(meta_path, 'r') as f:
                meta = json.load(f)

        logger.info("✅ Efficiency model loaded successfully")
        return artifacts, meta

    except Exception as e:
        logger.error(f"❌ Error loading efficiency model: {e}")
        return None, None
|
| 194 |
+
|
| 195 |
+
def check_efficiency_model_exists() -> bool:
    """Return True when the serialized model file is already present locally."""
    return (EFFICIENCY_MODEL_DIR / "efficiency_model.joblib").exists()
|
| 199 |
+
|
| 200 |
+
if __name__ == "__main__":
    # Smoke test: try to fetch the newest model from the Hub and report.
    if download_latest_efficiency_models():
        print("✅ Efficiency model download test successful")
    else:
        print("❌ Efficiency model download test failed")
|
utils/efficiency_labeler.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Fuel Efficiency Labeler
|
| 3 |
+
Provides fuel efficiency scoring for OBD data using the trained model
|
| 4 |
+
Similar to utils/dbehavior_labeler.py but for fuel efficiency scoring
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import logging
|
| 9 |
+
import joblib
|
| 10 |
+
import numpy as np
|
| 11 |
+
import pandas as pd
|
| 12 |
+
from typing import List, Optional, Dict, Any, Tuple
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
logger = logging.getLogger("efficiency-labeler")
logger.setLevel(logging.INFO)
if not logger.handlers:
    # Attach a handler only once so repeated imports don't duplicate log lines.
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("[%(levelname)s] %(asctime)s - %(message)s"))
    logger.addHandler(handler)

# Constants
KMH_TO_MS = 1000.0/3600.0  # km/h -> m/s conversion factor
SEED = 42  # fixed seed; unused here but kept for parity with training code
|
| 25 |
+
|
| 26 |
+
class EfficiencyLabeler:
    """
    Fuel efficiency scorer for OBD data using machine learning model.
    Provides drive-level efficiency scores (0-100%) for entire drives.

    Implemented as a singleton: model artifacts are loaded once into
    class-level attributes and shared by every instance. Use ``get()``
    rather than constructing directly.
    """

    # Singleton machinery — all state is class-level so one model load
    # serves every caller.
    _instance = None          # cached singleton (see get())
    _model_artifacts = None   # dict bundle: scaler, model(s), thresholds, calibration
    _metadata = None          # optional JSON metadata loaded next to the model
    _initialized = False      # ensures _load_model() runs at most once

    def __init__(self):
        # Load the model on first construction only; subsequent constructions
        # reuse the class-level artifacts.
        if not EfficiencyLabeler._initialized:
            self._load_model()
            EfficiencyLabeler._initialized = True

    @classmethod
    def get(cls):
        """Get singleton instance (lazily constructed on first call)."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def _load_model(self):
        """Load the efficiency model and metadata, downloading them if absent.

        Raises:
            RuntimeError: when download or deserialization fails.
        """
        try:
            from utils.efficiency_download import load_efficiency_model, check_efficiency_model_exists

            # Check if model exists locally; fall back to a fresh HF download.
            if not check_efficiency_model_exists():
                logger.warning("⚠️ Efficiency model not found locally, attempting download...")
                from utils.efficiency_download import download_latest_efficiency_models
                success = download_latest_efficiency_models()
                if not success:
                    raise RuntimeError("Failed to download efficiency model")

            # Load model artifacts (and metadata, which may be None).
            model_artifacts, metadata = load_efficiency_model()
            if model_artifacts is None:
                raise RuntimeError("Failed to load efficiency model")

            EfficiencyLabeler._model_artifacts = model_artifacts
            EfficiencyLabeler._metadata = metadata

            logger.info(f"✅ Efficiency model loaded | kind: {model_artifacts.get('model_kind', 'unknown')}")
            logger.info(f"📊 Model features: {len(model_artifacts.get('feature_names', []))}")

        except Exception as e:
            logger.error(f"❌ Error loading efficiency model: {e}")
            raise

    def _ensure_dt(self, s):
        """Coerce a series to datetime; unparseable entries become NaT."""
        return pd.to_datetime(s, errors="coerce")

    def _infer_base_interval_seconds(self, ts, fallback=1.0):
        """Estimate the sampling interval (s) as the median timestamp delta.

        Falls back to *fallback* when there are no deltas or the median is
        non-finite / non-positive.
        """
        ts = pd.to_datetime(ts, errors="coerce")
        dt = ts.diff().dt.total_seconds().dropna()
        med = float(np.nanmedian(dt)) if len(dt) else fallback
        return fallback if (not np.isfinite(med) or med <= 0) else med

    def _rows_for(self, seconds, base_sec):
        """Number of sample rows spanning *seconds*, with a floor of 3."""
        return max(3, int(round(seconds / max(1e-3, base_sec))))

    def _add_basic_derivatives(self, d):
        """Add derived columns (SPEED_ms, ACCEL, JERK, dist_m) to a copy of *d*.

        Rows with unparseable timestamps are dropped and the frame is sorted
        by time before differentiating.
        """
        d = d.copy()
        d["timestamp"] = self._ensure_dt(d["timestamp"])
        d = d.dropna(subset=["timestamp"]).sort_values("timestamp")
        base = self._infer_base_interval_seconds(d["timestamp"], 1.0)

        # Convert numeric columns (strings from loggers become NaN).
        for c in ["SPEED","RPM","MAF","ENGINE_LOAD","THROTTLE_POS"]:
            if c in d.columns:
                d[c] = pd.to_numeric(d[c], errors="coerce")

        # Convert speed to m/s — assumes SPEED is km/h (OBD convention); TODO confirm upstream units.
        if "SPEED_ms" not in d.columns:
            d["SPEED_ms"] = (d["SPEED"] * KMH_TO_MS) if "SPEED" in d.columns else np.nan

        # Calculate derivatives using the inferred base interval, not per-row dt.
        d["ACCEL"] = d["SPEED_ms"].diff()/max(base,1e-3)
        d["JERK"] = d["ACCEL"].diff()/max(base,1e-3)

        # Calculate distance per row; dt is clamped to [0, 10*base] so gaps in
        # logging don't produce huge spurious distances.
        dt = d["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        d["dist_m"] = d["SPEED_ms"] * dt

        return d

    def _idle_rule(self, d, thr):
        """Boolean mask of idle rows: low speed/throttle/load/MAF/accel.

        Conditions for missing columns collapse to the scalar True, which the
        ``&`` chain absorbs. The raw mask is smoothed with a centered
        rolling median (window 5) to suppress single-sample flicker.
        """
        speed_low = (d["SPEED_ms"].abs() <= thr.get("SPEED_IDLE_MPS", 0.6))
        thr_low = (d["THROTTLE_POS"] <= thr.get("THR_LOW_Q10", 0.0)) if "THROTTLE_POS" in d else True
        load_low = (d["ENGINE_LOAD"] <= thr.get("LOAD_LOW_Q15", 0.0)) if "ENGINE_LOAD" in d else True
        maf_low = (d["MAF"] <= thr.get("MAF_LOW_Q10", 0.0)) if "MAF" in d else True
        accel_low = (d["ACCEL"].abs() <= thr.get("ACCEL_LOW_Q20", 0.0))

        mask = (speed_low & thr_low & load_low & maf_low & accel_low).astype(int)
        k = 5
        return (mask.rolling(k, center=True, min_periods=1).median().round().astype(bool)
                if len(mask) >= k else mask.astype(bool))

    def _sharp_mask_from_thresholds(self, d, thr):
        """Mask of sharp accel/decel rows: |ACCEL| or |JERK| above threshold.

        Thresholds come from the trained model's ``thr`` dict; when absent
        they are estimated from this drive's own 85th/90th percentiles
        (defaulting to 0.3 / 0.5 when the columns are all-NaN).
        """
        thr_a = thr.get("ACCEL_HIGH_Q85",
                        np.nanquantile(d["ACCEL"].abs().dropna(), 0.85) if d["ACCEL"].notna().any() else 0.3)
        thr_j = thr.get("JERK_HIGH_Q90",
                        np.nanquantile(d["JERK"].abs().dropna(), 0.90) if d["JERK"].notna().any() else 0.5)
        return (d["ACCEL"].abs() > thr_a) | (d["JERK"].abs() > thr_j)

    def _agg_for_ml_drive(self, g, thr):
        """Aggregate one drive into the flat feature dict the model consumes.

        Features include duration/distance, speed statistics, accel/jerk
        quantiles, sharp-event frequency, idle fraction/episodes-per-minute,
        and engine-intensity proxies (RPM/MAF/load/throttle quantiles).
        """
        g = self._add_basic_derivatives(g.copy())
        base = self._infer_base_interval_seconds(g["timestamp"], 1.0)
        g["IDLE_RULE"] = self._idle_rule(g, thr)

        # Total driven time from clamped per-row deltas.
        dt = g["timestamp"].diff().dt.total_seconds().fillna(0).clip(lower=0, upper=10*base)
        T = float(dt.sum())
        mins = max(1e-6, T/60)

        # Count sharp episodes: each False->True->False run contributes 2 edges.
        sharp = self._sharp_mask_from_thresholds(g, thr).values
        edges = np.flatnonzero(np.diff(np.r_[False, sharp, False]))
        sharp_freq_pm = (len(edges)//2)/mins

        def q(s, p):
            # NaN-safe quantile; 0.0 when the column has no valid values.
            s = pd.to_numeric(s, errors="coerce")
            return float(np.nanquantile(s, p)) if s.notna().any() else 0.0

        rpm90, maf90 = thr.get("RPM90", np.nan), thr.get("MAF90", np.nan)
        frac_rpm90 = float((g["RPM"] >= rpm90).mean()) if ("RPM" in g and np.isfinite(rpm90)) else 0.0
        frac_maf90 = float((g["MAF"] >= maf90).mean()) if ("MAF" in g and np.isfinite(maf90)) else 0.0

        # Coefficient of variation of speed over a ~10 s window
        # (positional second arg to rolling() is min_periods=1).
        W10 = self._rows_for(10, base)
        speed_cv = float((g["SPEED_ms"].rolling(W10,1).std()/(g["SPEED_ms"].rolling(W10,1).mean()+1e-6)).mean())

        return {
            "duration_min": max(1e-6, T/60),
            "distance_km": g["dist_m"].sum()/1000.0,
            "speed_mean": float(g["SPEED_ms"].mean()),
            "speed_q90": q(g["SPEED_ms"], 0.90),
            "speed_cv": speed_cv,
            "accel_q90": q(g["ACCEL"].abs(), 0.90),
            "jerk_q90": q(g["JERK"].abs(), 0.90),
            "sharp_freq_pm": sharp_freq_pm,
            "idle_frac": float(g["IDLE_RULE"].mean()),
            "idle_epm": (len(np.flatnonzero(np.diff(np.r_[False, g['IDLE_RULE'].values, False])))//2)/mins,
            "rpm_q90": q(g["RPM"], 0.90) if "RPM" in g else 0.0,
            "maf_q90": q(g["MAF"], 0.90) if "MAF" in g else 0.0,
            "load_q85": q(g["ENGINE_LOAD"], 0.85) if "ENGINE_LOAD" in g else 0.0,
            "thr_q85": q(g["THROTTLE_POS"], 0.85) if "THROTTLE_POS" in g else 0.0,
            "frac_rpm90": frac_rpm90,
            "frac_maf90": frac_maf90,
            "fuel_intensity": (q(g["RPM"], 0.90)*q(g["MAF"], 0.90)) if (("RPM" in g) and ("MAF" in g)) else 0.0
        }

    def _align_to_schema(self, feats, art):
        """Build a 1-row frame matching the model's feature order; missing
        features are zero-filled and numeric columns scaled with the stored scaler."""
        x = pd.DataFrame([feats])
        for c in art["feature_names"]:
            if c not in x.columns:
                x[c] = 0.0
        x = x[art["feature_names"]]
        if len(art["num_cols"]):
            x.loc[:, art["num_cols"]] = art["scaler"].transform(x[art["num_cols"]])
        return x

    def _predict_drive(self, df_drive):
        """Score one drive.

        Returns:
            (calibrated, raw): calibrated score clipped to [0, 100] and the
            raw model output before calibration.
        """
        art = EfficiencyLabeler._model_artifacts
        thr = art["thr"]

        feats = self._agg_for_ml_drive(df_drive, thr)
        x = self._align_to_schema(feats, art)

        # Get model — artifacts bundle either a random forest ("rf") or a GBM.
        mdl = art["rf"] if art.get("model_kind") == "rf" else art["gbm"]
        raw = float(mdl.predict(x)[0])

        # Apply quantile-mapping calibration (raw quantiles -> target quantiles).
        if art.get("calib", {}).get("type") == "qmap":
            rq = np.array(art["calib"]["rq"])
            yq = np.array(art["calib"]["yq"])

            # Ensure strictly increasing rq for stable interpolation
            # (np.interp expects monotonically increasing xp).
            for i in range(1, len(rq)):
                if rq[i] <= rq[i-1]:
                    rq[i] = rq[i-1] + 1e-6

            pred = float(np.clip(np.interp(raw, rq, yq), 0, 100))
        else:
            pred = float(np.clip(raw, 0, 100))

        return pred, raw

    def predict_df(self, df: pd.DataFrame) -> List[float]:
        """
        Predict fuel efficiency for a DataFrame containing OBD data.
        Returns a single efficiency score (0-100%) for the entire drive.

        Args:
            df: DataFrame with OBD data including timestamp, SPEED, RPM, MAF, etc.

        Returns:
            List containing single efficiency score for the drive
            ([0.0] for drives shorter than 5 rows, missing timestamps,
            or any prediction error).
        """
        try:
            if EfficiencyLabeler._model_artifacts is None:
                raise RuntimeError("Efficiency model not loaded")

            if len(df) < 5:
                logger.warning("⚠️ Drive too short for efficiency prediction")
                return [0.0]  # Return minimum efficiency for very short drives

            # Ensure timestamp column exists
            if "timestamp" not in df.columns:
                logger.error("❌ No timestamp column found")
                return [0.0]

            # Predict efficiency for the entire drive
            efficiency_score, raw_score = self._predict_drive(df)

            logger.info(f"📊 Drive efficiency: {efficiency_score:.1f}% (raw: {raw_score:.3f})")
            return [efficiency_score]

        except Exception as e:
            logger.error(f"❌ Error predicting efficiency: {e}")
            return [0.0]  # Return minimum efficiency on error

    def get_model_info(self) -> Dict[str, Any]:
        """Return a summary dict of the loaded model (kind, features,
        calibration, out-of-fold stats, metadata) or an error marker."""
        if EfficiencyLabeler._model_artifacts is None:
            return {"error": "Model not loaded"}

        art = EfficiencyLabeler._model_artifacts
        return {
            "model_kind": art.get("model_kind", "unknown"),
            "feature_count": len(art.get("feature_names", [])),
            "features": art.get("feature_names", []),
            "calibration_type": art.get("calib", {}).get("type", "none"),
            "oof_stats": art.get("oof_stats", {}),
            "metadata": EfficiencyLabeler._metadata
        }
|
| 271 |
+
|
| 272 |
+
# Convenience function for backward compatibility
def predict_efficiency(df: pd.DataFrame) -> List[float]:
    """Score *df* with the shared EfficiencyLabeler singleton."""
    return EfficiencyLabeler.get().predict_df(df)
|
| 277 |
+
|
| 278 |
+
if __name__ == "__main__":
    # Smoke test: construct the labeler (loads/downloads the model) and
    # print its summary; any failure is reported instead of raised.
    try:
        labeler = EfficiencyLabeler.get()
        print("✅ Efficiency labeler initialized successfully")

        info = labeler.get_model_info()
        print(f"📊 Model info: {info}")

    except Exception as e:
        print(f"❌ Error initializing efficiency labeler: {e}")
|