File size: 35,841 Bytes
03bcd34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60e8834
03bcd34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
"""
FastAPI Predictive Maintenance API with Rule-Based Diagnostics
Senior Backend Engineer & Reliability Engineer Implementation

Features:
- ML-based failure prediction using XGBoost
- Physics-based diagnostic engine
- Single prediction and batch processing endpoints
- Production-ready with proper error handling
"""

from fastapi import FastAPI, HTTPException, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field, validator
from typing import Optional, Dict, Any, List
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import os
import io
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI
app = FastAPI(
    title="Predictive Maintenance API",
    description="ML + Rule-Based Diagnostics for Industrial Equipment",
    version="1.0.0"
)

# CORS Configuration
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# NOTE(review): import sits mid-module (after app setup) rather than with the
# other imports at the top of the file; consider moving it up.
from huggingface_hub import hf_hub_download

# Global model pipeline
# Populated on startup by load_model(); expected to be a dict containing at
# least the keys 'model', 'scaler', and 'features' (validated at startup).
MODEL_PIPELINE = None
# Hugging Face repo / artifact names, overridable via environment variables.
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "deropxyz/AC02-ML")
PRIMARY_MODEL_FILENAME = os.getenv("HF_PRIMARY_MODEL", "model_pipeline_improved.joblib")
FALLBACK_MODEL_FILENAME = os.getenv("HF_FALLBACK_MODEL", "model_pipeline.joblib")
# Auth token for private repos; either env var name is accepted.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")


def download_model_artifact(filename: str) -> Path:
    """Fetch a model artifact from the Hugging Face Hub.

    Args:
        filename: Artifact file name inside HF_MODEL_REPO.

    Returns:
        Local cache path of the downloaded file.

    Raises:
        ValueError: If *filename* is empty or None.
    """
    if not filename:
        raise ValueError("Filename must be provided for model download")

    logger.info("Downloading model '%s' from repo '%s'...", filename, HF_MODEL_REPO)
    download_kwargs = {
        "repo_id": HF_MODEL_REPO,
        "filename": filename,
        "repo_type": "model",
        "token": HF_TOKEN,
    }
    return Path(hf_hub_download(**download_kwargs))


# ============================================================================
# REQUEST/RESPONSE SCHEMAS
# ============================================================================

class PredictionRequest(BaseModel):
    """Single prediction request schema matching dataset columns.

    Field bounds mirror plausible sensor ranges; out-of-range payloads are
    rejected by Pydantic validation before the endpoint handler runs.
    """
    # Optional identifier, echoed back unchanged in the response.
    machine_id: Optional[str] = Field(None, description="Machine identifier (e.g., M_L_01)")
    air_temperature: float = Field(..., ge=250, le=350, description="Air temperature in Kelvin")
    process_temperature: float = Field(..., ge=250, le=400, description="Process temperature in Kelvin")
    rotational_speed: int = Field(..., ge=0, le=3000, description="Rotational speed in RPM")
    torque: float = Field(..., ge=0, le=100, description="Torque in Nm")
    tool_wear: int = Field(..., ge=0, le=300, description="Tool wear in minutes")
    type: str = Field(..., description="Machine type: L (Low), M (Medium), H (High)")

    @validator('type')
    def validate_type(cls, v):
        # Restrict machine type to the three categories used by Type_Encoded.
        if v not in ['L', 'M', 'H']:
            raise ValueError('Type must be L, M, or H')
        return v

    class Config:
        # Example payload surfaced in the OpenAPI/Swagger docs.
        schema_extra = {
            "example": {
                "machine_id": "M_L_01",
                "air_temperature": 298.1,
                "process_temperature": 308.6,
                "rotational_speed": 1551,
                "torque": 42.8,
                "tool_wear": 0,
                "type": "M"
            }
        }


class DiagnosticResult(BaseModel):
    """Outcome of the rule-based diagnostic engine (analyze_condition)."""
    primary_cause: str        # Short label for the most likely failure mode
    sensor_alert: str         # Human-readable description of the triggering readings
    recommended_action: str   # Operator guidance text
    severity: str  # LOW, MEDIUM, HIGH, CRITICAL


class AnomalyDetail(BaseModel):
    """One out-of-range parameter reported by detect_anomalies()."""
    parameter: str     # Display name, e.g. "Air Temperature"
    value: str         # Formatted reading with units
    normal_range: str  # Expected range with units
    status: str  # NORMAL, WARNING, CRITICAL
    explanation: str   # Why the reading matters / what it may indicate


class PredictionResponse(BaseModel):
    """Single prediction response returned by POST /predict."""
    machine_id: Optional[str]        # Echo of the request's machine_id
    timestamp: str                   # ISO-8601 time the prediction was made
    prediction: str  # HEALTHY or FAILURE
    confidence: float                # Probability of the predicted class, rounded to 4 dp
    diagnostics: DiagnosticResult    # Rule-engine output
    features: Optional[Dict[str, Any]] = None  # Engineered features
    anomalies: Optional[List[AnomalyDetail]] = None  # Parameter anomaly analysis
    overall_health: Optional[str] = None  # Overall health status


# ============================================================================
# STARTUP EVENT: LOAD MODEL
# ============================================================================

@app.on_event("startup")
async def load_model():
    """Download and deserialize the model pipeline when the app boots.

    Tries the primary artifact first, then the fallback; raises if neither
    can be loaded or if the loaded pipeline lacks a required key.
    """
    global MODEL_PIPELINE

    last_error: Optional[Exception] = None
    for candidate in (PRIMARY_MODEL_FILENAME, FALLBACK_MODEL_FILENAME):
        if not candidate:
            continue  # Filename can be blanked out via env vars
        try:
            artifact_path = download_model_artifact(candidate)
            MODEL_PIPELINE = joblib.load(artifact_path)
        except Exception as err:
            last_error = err
            logger.error("Failed to load model '%s': %s", candidate, err)
        else:
            logger.info("✅ Model loaded from %s", candidate)
            break

    if MODEL_PIPELINE is None:
        raise RuntimeError(f"Unable to load any model artifact: {last_error}") from last_error

    # Validate model structure: the rest of the app indexes these keys directly.
    for key in ('model', 'scaler', 'features'):
        if key not in MODEL_PIPELINE:
            raise ValueError(f"Model pipeline missing key: {key}")

    logger.info("Model type: %s", type(MODEL_PIPELINE['model']).__name__)
    logger.info("Features: %s", MODEL_PIPELINE['features'])


# ============================================================================
# FEATURE ENGINEERING (Must match training logic)
# ============================================================================

def engineer_features(data: Dict[str, Any]) -> pd.DataFrame:
    """Build the model's single-row feature frame from a raw reading.

    Mirrors the training pipeline's physics-based feature engineering:
    - Temperature_Diff: thermal stress indicator (process - air)
    - Power_W: mechanical power, P = τ·ω
    - Type_Encoded: ordinal encoding of machine type (L=0, M=1, H=2)
    - Tool_wear_hours: tool wear converted from minutes to hours

    Returns:
        DataFrame with columns ordered exactly as MODEL_PIPELINE['features'].
    """
    base = {
        'Air_temp_K': data['air_temperature'],
        'Process_temp_K': data['process_temperature'],
        'Speed_rpm': data['rotational_speed'],
        'Torque_Nm': data['torque'],
        'Tool_wear_min': data['tool_wear'],
    }
    frame = pd.DataFrame([base])

    # Physics-based derived features
    frame['Tool_wear_hours'] = frame['Tool_wear_min'] / 60.0
    frame['Temperature_Diff'] = frame['Process_temp_K'] - frame['Air_temp_K']
    # ω [rad/s] = rpm * 2π/60
    frame['Power_W'] = frame['Torque_Nm'] * frame['Speed_rpm'] * (2 * np.pi / 60)

    # Categorical encoding (validated upstream to be one of L/M/H)
    frame['Type_Encoded'] = {'L': 0, 'M': 1, 'H': 2}[data['type']]

    # Reorder to the exact column order the model was trained on
    return frame[MODEL_PIPELINE['features']]


# ============================================================================
# DIAGNOSTIC ENGINE (Rule-Based Intelligence)
# ============================================================================

def analyze_condition(
    data: Dict[str, Any],
    pred_class: int,
    pred_proba: float,
    engineered_features: pd.DataFrame
) -> DiagnosticResult:
    """
    Advanced diagnostic engine combining ML predictions with physics-based rules.

    Rules are evaluated in priority order and the FIRST match wins; if no
    rule fires, a healthy-operation result is returned.

    Priority Rules:
    1. Tool Wear Failure (CRITICAL)
    2. Power Overstrain (HIGH)
    3. Heat Dissipation Issue (MEDIUM)
    4. ML Anomaly Detection (VARIABLE)
    5. Healthy Operation (LOW)

    Args:
        data: Raw request values (torque, tool_wear, speed, temperatures, ...).
        pred_class: ML prediction (1 = failure, 0 = healthy).
        pred_proba: Probability of the predicted class (callers pass the
            predicted-class confidence, not the raw failure probability).
        engineered_features: Single-row frame from engineer_features().

    Returns:
        DiagnosticResult describing the highest-priority matched condition.
    """
    
    # Extract values for rule evaluation
    temp_diff = engineered_features['Temperature_Diff'].iloc[0]
    power_w = engineered_features['Power_W'].iloc[0]
    torque = data['torque']
    tool_wear = data['tool_wear']
    speed = data['rotational_speed']
    air_temp = data['air_temperature']
    process_temp = data['process_temperature']
    
    # ========================================================================
    # RULE 1: TOOL WEAR FAILURE (CRITICAL PRIORITY)
    # ========================================================================
    if tool_wear > 200:
        return DiagnosticResult(
            primary_cause="Tool End of Life",
            sensor_alert=f"Tool wear ({tool_wear} min) exceeds safety threshold (200 min)",
            recommended_action="IMMEDIATE ACTION: Replace cutting tool before catastrophic failure. "
                             "Schedule downtime within 4 hours.",
            severity="CRITICAL"
        )
    
    # ========================================================================
    # RULE 2: POWER OVERSTRAIN (HIGH PRIORITY)
    # ========================================================================
    # Either limit alone is enough to trigger; the alert lists all violations.
    if power_w > 9000 or torque > 60:
        cause_details = []
        if power_w > 9000:
            cause_details.append(f"Power output ({power_w:.0f} W) exceeds design limit (9000 W)")
        if torque > 60:
            cause_details.append(f"Torque ({torque:.1f} Nm) exceeds safety limit (60 Nm)")
        
        return DiagnosticResult(
            primary_cause="Power Overstrain / Mechanical Overload",
            sensor_alert=" | ".join(cause_details),
            recommended_action="URGENT: Reduce operational load immediately. "
                             "Inspect shaft alignment, bearing condition, and drive belt tension. "
                             "Check for material jamming or obstruction.",
            severity="HIGH"
        )
    
    # ========================================================================
    # RULE 3: HEAT DISSIPATION ISSUE (MEDIUM PRIORITY)
    # ========================================================================
    # Low temperature differential at high speed suggests cooling can't keep up.
    if temp_diff < 8.0 and speed > 1300:
        return DiagnosticResult(
            primary_cause="Inefficient Cooling System",
            sensor_alert=f"Temperature differential ({temp_diff:.1f} K) too low at high speed ({speed} RPM). "
                        f"Expected ≥8.0 K for proper heat dissipation.",
            recommended_action="MAINTENANCE REQUIRED: Inspect cooling system within 24 hours. "
                             "Check heat exchanger efficiency, coolant flow rate, and radiator fins. "
                             "Verify coolant temperature and pressure.",
            severity="MEDIUM"
        )
    
    # ========================================================================
    # RULE 4: THERMAL STRESS (MEDIUM PRIORITY)
    # ========================================================================
    if temp_diff > 15.0:
        return DiagnosticResult(
            primary_cause="Excessive Thermal Stress",
            sensor_alert=f"Temperature differential ({temp_diff:.1f} K) exceeds normal range (8-12 K). "
                        f"Process temp: {process_temp:.1f} K, Air temp: {air_temp:.1f} K",
            recommended_action="MONITOR CLOSELY: High thermal gradient detected. "
                             "Check process parameters and reduce processing intensity if possible. "
                             "Verify insulation integrity and ambient conditions.",
            severity="MEDIUM"
        )
    
    # ========================================================================
    # RULE 5: HIGH-SPEED OPERATION RISK (MEDIUM PRIORITY)
    # ========================================================================
    if speed > 2500:
        return DiagnosticResult(
            primary_cause="High-Speed Operation Risk",
            sensor_alert=f"Rotational speed ({speed} RPM) in critical range (>2500 RPM). "
                        f"Vibration and wear accelerate exponentially.",
            recommended_action="PREVENTIVE ACTION: Monitor vibration levels closely. "
                             "Perform balance check and bearing inspection. "
                             "Consider reducing speed if not operationally critical.",
            severity="MEDIUM"
        )
    
    # ========================================================================
    # RULE 6: ML DETECTED ANOMALY (VARIABLE PRIORITY)
    # ========================================================================
    if pred_class == 1:
        # ML model detected failure but no specific rule matched
        confidence_level = "high" if pred_proba > 0.8 else "moderate"
        
        return DiagnosticResult(
            primary_cause="ML-Detected Anomaly Pattern",
            sensor_alert=f"Machine learning model detected failure risk with {confidence_level} confidence ({pred_proba:.1%}). "
                        f"No specific rule violation identified.",
            recommended_action="DIAGNOSTIC SCAN REQUIRED: Perform comprehensive machine diagnostic. "
                             "Check for: (1) Unusual vibration patterns, (2) Bearing wear, "
                             "(3) Lubrication quality, (4) Electrical anomalies, (5) Sensor calibration. "
                             "Review recent maintenance history.",
            severity="HIGH" if pred_proba > 0.8 else "MEDIUM"
        )
    
    # ========================================================================
    # RULE 7: HEALTHY OPERATION (LOW PRIORITY)
    # ========================================================================
    # Calculate health score: fraction of indicators in their comfort zone.
    # These thresholds are tighter than the rule thresholds above, so the
    # score can be below 100% even when no rule fired.
    health_indicators = {
        'tool_wear_ok': tool_wear <= 150,
        'power_ok': power_w <= 8000,
        'temp_ok': 8.0 <= temp_diff <= 12.0,
        'speed_ok': speed <= 2000,
        'torque_ok': torque <= 50
    }
    health_score = sum(health_indicators.values()) / len(health_indicators) * 100
    
    return DiagnosticResult(
        primary_cause="Normal Operation",
        sensor_alert=f"All parameters within optimal range. Health score: {health_score:.0f}%. "
                    f"Tool wear: {tool_wear}/200 min, Power: {power_w:.0f}/9000 W, "
                    f"Temp diff: {temp_diff:.1f} K, Speed: {speed}/2500 RPM",
        recommended_action="CONTINUE NORMAL OPERATION. Next scheduled maintenance as planned. "
                         "Continue routine monitoring.",
        severity="LOW"
    )


# ============================================================================
# ANOMALY DETECTION & HEALTH ASSESSMENT
# ============================================================================

def detect_anomalies(data: Dict[str, Any], features_df: pd.DataFrame) -> List[AnomalyDetail]:
    """
    Detect anomalies in sensor readings and engineered features.

    Each of seven parameters is checked against a hard-coded normal range;
    readings outside the range are reported as WARNING, or CRITICAL when
    past a wider outer threshold. In-range parameters produce no entry, so
    an empty list means everything looked normal.

    Args:
        data: Raw request values (temperatures, speed, torque, tool wear).
        features_df: Single-row frame from engineer_features() (supplies
            Temperature_Diff and Power_W).

    Returns:
        List of AnomalyDetail records for visualization (possibly empty).
    """
    anomalies = []
    
    # Extract values
    air_temp = data['air_temperature']
    process_temp = data['process_temperature']
    speed = data['rotational_speed']
    torque = data['torque']
    tool_wear = data['tool_wear']
    temp_diff = features_df['Temperature_Diff'].iloc[0]
    power_w = features_df['Power_W'].iloc[0]
    
    # Define normal ranges and check each parameter
    
    # 1. Air Temperature (normal 290-310 K; critical outside 285-315 K)
    if air_temp < 290 or air_temp > 310:
        status = "CRITICAL" if air_temp < 285 or air_temp > 315 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Air Temperature",
            value=f"{air_temp:.1f} K",
            normal_range="290-310 K",
            status=status,
            explanation=f"Air temperature is {'too low' if air_temp < 290 else 'too high'}. This can affect cooling efficiency and equipment performance."
        ))
    
    # 2. Process Temperature (normal 300-315 K; critical outside 295-320 K)
    if process_temp < 300 or process_temp > 315:
        status = "CRITICAL" if process_temp < 295 or process_temp > 320 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Process Temperature",
            value=f"{process_temp:.1f} K",
            normal_range="300-315 K",
            status=status,
            explanation=f"Process temperature is {'below optimal' if process_temp < 300 else 'exceeding optimal'} range. May indicate cooling system issues or excessive load."
        ))
    
    # 3. Temperature Differential (normal 8-12 K; critical outside 5-15 K)
    if temp_diff < 8.0 or temp_diff > 12.0:
        status = "CRITICAL" if temp_diff < 5.0 or temp_diff > 15.0 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Temperature Differential",
            value=f"{temp_diff:.1f} K",
            normal_range="8.0-12.0 K",
            status=status,
            explanation=f"Temperature gradient is {'insufficient' if temp_diff < 8.0 else 'excessive'}. This indicates {'poor heat dissipation' if temp_diff < 8.0 else 'thermal stress'}."
        ))
    
    # 4. Rotational Speed (normal 1200-2000 RPM; critical outside 1000-2500)
    if speed < 1200 or speed > 2000:
        status = "CRITICAL" if speed < 1000 or speed > 2500 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Rotational Speed",
            value=f"{speed} RPM",
            normal_range="1200-2000 RPM",
            status=status,
            explanation=f"Speed is {'below normal' if speed < 1200 else 'above normal'} operating range. This affects wear rate and vibration levels."
        ))
    
    # 5. Torque (only the high side is flagged; critical above 60 Nm)
    if torque > 50:
        status = "CRITICAL" if torque > 60 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Torque",
            value=f"{torque:.1f} Nm",
            normal_range="20-50 Nm",
            status=status,
            explanation=f"Torque is {'significantly ' if torque > 60 else ''}exceeding normal range. This indicates mechanical overload or obstruction."
        ))
    
    # 6. Tool Wear (warning above 150 min; critical above 200 min)
    if tool_wear > 150:
        status = "CRITICAL" if tool_wear > 200 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Tool Wear",
            value=f"{tool_wear} min",
            normal_range="0-150 min",
            status=status,
            explanation=f"Tool wear is {'critically high' if tool_wear > 200 else 'elevated'}. {'IMMEDIATE replacement required' if tool_wear > 200 else 'Schedule replacement soon'}."
        ))
    
    # 7. Power Output (warning above 8000 W; critical above 9000 W)
    if power_w > 8000:
        status = "CRITICAL" if power_w > 9000 else "WARNING"
        anomalies.append(AnomalyDetail(
            parameter="Power Output",
            value=f"{power_w:.0f} W",
            normal_range="3000-8000 W",
            status=status,
            explanation=f"Power consumption is {'critically ' if power_w > 9000 else ''}high. This suggests mechanical resistance or overload conditions."
        ))
    
    return anomalies


def determine_overall_health(prediction: int, confidence: float, severity: str, anomaly_count: int) -> str:
    """Summarize machine health as a single human-readable status line.

    Args:
        prediction: ML prediction (1 = failure, anything else = healthy).
        confidence: Probability of the predicted class.
        severity: Diagnostic severity (LOW/MEDIUM/HIGH/CRITICAL).
        anomaly_count: Number of out-of-range parameters detected.

    Returns:
        Emoji-prefixed status string (EXCELLENT/GOOD/FAIR/POOR wording).
    """
    if prediction != 1:
        # Healthy prediction: grade purely on how many anomalies were seen.
        if anomaly_count == 0:
            return "🟢 EXCELLENT HEALTH - All systems operating optimally within normal parameters."
        if anomaly_count <= 2:
            return "🟢 GOOD HEALTH - Minor anomalies detected but within acceptable limits."
        return "🟡 FAIR HEALTH - Multiple anomalies detected. Monitor closely and address issues."

    # Failure predicted: grade on severity and model confidence.
    if severity == "CRITICAL" or confidence > 0.9:
        return "🔴 POOR HEALTH - Critical failure risk detected. Immediate action required."
    if severity == "HIGH" or confidence > 0.75:
        return "🟠 POOR HEALTH - High failure risk. Urgent maintenance needed."
    return "🟡 FAIR HEALTH - Moderate failure risk detected. Schedule maintenance soon."


# ============================================================================
# ENDPOINT 1: SINGLE PREDICTION
# ============================================================================

@app.post("/predict", response_model=PredictionResponse)
async def predict_single(request: PredictionRequest):
    """
    Predict failure risk for a single machine reading
    
    Returns:
    - Prediction (HEALTHY/FAILURE)
    - Confidence score
    - Detailed diagnostics with actionable recommendations
    """
    try:
        # Convert request to dict
        data = request.dict()
        
        # Feature engineering
        features_df = engineer_features(data)
        
        # Scale features
        features_scaled = MODEL_PIPELINE['scaler'].transform(features_df)
        
        # Predict
        prediction = MODEL_PIPELINE['model'].predict(features_scaled)[0]
        prediction_proba = MODEL_PIPELINE['model'].predict_proba(features_scaled)[0]
        
        # Get confidence (probability of predicted class)
        confidence = float(prediction_proba[prediction])
        
        # Run diagnostic engine
        diagnostics = analyze_condition(data, prediction, confidence, features_df)
        
        # Prepare features dict for display
        features_dict = features_df.iloc[0].to_dict()
        
        # Detect anomalies
        anomalies = detect_anomalies(data, features_df)
        
        # Determine overall health
        overall_health = determine_overall_health(prediction, confidence, diagnostics.severity, len(anomalies))
        
        # Format response
        return PredictionResponse(
            machine_id=request.machine_id,
            timestamp=datetime.now().isoformat(),
            prediction="FAILURE" if prediction == 1 else "HEALTHY",
            confidence=round(confidence, 4),
            diagnostics=diagnostics,
            features=features_dict,
            anomalies=anomalies,
            overall_health=overall_health
        )
        
    except Exception as e:
        logger.error(f"Prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")


# ============================================================================
# ENDPOINT 2: BATCH PREDICTION (CSV UPLOAD)
# ============================================================================

@app.post("/predict/batch")
async def predict_batch(file: UploadFile = File(...)):
    """
    Batch prediction from CSV upload
    
    Input: CSV file with columns matching PredictionRequest
    Output: CSV with added columns: Prediction, Probability, Primary_Cause, 
            Sensor_Alert, Recommended_Action, Severity
    """
    try:
        # Validate file type
        if not file.filename.endswith('.csv'):
            raise HTTPException(status_code=400, detail="File must be CSV format")
        
        # Read CSV
        contents = await file.read()
        df = pd.read_csv(io.StringIO(contents.decode('utf-8')))
        
        # Normalize column names (handle case-insensitive, spaces, brackets, etc)
        df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('[', '_').str.replace(']', '').str.replace('__', '_').str.rstrip('_')
        
        # Create column mapping for different formats
        column_mapping = {
            'air_temperature_k': 'air_temperature',
            'process_temperature_k': 'process_temperature',
            'rotational_speed_rpm': 'rotational_speed',
            'torque_nm': 'torque',
            'tool_wear_min': 'tool_wear'
        }
        
        # Apply mapping if columns have units
        df.rename(columns=column_mapping, inplace=True)
        
        # Validate required columns
        required_columns = [
            'air_temperature', 'process_temperature', 'rotational_speed',
            'torque', 'tool_wear', 'type'
        ]
        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            raise HTTPException(
                status_code=400,
                detail=f"Missing required columns: {missing_cols}. Found columns: {list(df.columns)}"
            )
        
        # Initialize result columns
        predictions = []
        probabilities = []
        causes = []
        alerts = []
        actions = []
        severities = []
        
        # Process each row
        for idx, row in df.iterrows():
            try:
                # Prepare data
                data = {
                    'machine_id': row.get('machine_id', f'MACHINE_{idx}'),
                    'air_temperature': float(row['air_temperature']),
                    'process_temperature': float(row['process_temperature']),
                    'rotational_speed': int(row['rotational_speed']),
                    'torque': float(row['torque']),
                    'tool_wear': int(row['tool_wear']),
                    'type': str(row['type'])
                }
                
                # Feature engineering
                features_df = engineer_features(data)
                
                # Scale and predict
                features_scaled = MODEL_PIPELINE['scaler'].transform(features_df)
                prediction = MODEL_PIPELINE['model'].predict(features_scaled)[0]
                prediction_proba = MODEL_PIPELINE['model'].predict_proba(features_scaled)[0]
                confidence = float(prediction_proba[prediction])
                
                # Diagnostics
                diagnostics = analyze_condition(data, prediction, confidence, features_df)
                
                # Append results
                predictions.append("FAILURE" if prediction == 1 else "HEALTHY")
                probabilities.append(round(confidence, 4))
                causes.append(diagnostics.primary_cause)
                alerts.append(diagnostics.sensor_alert)
                actions.append(diagnostics.recommended_action)
                severities.append(diagnostics.severity)
                
            except Exception as e:
                logger.warning(f"Row {idx} processing error: {e}")
                predictions.append("ERROR")
                probabilities.append(0.0)
                causes.append("Processing Error")
                alerts.append(str(e))
                actions.append("Review input data")
                severities.append("UNKNOWN")
        
        # Add results to dataframe
        df['Prediction'] = predictions
        df['Confidence'] = probabilities
        df['Primary_Cause'] = causes
        df['Sensor_Alert'] = alerts
        df['Recommended_Action'] = actions
        df['Severity'] = severities
        df['Processed_At'] = datetime.now().isoformat()
        
        # Convert to CSV
        output = io.StringIO()
        df.to_csv(output, index=False)
        output.seek(0)
        
        # Return as downloadable file
        return StreamingResponse(
            iter([output.getvalue()]),
            media_type="text/csv",
            headers={
                "Content-Disposition": f"attachment; filename=predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
            }
        )
        
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Batch prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Batch prediction failed: {str(e)}")


# ============================================================================
# ENDPOINT 3: BATCH PREDICTION JSON (For UI Display)
# ============================================================================

@app.post("/predict/batch/json")
async def predict_batch_json(file: UploadFile = File(...)):
    """
    Batch prediction from CSV upload - returns detailed JSON for UI display

    Input: CSV file with required columns (air_temperature, process_temperature,
    rotational_speed, torque, tool_wear, type; machine_id is optional)
    Output: JSON with detailed predictions, features, and anomalies for each row

    Raises:
        HTTPException 400: non-CSV upload, missing columns, or batch too large
        HTTPException 500: unexpected processing failure
    """
    # Single source of truth for the row cap so the check and the error
    # message cannot drift apart (previously the check used 1000 rows while
    # the message claimed a 500-row limit).
    max_rows = 1000
    try:
        # Validate file type (case-insensitive, tolerant of a missing filename)
        if not (file.filename or '').lower().endswith('.csv'):
            raise HTTPException(status_code=400, detail="File must be CSV format")
        
        # Read CSV
        contents = await file.read()
        df = pd.read_csv(io.StringIO(contents.decode('utf-8')))
        
        # Normalize column names so headers like " Air Temperature " match
        df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')
        
        # Validate required columns
        required_columns = [
            'air_temperature', 'process_temperature', 'rotational_speed',
            'torque', 'tool_wear', 'type'
        ]
        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            raise HTTPException(
                status_code=400,
                detail=f"Missing required columns: {missing_cols}. Found columns: {list(df.columns)}"
            )
        
        # Limit batch size for JSON response (keeps payloads UI-friendly)
        if len(df) > max_rows:
            raise HTTPException(
                status_code=400,
                detail=f"Batch JSON endpoint limited to {max_rows} rows. Your file has {len(df)} rows. Use /predict/batch for larger files or download CSV results."
            )
        
        results = []
        
        # Process each row independently: a bad row produces an ERROR record
        # instead of failing the whole batch.
        for idx, row in df.iterrows():
            try:
                # Prepare data; synthesize a machine_id when the column is absent
                data = {
                    'machine_id': row.get('machine_id', f'MACHINE_{idx+1}'),
                    'air_temperature': float(row['air_temperature']),
                    'process_temperature': float(row['process_temperature']),
                    'rotational_speed': int(row['rotational_speed']),
                    'torque': float(row['torque']),
                    'tool_wear': int(row['tool_wear']),
                    'type': str(row['type']).strip().upper()
                }
                
                # Feature engineering
                features_df = engineer_features(data)
                
                # Scale and predict; confidence is the probability of the
                # predicted class
                features_scaled = MODEL_PIPELINE['scaler'].transform(features_df)
                prediction = MODEL_PIPELINE['model'].predict(features_scaled)[0]
                prediction_proba = MODEL_PIPELINE['model'].predict_proba(features_scaled)[0]
                confidence = float(prediction_proba[prediction])
                
                # Diagnostics
                diagnostics = analyze_condition(data, prediction, confidence, features_df)
                
                # Engineered features as a plain dict for the JSON payload
                features_dict = features_df.iloc[0].to_dict()
                
                # Detect anomalies
                anomalies = detect_anomalies(data, features_df)
                
                # Overall health
                overall_health = determine_overall_health(prediction, confidence, diagnostics.severity, len(anomalies))
                
                # Append result
                results.append({
                    "row_number": idx + 1,
                    "machine_id": data['machine_id'],
                    "timestamp": datetime.now().isoformat(),
                    "prediction": "FAILURE" if prediction == 1 else "HEALTHY",
                    "confidence": round(confidence, 4),
                    "diagnostics": {
                        "primary_cause": diagnostics.primary_cause,
                        "sensor_alert": diagnostics.sensor_alert,
                        "recommended_action": diagnostics.recommended_action,
                        "severity": diagnostics.severity
                    },
                    "features": features_dict,
                    "anomalies": [
                        {
                            "parameter": a.parameter,
                            "value": a.value,
                            "normal_range": a.normal_range,
                            "status": a.status,
                            "explanation": a.explanation
                        } for a in anomalies
                    ],
                    "overall_health": overall_health,
                    "input_data": {
                        "air_temperature": data['air_temperature'],
                        "process_temperature": data['process_temperature'],
                        "rotational_speed": data['rotational_speed'],
                        "torque": data['torque'],
                        "tool_wear": data['tool_wear'],
                        "type": data['type']
                    }
                })
                
            except Exception as e:
                # Per-row failure: record the error and continue with the batch
                logger.warning(f"Row {idx+1} processing error: {e}")
                results.append({
                    "row_number": idx + 1,
                    "machine_id": row.get('machine_id', f'MACHINE_{idx+1}'),
                    "timestamp": datetime.now().isoformat(),
                    "prediction": "ERROR",
                    "confidence": 0.0,
                    "diagnostics": {
                        "primary_cause": "Processing Error",
                        "sensor_alert": str(e),
                        "recommended_action": "Review input data format and values",
                        "severity": "UNKNOWN"
                    },
                    "error": str(e)
                })
        
        # Summary statistics (errors count toward total, so failure_rate is
        # failures over all processed rows)
        total = len(results)
        failures = sum(1 for r in results if r.get('prediction') == 'FAILURE')
        healthy = sum(1 for r in results if r.get('prediction') == 'HEALTHY')
        errors = sum(1 for r in results if r.get('prediction') == 'ERROR')
        
        return {
            "summary": {
                "total_records": total,
                "predictions": {
                    "failure": failures,
                    "healthy": healthy,
                    "errors": errors
                },
                "failure_rate": round(failures / total * 100, 2) if total > 0 else 0
            },
            "results": results,
            "processed_at": datetime.now().isoformat()
        }
        
    except HTTPException:
        # Re-raise deliberate 4xx/5xx responses untouched
        raise
    except Exception as e:
        logger.error(f"Batch JSON prediction error: {e}")
        raise HTTPException(status_code=500, detail=f"Batch processing failed: {str(e)}")


# ============================================================================
# HEALTH CHECK ENDPOINTS
# ============================================================================

@app.get("/")
async def root():
    """Service metadata for the API root path (name, version, endpoint map)."""
    endpoint_map = {
        "single_prediction": "/predict",
        "batch_prediction": "/predict/batch",
        "health": "/health",
        "model_info": "/model/info",
    }
    return {
        "service": "Predictive Maintenance API",
        "version": "1.0.0",
        "status": "operational",
        "model_loaded": MODEL_PIPELINE is not None,
        "endpoints": endpoint_map,
    }


@app.get("/health")
async def health_check():
    """Liveness probe: reports healthy only when the model pipeline is loaded."""
    loaded = MODEL_PIPELINE is not None
    return {
        "status": "healthy" if loaded else "unhealthy",
        "timestamp": datetime.now().isoformat(),
        "model_loaded": loaded,
        "model_repo": HF_MODEL_REPO,
    }


@app.get("/model/info")
async def model_info():
    """Describe the loaded model pipeline; responds 503 when nothing is loaded."""
    pipeline = MODEL_PIPELINE
    if pipeline is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # 'model' and 'features' are mandatory pipeline entries; the remaining
    # metadata keys are optional and fall back to safe defaults.
    info = {
        "model_type": type(pipeline['model']).__name__,
        "features": pipeline['features'],
    }
    info["model_name"] = pipeline.get('model_name', 'Unknown')
    info["performance"] = pipeline.get('performance', {})
    info["training_config"] = pipeline.get('training_config', {})
    info["random_state"] = pipeline.get('random_state', None)
    return info


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    # Development entry point: serve the app with auto-reload on port 8000.
    import uvicorn

    uvicorn.run(
        app="main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info",
    )