solfedge commited on
Commit
e573a4e
·
verified ·
1 Parent(s): be38c09

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +535 -0
  2. data_processor.py +56 -0
  3. model_trainer.py +63 -0
  4. requirements.txt +10 -0
  5. x.txt +218 -0
app.py ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import gradio as gr
7
+ from sklearn.ensemble import IsolationForest
8
+ from sklearn.preprocessing import StandardScaler # Imported but not used directly here
9
+ from transformers import pipeline
10
+ import os
11
+
12
# Global variables
# Module-level state populated by initialize_app() and read by the Gradio
# prediction callback. All four start as None until initialization succeeds.
df = None           # processed sensor DataFrame (loaded/created in initialize_app)
iso_forest = None   # fitted IsolationForest anomaly-detection model
sensor_cols = None  # list of sensor column names used as model features
explainer = None    # optional transformers text2text pipeline; None if unavailable
17
+
18
def find_data_file():
    """Locate train_FD001.txt by probing a fixed list of candidate paths.

    Returns the first existing path; raises FileNotFoundError (after dumping
    directory listings for debugging) when none of them exist.
    """
    candidates = (
        'CMaps/train_FD001.txt',    # Original extracted location
        'train_FD001.txt',          # Current directory
        'data/train_FD001.txt',     # Data folder
        'C-MAPSS/train_FD001.txt',  # Alternative folder names
        'CMAPSS/train_FD001.txt',
        'dataset/train_FD001.txt',
    )

    found = next((p for p in candidates if os.path.exists(p)), None)
    if found is not None:
        print(f" Found data file at: {found}")
        return found

    # Nothing matched: list what actually exists to make debugging easier.
    print("Files in current directory:")
    for file in os.listdir('.'):
        print(f" {file}")
    if os.path.exists('CMaps'):
        print("Files in CMaps directory:")
        for file in os.listdir('CMaps'):
            print(f" {file}")

    raise FileNotFoundError("Could not find train_FD001.txt in any expected location")
43
+
44
def load_and_process_data():
    """
    Load and preprocess the NASA Turbofan (C-MAPSS FD001) dataset.

    Returns:
        (df, sensor_cols): DataFrame with per-engine z-score-normalized
        sensor readings, and the list of sensor column names.

    Raises:
        FileNotFoundError: propagated from find_data_file() when the raw
        data file cannot be located.
    """
    print("Loading and processing data...")

    # Find the data file (raises if missing)
    data_path = find_data_file()

    # Peek at the first row to determine the actual column count.
    # FIX: `delim_whitespace=True` is deprecated since pandas 2.1 and removed
    # in pandas 3.0; `sep=r'\s+'` is the supported equivalent for
    # whitespace-separated files.
    df_raw = pd.read_csv(data_path, sep=r'\s+', header=None, nrows=1)
    num_columns = len(df_raw.columns)
    print(f"Found {num_columns} columns in the dataset")

    # Build column names from the detected width. Standard C-MAPSS FD001 is
    # id, cycle, 3 operational settings, then 21 sensors (26 columns total).
    if num_columns >= 26:    # id, cycle, 3 ops, 21+ sensors
        columns = ['id', 'cycle', 'op1', 'op2', 'op3'] + [f'sensor{i}' for i in range(1, num_columns - 4)]
    elif num_columns >= 25:  # id, cycle, 2 ops, sensors
        columns = ['id', 'cycle', 'op1', 'op2'] + [f'sensor{i}' for i in range(1, num_columns - 3)]
    elif num_columns >= 24:  # id, cycle, 1 op, sensors
        columns = ['id', 'cycle', 'op1'] + [f'sensor{i}' for i in range(1, num_columns - 2)]
    else:                    # id, cycle, sensors (less common for FD001)
        columns = ['id', 'cycle'] + [f'sensor{i}' for i in range(1, num_columns - 1)]

    # Trim columns to actual number (safety check)
    columns = columns[:num_columns]

    # Load full dataset with the derived column names.
    df = pd.read_csv(data_path, sep=r'\s+', header=None, names=columns)

    # The NASA files often carry trailing separators that become all-NaN
    # columns; drop only those (how='all' never removes real data).
    df = df.dropna(axis=1, how='all')

    # Identify sensor columns (those starting with 'sensor')
    sensor_cols = [col for col in df.columns if col.startswith('sensor')]

    print(f" Identified {len(sensor_cols)} sensor columns: {sensor_cols}")

    # Normalize sensor readings per engine (z-score). The epsilon and the
    # std guard avoid division blow-ups on constant sensors.
    if len(sensor_cols) > 0:
        df[sensor_cols] = df.groupby('id')[sensor_cols].transform(
            lambda x: (x - x.mean()) / (x.std() + 1e-6) if x.std() > 1e-6 else x - x.mean()
        )
    else:
        print("⚠️ Warning: No sensor columns found!")
        sensor_cols = []

    print(f" Processed data shape: {df.shape}")
    return df, sensor_cols
97
+
98
def load_processed_data(filepath='processed_data.csv'):
    """Read a previously saved processed-data CSV.

    Returns (DataFrame, sensor column list), or (None, None) when the file
    is missing or unreadable.
    """
    if not os.path.exists(filepath):
        return None, None

    try:
        frame = pd.read_csv(filepath)
    except Exception as e:
        print(f" Error loading processed data from {filepath}: {e}")
        return None, None

    sensors = [name for name in frame.columns if name.startswith('sensor')]
    return frame, sensors
113
+
114
def load_model(filepath='isolation_forest_model.pkl'):
    """Deserialize a trained model from disk.

    Returns the model object, or None when the file is absent or joblib
    fails to read it (so callers can fall back to training a new model).
    """
    if not os.path.exists(filepath):
        return None

    try:
        model = joblib.load(filepath)
    except Exception as e:
        print(f" Error loading model from {filepath}: {e}")
        return None

    print(f" Model loaded from {filepath}")
    return model
128
+
129
def train_isolation_forest(df, sensor_cols, contamination=0.02):
    """
    Train an Isolation Forest for anomaly detection and annotate `df`.

    Args:
        df: processed DataFrame containing the sensor feature columns.
        sensor_cols: column names used as model features.
        contamination: expected anomaly fraction (kept low, 0.02, to
            reduce false positives).

    Returns:
        (iso_forest, df): the fitted model and `df` with added 'anomaly'
        (+1 normal / -1 anomaly) and 'anomaly_score' columns
        (lower score = more anomalous).

    Raises:
        ValueError: if `sensor_cols` is empty.
    """
    print(" Training Isolation Forest model...")
    print(f" Using {len(sensor_cols)} sensor columns for training")
    print(f" Contamination rate: {contamination}")

    if len(sensor_cols) == 0:
        raise ValueError(" No sensor columns found for training")

    # More trees than the sklearn default (100) for a more stable boundary.
    iso_forest = IsolationForest(
        contamination=contamination,
        random_state=42,
        n_estimators=150,
        max_samples='auto'
    )

    # Fit the model
    iso_forest.fit(df[sensor_cols])

    # Add predictions to dataframe
    df['anomaly'] = iso_forest.predict(df[sensor_cols])
    df['anomaly_score'] = iso_forest.decision_function(df[sensor_cols])

    # Show statistics.
    # FIX: the previous `if 'anomaly' in df.columns` guard was dead code —
    # the column is unconditionally assigned two lines above, so the else
    # branch could never run; the guard has been removed.
    anomaly_count = (df['anomaly'] == -1).sum()
    normal_count = (df['anomaly'] == 1).sum()
    print(f" Anomalies detected: {anomaly_count} ({anomaly_count/len(df)*100:.1f}%)")
    print(f" Normal readings: {normal_count} ({normal_count/len(df)*100:.1f}%)")

    print(" Model training completed!")
    return iso_forest, df
166
+
167
def initialize_app():
    """
    Initialize global application state: df, iso_forest, sensor_cols, explainer.

    Strategy: prefer cached artifacts on disk (processed_data.csv,
    isolation_forest_model.pkl); rebuild and persist whichever is missing.
    The Gen AI explainer is best-effort — failure to load it does NOT fail
    initialization, it only disables AI-generated insights.

    Returns:
        bool: True on success, False when data or model setup failed.
    """
    global df, iso_forest, sensor_cols, explainer

    print(" Initializing FIFO Mining Predictor...")

    try:
        # Try to load processed data first (cache hit avoids reprocessing)
        df, sensor_cols = load_processed_data('processed_data.csv')

        # If processed data doesn't exist, create it from the raw dataset
        if df is None:
            print(" Processed data not found. Creating from raw data...")
            df, sensor_cols = load_and_process_data()
            df.to_csv('processed_data.csv', index=False)
            print(" Processed data saved.")

        # Safety check after loading/processing
        if df is None or df.empty:
            print(" Failed to load or process data.")
            return False

        # Try to load existing model
        iso_forest = load_model('isolation_forest_model.pkl')

        # If model doesn't exist, train it and persist both the model and
        # the score-annotated data so the next startup is a pure cache hit.
        if iso_forest is None:
            print(" Model not found. Training new model...")
            # Use the potentially lower contamination rate for retraining if needed
            iso_forest_trained, df_updated = train_isolation_forest(df, sensor_cols, contamination=0.02)
            joblib.dump(iso_forest_trained, 'isolation_forest_model.pkl')
            df_updated.to_csv('processed_data.csv', index=False)
            iso_forest = iso_forest_trained
            df = df_updated
            print(" Model trained and saved.")

        # Ensure anomaly scores are present upon initialization — covers the
        # case where cached data predates the model (loaded CSV without scores)
        if 'anomaly_score' not in df.columns and iso_forest is not None and sensor_cols is not None:
            print(" Re-calculating anomaly scores...")
            df['anomaly'] = iso_forest.predict(df[sensor_cols])
            df['anomaly_score'] = iso_forest.decision_function(df[sensor_cols])
            df.to_csv('processed_data.csv', index=False)
            print(" Anomaly scores updated in processed data.")

        # Initialize Gen AI (optional; downloads the model on first run)
        try:
            print(" Loading Gen AI model...")
            explainer = pipeline("text2text-generation", model="google/flan-t5-small")
            print(" Gen AI model loaded successfully.")
        except Exception as e:
            print(f" Gen AI model not available: {e}")
            explainer = None  # Ensure it's None if loading fails

        print(" Application initialized successfully!")
        return True

    except Exception as e:
        print(f" Error initializing application: {e}")
        import traceback
        traceback.print_exc()
        return False
231
+
232
def generate_insight(engine_id, cycle, anomaly_score, top_sensors):
    """Produce a short maintenance recommendation for one reading.

    Tries the Gen AI explainer first; when the explainer is unavailable or
    raises, falls back to fixed threshold-based messages.
    """
    if explainer is not None:
        try:
            # Map the score onto coarse wording for the prompt.
            if anomaly_score < -0.7:
                risk_desc = "high risk"
            elif anomaly_score < 0:
                risk_desc = "moderate risk"
            else:
                risk_desc = "normal operation"

            prompt = f"""
            Mining equipment shows {risk_desc}. ID: {engine_id}, cycle: {cycle}.
            Score: {anomaly_score:.3f}. Sensors: {', '.join(top_sensors[:2])}.
            Brief maintenance recommendation in 1-2 sentences.
            """

            generated = explainer(
                prompt,
                max_length=80,
                num_return_sequences=1,
                do_sample=False,
                truncation=True
            )
            return generated[0]['generated_text'].strip()
        except Exception as e:
            # AI path failed mid-prediction; use the canned messages below.
            print(f" Gen AI error: {e}")

    # Fallback: canned messages, checked from most to least severe.
    if anomaly_score < -0.7:    # stricter threshold for critical
        return "Critical risk detected. Immediate inspection required. Check for mechanical wear or overheating."
    elif anomaly_score < -0.5:  # medium threshold
        return "Moderate risk detected. Schedule inspection within 48 hours. Monitor vibration and temperature."
    elif anomaly_score < 0:     # low threshold
        return "Low risk anomaly detected. Increase monitoring frequency. Review operational parameters."
    else:
        return "Equipment operating normally. Continue routine monitoring schedule."
272
+
273
def predict_failure(engine_id):
    """Assess failure risk for one truck and build its health dashboard.

    Args:
        engine_id: truck/engine id looked up in the global DataFrame `df`.

    Returns:
        (markdown_report: str, figure: matplotlib.figure.Figure | None) —
        the figure is None on validation errors or plotting failures.
    """
    global df, iso_forest, sensor_cols

    # Basic sanity check for initialization state
    if df is None or df.empty or sensor_cols is None or len(sensor_cols) == 0 or iso_forest is None:
        return " Application not properly initialized. Data or model is missing.", None

    # Validate input - Check against actual unique IDs in the data
    unique_ids = df['id'].unique()
    if engine_id not in unique_ids:
        # Provide better feedback on available IDs
        sample_ids = sorted(unique_ids)[:10]  # Show first 10
        sample_str = ", ".join(map(str, sample_ids))
        if len(unique_ids) > 10:
            sample_str += ", ..."
        return f" Truck ID {engine_id} not found.\nAvailable IDs (first 10): {sample_str}", None

    # Get latest data for this engine
    engine_data = df[df['id'] == engine_id].tail(1)
    if engine_data.empty:
        return " No data found for this truck ID.", None

    try:
        cycle = int(engine_data['cycle'].iloc[0])
        anomaly_score = float(engine_data['anomaly_score'].iloc[0])
    except (IndexError, KeyError, ValueError, TypeError) as e:
        return f" Error retrieving data for Truck ID {engine_id}: {e}", None

    # Top abnormal sensors = largest absolute normalized readings
    try:
        sens_vals = engine_data[sensor_cols].iloc[0].abs().sort_values(ascending=False).head(5).index.tolist()
    except Exception as e:
        return f" Error analyzing sensor data for Truck ID {engine_id}: {e}", None

    # Generate AI explanation
    insight = generate_insight(engine_id, cycle, anomaly_score, sens_vals)

    # Risk thresholds from the fleet-wide score distribution.
    # FIX: all_scores is now bound unconditionally BEFORE the try block —
    # previously it was only assigned inside the try, so a percentile
    # failure left it undefined and the distribution plot below raised
    # NameError (silently losing the whole figure).
    all_scores = df['anomaly_score'].dropna().values
    try:
        if len(all_scores) == 0:
            raise ValueError("No anomaly scores found in data.")
        high_threshold = np.percentile(all_scores, 1)    # bottom 1% = high risk
        medium_threshold = np.percentile(all_scores, 5)  # bottom 5% = medium risk
    except Exception as e:
        # Fallback thresholds if percentile calculation fails
        print(f" Warning: Could not calculate percentiles, using fallback thresholds: {e}")
        high_threshold = -0.3
        medium_threshold = -0.1

    # Risk level with better thresholds
    if anomaly_score <= high_threshold:
        risk_level = "🔴 HIGH RISK"
        action = " **IMMEDIATE INSPECTION REQUIRED**"
    elif anomaly_score <= medium_threshold:
        risk_level = "🟡 MEDIUM RISK"
        action = " **SCHEDULE INSPECTION SOON**"
    else:
        risk_level = "🟢 LOW RISK"
        action = " Equipment operating normally"

    # FIX: the report previously hard-indexed sens_vals[0..2], which raised
    # IndexError when fewer than three sensor columns exist; build the list
    # dynamically instead.
    sensor_lines = "\n".join(
        f"{i}. `{name}`" for i, name in enumerate(sens_vals[:3], start=1)
    )

    result = f"""
## 🔧 FIFO Mining Equipment Failure Predictor

### Equipment Status
- **Truck ID:** `{int(engine_id)}`
- **Current Cycle:** `{cycle}`
- **Anomaly Score:** `{anomaly_score:.3f}`
- **Risk Assessment:** **{risk_level}**

---

### AI Maintenance Recommendation
> {insight}

### Top Abnormal Sensors
{sensor_lines}

### Action Priority
{action}
"""

    # Create visualization
    try:
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle(f'Mining Truck {int(engine_id)} - Health Analysis', fontsize=16, fontweight='bold')

        engine_hist = df[df['id'] == engine_id]

        # Plot 1: Key sensor trends (indices guarded against short lists)
        if len(sensor_cols) > 0:
            axes[0, 0].plot(engine_hist['cycle'], engine_hist[sensor_cols[0]], label=f'{sensor_cols[0]}', linewidth=2)
        if len(sensor_cols) > 3:
            axes[0, 0].plot(engine_hist['cycle'], engine_hist[sensor_cols[3]], label=f'{sensor_cols[3]}', linewidth=2)
        if len(sensor_cols) > 6:
            axes[0, 0].plot(engine_hist['cycle'], engine_hist[sensor_cols[6]], label=f'{sensor_cols[6]}', linewidth=2)
        axes[0, 0].set_title('Sensor Trends')
        axes[0, 0].set_xlabel('Cycle')
        axes[0, 0].set_ylabel('Normalized Value')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

        # Plot 2: Anomaly score trend with thresholds
        axes[0, 1].plot(engine_hist['cycle'], engine_hist['anomaly_score'], 'b-', linewidth=2, label='Current Score')
        axes[0, 1].axhline(y=high_threshold, color='r', linestyle='--', alpha=0.7, label=f'High Risk ({high_threshold:.3f})')
        axes[0, 1].axhline(y=medium_threshold, color='orange', linestyle='--', alpha=0.7, label=f'Medium Risk ({medium_threshold:.3f})')
        axes[0, 1].axhline(y=0, color='g', linestyle='-', alpha=0.5, label='Normal')
        axes[0, 1].set_title('Anomaly Score Over Time')
        axes[0, 1].set_xlabel('Cycle')
        axes[0, 1].set_ylabel('Anomaly Score')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

        # Plot 3: Current sensor values (top 6)
        if len(sens_vals) >= 1:  # Need at least one
            num_bars = min(6, len(sens_vals))
            current_values = engine_data[sens_vals[:num_bars]].iloc[0].values
            bar_colors = ['red' if x <= high_threshold else 'orange' if x <= medium_threshold else 'green' for x in current_values]
            axes[1, 0].bar(range(num_bars), current_values, color=bar_colors)
            axes[1, 0].set_title('Current Top Abnormal Sensors')
            axes[1, 0].set_xticks(range(num_bars))
            axes[1, 0].set_xticklabels([s.replace('sensor', 'S') for s in sens_vals[:num_bars]], rotation=45)
            axes[1, 0].set_ylabel('Normalized Value')
            axes[1, 0].grid(True, alpha=0.3)

        # Plot 4: Risk distribution (guarded against an empty score array)
        if len(all_scores) > 0:
            axes[1, 1].hist(all_scores, bins=50, alpha=0.7, color='lightblue', edgecolor='black', linewidth=0.5)
        axes[1, 1].axvline(x=anomaly_score, color='red', linestyle='--', linewidth=2, label=f'Truck {engine_id}: {anomaly_score:.3f}')
        axes[1, 1].axvline(x=high_threshold, color='r', linestyle=':', alpha=0.7, label=f'High Risk Threshold')
        axes[1, 1].axvline(x=medium_threshold, color='orange', linestyle=':', alpha=0.7, label=f'Medium Risk Threshold')
        axes[1, 1].set_title('Anomaly Score Distribution')
        axes[1, 1].set_xlabel('Anomaly Score')
        axes[1, 1].set_ylabel('Frequency')
        axes[1, 1].legend()
        axes[1, 1].grid(True, alpha=0.3)

        plt.tight_layout()

    except Exception as e:
        print(f" Error creating plot: {e}")
        # Return result without plot if plotting fails
        return result, None

    return result, fig
424
+
425
# --- Main Application Logic ---

# Initialize the app at import time (required for Hugging Face Spaces,
# where the module is imported rather than run directly).
app_initialized = False
try:
    print("=== Starting Initialization Process ===")
    app_initialized = initialize_app()
    print("=== Initialization Process Complete ===")
except Exception as e:
    # initialize_app() already catches its own errors; this guards against
    # anything unexpected so the fallback UI below can still be built.
    print(f" Critical error during initialization: {e}")
    import traceback
    traceback.print_exc()
437
+
438
# --- Debug Information ---
# Diagnostic dump printed after initialization so Space logs show exactly
# what state the app started in. Placed AFTER app_initialized is defined.
print("\n=== POST-INITIALIZATION DEBUG INFO ===")
print(f"app_initialized: {app_initialized}")
if df is not None and not df.empty:
    print(f" Data loaded successfully. Shape: {df.shape}")
    print(f" Columns: {list(df.columns)}")
    if 'id' in df.columns:
        unique_ids = sorted(df['id'].dropna().unique())
        print(f" Unique Truck IDs found: {len(unique_ids)} (Min: {int(min(unique_ids)) if len(unique_ids) > 0 else 'N/A'}, Max: {int(max(unique_ids)) if len(unique_ids) > 0 else 'N/A'})")
        print(f" First 10 IDs: {list(map(int, unique_ids[:10]))}")
    else:
        print(" 'id' column is missing!")
    if 'anomaly_score' in df.columns:
        try:
            print(f" Anomaly scores range: [{df['anomaly_score'].min():.3f}, {df['anomaly_score'].max():.3f}]")
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        except Exception:
            print(" Error calculating anomaly score range.")
    else:
        print(" 'anomaly_score' column is missing - model might not have trained correctly.")
    sensor_cols_debug = [col for col in df.columns if col.startswith('sensor')]
    print(f" Sensor columns identified: {len(sensor_cols_debug)}")
else:
    print(" Data (df) failed to load or is empty after initialization.")
print("=======================================\n")
463
+
464
# --- Gradio Interface Creation ---
# Build the main interface when initialization succeeded, otherwise a
# fallback interface that only reports the failure. `demo` is defined on
# both paths so the launch guard at the bottom always has something to run.
if app_initialized and df is not None and not df.empty:
    # --- Calculate safe min/max for the slider HERE ---
    # Defaults used whenever the real range cannot be derived from the data.
    safe_min_id = 1
    safe_max_id = 100

    try:
        if 'id' in df.columns and not df['id'].empty:
            unique_ids = df['id'].dropna().unique()
            if len(unique_ids) > 0:
                calculated_min_id = int(min(unique_ids))
                calculated_max_id = int(max(unique_ids))

                # Apply sanity checks (ids must be positive and ordered)
                if calculated_min_id > 0 and calculated_max_id >= calculated_min_id:
                    safe_min_id = calculated_min_id
                    safe_max_id = calculated_max_id
                    print(f" Setting interface ID range: {safe_min_id}-{safe_max_id}")
                else:
                    print(f" Calculated ID range [{calculated_min_id}, {calculated_max_id}] seems invalid, using defaults 1-100")
            else:
                print(" No unique IDs found in data, using defaults 1-100")
        else:
            print(" 'id' column not found in data, using defaults 1-100")
    except Exception as e:
        print(f" Error calculating ID range: {e}, using defaults 1-100")
    # --- End of max_truck_id calculation ---

    print(f" Creating main Gradio interface with ID range {safe_min_id}-{safe_max_id}")

    demo = gr.Interface(
        fn=predict_failure,
        inputs=gr.Number(
            label="⛏️ Enter Mining Truck ID",
            value=safe_min_id,  # Start with the actual minimum ID found in data
            minimum=safe_min_id,
            maximum=safe_max_id,  # Use the calculated maximum ID
            step=1
        ),
        outputs=[
            gr.Markdown(label=" Failure Prediction & AI Insights"),
            gr.Plot(label=" Equipment Health Dashboard")  # Handle potential None plots gracefully
        ],
        title=" FIFO Mining Equipment Failure Predictor",
        description="""
        AI-powered predictive maintenance using unsupervised learning + Generative AI.
        Detects equipment anomalies before failures occur to prevent costly downtime.
        """,
        examples=[[safe_min_id], [min(safe_min_id + 4, safe_max_id)], [min(safe_min_id + 9, safe_max_id)]],  # Dynamic examples based on actual data range
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan")  # Updated theme syntax for newer Gradio versions
    )
else:
    print(" Creating fallback Gradio interface")
    # Fallback interface shown when data/model setup failed

    def error_message(truck_id):
        # Matches predict_failure's (markdown, plot) return shape
        return " Application failed to initialize correctly. Please check the console logs and data files.", None  # Return None for plot if needed

    demo = gr.Interface(
        fn=error_message,
        inputs=gr.Number(label="⛏️ Enter Mining Truck ID", value=1),
        outputs=[gr.Markdown(label="Error"), gr.Plot(label="Plot")],  # Consistent output types for Gradio
        title=" FIFO Mining Predictor - Initialization Error",
        description="Failed to load data or model. Check file paths and data format.",
        theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink")
    )

# For local development / Hugging Face Spaces
if __name__ == "__main__":
    print(" Starting FIFO Mining Equipment Failure Predictor...")
    # Use share=True for public URL in Colab/Hugging Face if needed
    demo.launch()
data_processor.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import StandardScaler
5
+ import os
6
+
7
def load_and_process_data(data_path='CMaps/train_FD001.txt'):
    """
    Load and preprocess the NASA Turbofan (C-MAPSS FD001) dataset.

    Args:
        data_path: path to the raw whitespace-separated data file.

    Returns:
        (df, sensor_cols): processed DataFrame with per-engine normalized
        sensor readings, and the list of sensor column names.

    Raises:
        FileNotFoundError: if `data_path` does not exist.
    """
    print("Loading and processing data...")

    # FD001 layout: unit id, cycle, 3 operational settings, 21 sensors = 26 columns
    columns = ['id', 'cycle', 'op1', 'op2', 'op3'] + [f'sensor{i}' for i in range(1, 22)]

    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data file {data_path} not found. Please download NASA Turbofan dataset.")

    # FIX: the raw NASA files are separated by runs of spaces and carry a
    # trailing separator per line; sep=' ' misaligns columns against the 26
    # names above, so parse with a whitespace regex instead. Also drop only
    # columns that are ENTIRELY NaN — the previous dropna(axis=1) used the
    # default how='any' and could silently delete real data columns.
    df = pd.read_csv(data_path, sep=r'\s+', header=None, names=columns)
    df = df.dropna(axis=1, how='all')

    # FIX: normalize all 21 sensors. The original range(1, 20) skipped
    # sensor20/sensor21 and disagreed with load_processed_data(), which
    # returns range(1, 22).
    sensor_cols = [f'sensor{i}' for i in range(1, 22)]
    df[sensor_cols] = df.groupby('id')[sensor_cols].transform(
        lambda x: (x - x.mean()) / (x.std() + 1e-6)
    )

    print(f"Processed data shape: {df.shape}")
    return df, sensor_cols
30
+
31
def save_processed_data(df, filepath='processed_data.csv'):
    """Persist the processed DataFrame as CSV.

    Written without the index column so a later read_csv round-trips to the
    same shape.
    """
    df.to_csv(filepath, index=False)
    print(f"Processed data saved to {filepath}")
37
+
38
def load_processed_data(filepath='processed_data.csv'):
    """Read back a processed CSV; returns (None, None) when absent.

    The sensor column list is the fixed FD001 set (sensor1..sensor21),
    mirroring what the processing step produces.
    """
    if not os.path.exists(filepath):
        return None, None

    frame = pd.read_csv(filepath)
    sensors = ['sensor%d' % i for i in range(1, 22)]
    return frame, sensors
48
+
49
if __name__ == "__main__":
    # Smoke-test: process the default dataset and persist the result.
    try:
        frame, _ = load_and_process_data()
        save_processed_data(frame)
        print("Data processing completed successfully!")
    except Exception as exc:
        print(f"Error in data processing: {exc}")
model_trainer.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import joblib
3
+ from sklearn.ensemble import IsolationForest
4
+ import os
5
+
6
def train_isolation_forest(df, sensor_cols, contamination=0.1):
    """Fit an Isolation Forest on `sensor_cols` and annotate `df`.

    Adds 'anomaly' (+1 normal / -1 anomaly) and 'anomaly_score' (lower =
    more anomalous) columns in place, then returns (model, df).
    """
    print("Training Isolation Forest model...")

    features = df[sensor_cols]
    model = IsolationForest(
        n_estimators=100,
        contamination=contamination,
        random_state=42,
    )
    model.fit(features)

    # In-place annotation: discrete label plus continuous decision score.
    df['anomaly'] = model.predict(features)
    df['anomaly_score'] = model.decision_function(features)

    print("Model training completed!")
    return model, df
27
+
28
def save_model(model, filepath='isolation_forest_model.pkl'):
    """Serialize the fitted model to `filepath` with joblib."""
    joblib.dump(model, filepath)
    print(f"Model saved to {filepath}")
34
+
35
def load_model(filepath='isolation_forest_model.pkl'):
    """Deserialize a model saved by save_model().

    Returns None (rather than raising) when `filepath` does not exist, so
    callers can fall back to training a fresh model.
    """
    if os.path.exists(filepath):
        loaded = joblib.load(filepath)
        print(f"Model loaded from {filepath}")
        return loaded
    return None
45
+
46
def add_anomaly_scores(df, model, sensor_cols):
    """Annotate `df` in place with the model's anomaly outputs.

    'anomaly' holds the discrete label from model.predict(); 'anomaly_score'
    holds the continuous decision_function() value. Returns the same df.
    """
    features = df[sensor_cols]
    df['anomaly'] = model.predict(features)
    df['anomaly_score'] = model.decision_function(features)
    return df
53
+
54
if __name__ == "__main__":
    # Smoke-test: build the dataset, fit the detector, persist the model.
    try:
        from data_processor import load_and_process_data

        frame, sensors = load_and_process_data()
        model, _ = train_isolation_forest(frame, sensors)
        save_model(model)
        print("Model training and saving completed successfully!")
    except Exception as exc:
        print(f"Error in model training: {exc}")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ pandas
4
+ numpy
5
+ scikit-learn
6
+ matplotlib
7
+ transformers
8
+ torch
9
+ gradio
10
+ joblib
x.txt ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 18
2
+ 79
3
+ 106
4
+ 110
5
+ 15
6
+ 155
7
+ 6
8
+ 90
9
+ 11
10
+ 79
11
+ 6
12
+ 73
13
+ 30
14
+ 11
15
+ 37
16
+ 67
17
+ 68
18
+ 99
19
+ 22
20
+ 54
21
+ 97
22
+ 10
23
+ 142
24
+ 77
25
+ 88
26
+ 163
27
+ 126
28
+ 138
29
+ 83
30
+ 78
31
+ 75
32
+ 11
33
+ 53
34
+ 173
35
+ 63
36
+ 100
37
+ 151
38
+ 55
39
+ 48
40
+ 37
41
+ 44
42
+ 27
43
+ 18
44
+ 6
45
+ 15
46
+ 112
47
+ 131
48
+ 13
49
+ 122
50
+ 13
51
+ 98
52
+ 53
53
+ 52
54
+ 106
55
+ 103
56
+ 152
57
+ 123
58
+ 26
59
+ 178
60
+ 73
61
+ 169
62
+ 39
63
+ 39
64
+ 14
65
+ 11
66
+ 121
67
+ 86
68
+ 56
69
+ 115
70
+ 17
71
+ 148
72
+ 104
73
+ 78
74
+ 86
75
+ 98
76
+ 36
77
+ 94
78
+ 52
79
+ 91
80
+ 15
81
+ 141
82
+ 74
83
+ 146
84
+ 17
85
+ 47
86
+ 194
87
+ 21
88
+ 79
89
+ 97
90
+ 8
91
+ 9
92
+ 73
93
+ 183
94
+ 97
95
+ 73
96
+ 49
97
+ 31
98
+ 97
99
+ 9
100
+ 14
101
+ 106
102
+ 8
103
+ 8
104
+ 106
105
+ 116
106
+ 120
107
+ 61
108
+ 168
109
+ 35
110
+ 80
111
+ 9
112
+ 50
113
+ 151
114
+ 78
115
+ 91
116
+ 7
117
+ 181
118
+ 150
119
+ 106
120
+ 15
121
+ 67
122
+ 145
123
+ 180
124
+ 7
125
+ 179
126
+ 124
127
+ 82
128
+ 108
129
+ 79
130
+ 121
131
+ 120
132
+ 39
133
+ 38
134
+ 9
135
+ 167
136
+ 87
137
+ 88
138
+ 7
139
+ 51
140
+ 55
141
+ 155
142
+ 47
143
+ 81
144
+ 43
145
+ 98
146
+ 10
147
+ 92
148
+ 11
149
+ 165
150
+ 34
151
+ 115
152
+ 59
153
+ 99
154
+ 103
155
+ 108
156
+ 83
157
+ 171
158
+ 15
159
+ 9
160
+ 42
161
+ 13
162
+ 41
163
+ 88
164
+ 14
165
+ 155
166
+ 188
167
+ 96
168
+ 82
169
+ 135
170
+ 182
171
+ 36
172
+ 107
173
+ 14
174
+ 95
175
+ 142
176
+ 23
177
+ 6
178
+ 144
179
+ 35
180
+ 97
181
+ 68
182
+ 14
183
+ 67
184
+ 191
185
+ 19
186
+ 10
187
+ 158
188
+ 183
189
+ 43
190
+ 12
191
+ 148
192
+ 13
193
+ 37
194
+ 122
195
+ 80
196
+ 93
197
+ 132
198
+ 32
199
+ 103
200
+ 174
201
+ 111
202
+ 68
203
+ 192
204
+ 121
205
+ 134
206
+ 48
207
+ 85
208
+ 8
209
+ 23
210
+ 8
211
+ 6
212
+ 57
213
+ 83
214
+ 172
215
+ 101
216
+ 81
217
+ 86
218
+ 165