RathodHarish committed on
Commit
6684126
·
verified ·
1 Parent(s): 8f67e6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -55
app.py CHANGED
@@ -13,9 +13,8 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
13
  # Preload Hugging Face summarization model at startup
14
  logging.info("Preloading Hugging Face model...")
15
  try:
16
- # Check for GPU availability using torch.cuda
17
  device = 0 if torch.cuda.is_available() else -1
18
- summarizer = pipeline("text2text-generation", model="google/flan-t5-base", device=device) # Use GPU if available
19
  logging.info(f"Hugging Face model preloaded successfully on device: {'GPU' if device == 0 else 'CPU'}")
20
  except Exception as e:
21
  logging.error(f"Failed to preload model: {str(e)}")
@@ -26,15 +25,10 @@ def summarize_logs(df, progress=gr.Progress()):
26
  progress(0.1, "Generating summary report...")
27
  try:
28
  total_devices = df["device_id"].nunique()
29
- avg_uptime = "97%" # Placeholder
30
  most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
31
 
32
- prompt = (
33
- f"Summarize maintenance and usage logs. "
34
- f"There were {total_devices} devices. "
35
- f"The most used device was {most_used}."
36
- )
37
- summary = summarizer(prompt, max_length=200, do_sample=False)[0]["generated_text"]
38
  logging.info("Summary generated successfully")
39
  return summary
40
  except Exception as e:
@@ -49,13 +43,12 @@ def detect_anomalies(df, progress=gr.Progress()):
49
  logging.warning("Required columns for anomaly detection not found")
50
  return "Anomaly detection requires 'usage_hours' and 'downtime' columns."
51
 
52
- # Sample data if too large
53
- if len(df) > 10000: # Adjust threshold as needed
54
- df = df.sample(n=10000, random_state=42)
55
- logging.info("Sampled data for anomaly detection to 10,000 rows")
56
 
57
  features = df[["usage_hours", "downtime"]].fillna(0)
58
- iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1) # Use all CPU cores
59
  df["anomaly"] = iso_forest.fit_predict(features)
60
 
61
  anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
@@ -63,12 +56,8 @@ def detect_anomalies(df, progress=gr.Progress()):
63
  return "No anomalies detected."
64
 
65
  anomaly_lines = ["**Detected Anomalies:**"]
66
- for idx, row in anomalies.iterrows():
67
- anomaly_lines.append(f"- Device ID: {row['device_id']}")
68
- anomaly_lines.append(f" Usage Hours: {row['usage_hours']}")
69
- anomaly_lines.append(f" Downtime: {row['downtime']}")
70
- anomaly_lines.append(f" Timestamp: {row['timestamp']}")
71
- anomaly_lines.append("---")
72
  anomaly_list = "\n".join(anomaly_lines)
73
  logging.info("Anomalies detected successfully")
74
  return anomaly_list
@@ -94,10 +83,8 @@ def check_amc_reminders(df, current_date, progress=gr.Progress()):
94
  return "No AMC reminders due within the next 30 days."
95
 
96
  reminder_lines = ["**Upcoming AMC Reminders:**"]
97
- for idx, row in reminders.iterrows():
98
- reminder_lines.append(f"- Device ID: {row['device_id']}")
99
- reminder_lines.append(f" AMC Date: {row['amc_date']}")
100
- reminder_lines.append("---")
101
  reminder_list = "\n".join(reminder_lines)
102
  logging.info("AMC reminders generated successfully")
103
  return reminder_list
@@ -111,11 +98,8 @@ def generate_dashboard_insights(df, progress=gr.Progress()):
111
  try:
112
  total_devices = df["device_id"].nunique()
113
  avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
114
- prompt = (
115
- f"Generate executive-level insights. "
116
- f"There were {total_devices} devices with an average usage of {avg_usage:.2f} hours."
117
- )
118
- insights = summarizer(prompt, max_length=150, do_sample=False)[0]["generated_text"]
119
  logging.info("Dashboard insights generated successfully")
120
  return insights
121
  except Exception as e:
@@ -126,11 +110,10 @@ def generate_dashboard_insights(df, progress=gr.Progress()):
126
  def create_usage_chart(df, progress=gr.Progress()):
127
  progress(0.9, "Creating usage chart...")
128
  try:
129
- # Limit the data for chart to top 10 devices to reduce load
130
  usage_data = df.groupby("device_id")["usage_hours"].sum().reset_index()
131
- if len(usage_data) > 10:
132
- usage_data = usage_data.nlargest(10, "usage_hours")
133
- logging.info("Limited chart data to top 10 devices")
134
 
135
  custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
136
  fig = px.bar(
@@ -155,7 +138,7 @@ def create_usage_chart(df, progress=gr.Progress()):
155
  return None
156
 
157
  # Main Gradio function
158
- def process_logs(file_obj, progress=gr.Progress()):
159
  try:
160
  progress(0, "Starting file processing...")
161
  if file_obj is None:
@@ -169,11 +152,17 @@ def process_logs(file_obj, progress=gr.Progress()):
169
  logging.error("Unsupported file format")
170
  return "Unsupported file format. Please upload a CSV file.", None, None, None, None, None
171
 
172
- # Use pandas to load CSV
173
  progress(0.05, "Loading CSV file...")
174
  try:
175
- df = pd.read_csv(file_name)
176
- logging.info(f"File loaded successfully with {len(df)} rows")
 
 
 
 
 
 
 
177
  except Exception as e:
178
  logging.error(f"Failed to load CSV: {str(e)}")
179
  return f"Failed to load CSV: {str(e)}", None, None, None, None, None
@@ -191,35 +180,29 @@ def process_logs(file_obj, progress=gr.Progress()):
191
 
192
  # Step 1: Summary Report
193
  progress(0.2, "Generating summary...")
194
- summary = f"**Step 1: Summary Report**\n{summarize_logs(df, progress)}\n---\n"
195
 
196
  # Step 2: Log Preview
197
  progress(0.3, "Previewing logs...")
198
  if not df.empty:
199
  preview_lines = ["**Step 2: Log Preview (First 5 Rows)**"]
200
  for idx, row in df.head().iterrows():
201
- preview_lines.append(f"**Row {idx + 1}:**")
202
- preview_lines.append(f"- Device ID: {row['device_id']}")
203
- preview_lines.append(f"- Timestamp: {row['timestamp']}")
204
- preview_lines.append(f"- Usage Hours: {row['usage_hours']}")
205
- preview_lines.append(f"- Downtime: {row['downtime']}")
206
- preview_lines.append(f"- AMC Date: {row['amc_date']}")
207
- preview_lines.append("---")
208
- preview = "\n".join(preview_lines) + "\n---\n"
209
  else:
210
- preview = "**Step 2: Log Preview**\nNo data available.\n---\n"
211
 
212
  # Step 3: Usage Chart
213
  chart = create_usage_chart(df, progress)
214
 
215
  # Step 4: Anomaly Detection
216
- anomalies = f"**Step 3: Anomaly Detection**\n{detect_anomalies(df, progress)}\n---\n"
217
 
218
  # Step 5: AMC Reminders
219
- amc_reminders = f"**Step 4: AMC Reminders**\n{check_amc_reminders(df, datetime.now(), progress)}\n---\n"
220
 
221
  # Step 6: Dashboard Insights
222
- insights = f"**Step 5: Dashboard Insights (AI)**\n{generate_dashboard_insights(df, progress)}\n---\n"
223
 
224
  progress(1.0, "Processing complete!")
225
  return summary, preview, chart, anomalies, amc_reminders, insights
@@ -231,11 +214,13 @@ def process_logs(file_obj, progress=gr.Progress()):
231
  try:
232
  logging.info("Initializing Gradio Blocks interface...")
233
  with gr.Blocks(css="""
234
- .dashboard-container {border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; background-color: #f9f9f9;}
235
- .dashboard-title {font-size: 24px; font-weight: bold; margin-bottom: 10px;}
236
- .dashboard-section {margin-bottom: 15px;}
237
- .dashboard-section h3 {font-size: 18px; margin-bottom: 5px;}
238
- .dashboard-section p {margin: 2px 0;}
 
 
239
  """) as iface:
240
  gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Hugging Face AI)</h1>")
241
  gr.Markdown("Upload a CSV file containing lab equipment logs to analyze usage.")
 
13
  # Preload Hugging Face summarization model at startup
14
  logging.info("Preloading Hugging Face model...")
15
  try:
 
16
  device = 0 if torch.cuda.is_available() else -1
17
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device) # Lighter model
18
  logging.info(f"Hugging Face model preloaded successfully on device: {'GPU' if device == 0 else 'CPU'}")
19
  except Exception as e:
20
  logging.error(f"Failed to preload model: {str(e)}")
 
25
  progress(0.1, "Generating summary report...")
26
  try:
27
  total_devices = df["device_id"].nunique()
 
28
  most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
29
 
30
+ prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
31
+ summary = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
 
 
 
 
32
  logging.info("Summary generated successfully")
33
  return summary
34
  except Exception as e:
 
43
  logging.warning("Required columns for anomaly detection not found")
44
  return "Anomaly detection requires 'usage_hours' and 'downtime' columns."
45
 
46
+ if len(df) > 5000:
47
+ df = df.sample(n=5000, random_state=42)
48
+ logging.info("Sampled data for anomaly detection to 5,000 rows")
 
49
 
50
  features = df[["usage_hours", "downtime"]].fillna(0)
51
+ iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1)
52
  df["anomaly"] = iso_forest.fit_predict(features)
53
 
54
  anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
 
56
  return "No anomalies detected."
57
 
58
  anomaly_lines = ["**Detected Anomalies:**"]
59
+ for idx, row in anomalies.head(5).iterrows():
60
+ anomaly_lines.append(f"- Device ID: {row['device_id']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}")
 
 
 
 
61
  anomaly_list = "\n".join(anomaly_lines)
62
  logging.info("Anomalies detected successfully")
63
  return anomaly_list
 
83
  return "No AMC reminders due within the next 30 days."
84
 
85
  reminder_lines = ["**Upcoming AMC Reminders:**"]
86
+ for idx, row in reminders.head(5).iterrows():
87
+ reminder_lines.append(f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}")
 
 
88
  reminder_list = "\n".join(reminder_lines)
89
  logging.info("AMC reminders generated successfully")
90
  return reminder_list
 
98
  try:
99
  total_devices = df["device_id"].nunique()
100
  avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
101
+ prompt = f"Insights: {total_devices} devices, avg usage {avg_usage:.2f} hours."
102
+ insights = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
 
 
 
103
  logging.info("Dashboard insights generated successfully")
104
  return insights
105
  except Exception as e:
 
110
  def create_usage_chart(df, progress=gr.Progress()):
111
  progress(0.9, "Creating usage chart...")
112
  try:
 
113
  usage_data = df.groupby("device_id")["usage_hours"].sum().reset_index()
114
+ if len(usage_data) > 5:
115
+ usage_data = usage_data.nlargest(5, "usage_hours")
116
+ logging.info("Limited chart data to top 5 devices")
117
 
118
  custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
119
  fig = px.bar(
 
138
  return None
139
 
140
  # Main Gradio function
141
+ async def process_logs(file_obj, row_limit=10000, progress=gr.Progress()):
142
  try:
143
  progress(0, "Starting file processing...")
144
  if file_obj is None:
 
152
  logging.error("Unsupported file format")
153
  return "Unsupported file format. Please upload a CSV file.", None, None, None, None, None
154
 
 
155
  progress(0.05, "Loading CSV file...")
156
  try:
157
+ usecols = ["device_id", "timestamp", "usage_hours", "downtime", "amc_date"]
158
+ dtypes = {
159
+ "device_id": "string",
160
+ "usage_hours": "float32",
161
+ "downtime": "float32",
162
+ "amc_date": "string"
163
+ }
164
+ df = pd.read_csv(file_name, usecols=usecols, dtype=dtypes, nrows=row_limit)
165
+ logging.info(f"File loaded successfully with {len(df)} rows (limited to {row_limit} rows)")
166
  except Exception as e:
167
  logging.error(f"Failed to load CSV: {str(e)}")
168
  return f"Failed to load CSV: {str(e)}", None, None, None, None, None
 
180
 
181
  # Step 1: Summary Report
182
  progress(0.2, "Generating summary...")
183
+ summary = f"**Step 1: Summary Report** \n{summarize_logs(df, progress)}"
184
 
185
  # Step 2: Log Preview
186
  progress(0.3, "Previewing logs...")
187
  if not df.empty:
188
  preview_lines = ["**Step 2: Log Preview (First 5 Rows)**"]
189
  for idx, row in df.head().iterrows():
190
+ preview_lines.append(f"**Row {idx + 1}:** Device ID: {row['device_id']}, Timestamp: {row['timestamp']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, AMC Date: {row['amc_date']}")
191
+ preview = "\n".join(preview_lines)
 
 
 
 
 
 
192
  else:
193
+ preview = "**Step 2: Log Preview** \nNo data available."
194
 
195
  # Step 3: Usage Chart
196
  chart = create_usage_chart(df, progress)
197
 
198
  # Step 4: Anomaly Detection
199
+ anomalies = f"**Step 3: Anomaly Detection** \n{detect_anomalies(df, progress)}"
200
 
201
  # Step 5: AMC Reminders
202
+ amc_reminders = f"**Step 4: AMC Reminders** \n{check_amc_reminders(df, datetime.now(), progress)}"
203
 
204
  # Step 6: Dashboard Insights
205
+ insights = f"**Step 5: Dashboard Insights (AI)** \n{generate_dashboard_insights(df, progress)}"
206
 
207
  progress(1.0, "Processing complete!")
208
  return summary, preview, chart, anomalies, amc_reminders, insights
 
214
  try:
215
  logging.info("Initializing Gradio Blocks interface...")
216
  with gr.Blocks(css="""
217
+ .dashboard-container {border: 1px solid #e0e0e0; padding: 10/* Reduced padding */ 10px; border-radius: 5px; background-color: #f9f9f9;}
218
+ .dashboard-title {font-size: 24px; font-weight: bold; margin-bottom: 5px; /* Reduced margin */}
219
+ .dashboard-section {margin-bottom: 5px; /* Reduced margin */}
220
+ .dashboard-section h3 {font-size: 18px; margin-bottom: 2px; /* Reduced margin */}
221
+ .dashboard-section p {margin: 1px 0; line-height: 1.2; /* Tighter line spacing */}
222
+ .dashboard-section li {margin: 1px 0; line-height: 1.2; /* Tighter spacing for list items */}
223
+ .dashboard-section ul {margin: 2px 0; padding-left: 20px; /* Reduced margin/padding for lists */}
224
  """) as iface:
225
  gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Hugging Face AI)</h1>")
226
  gr.Markdown("Upload a CSV file containing lab equipment logs to analyze usage.")