RathodHarish committed on
Commit
74e3fee
·
verified ·
1 Parent(s): 4340807

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -114
app.py CHANGED
@@ -45,12 +45,11 @@ except ImportError:
45
  logging.info("Preloading Hugging Face model...")
46
  try:
47
  device = 0 if torch.cuda.is_available() else -1
48
- # Use a smaller model for faster inference
49
  summarizer = pipeline(
50
  "summarization",
51
  model="t5-small",
52
  device=device,
53
- max_length=50,
54
  min_length=10,
55
  num_beams=2
56
  )
@@ -98,64 +97,6 @@ picklist_mapping = {
98
  }
99
  }
100
 
101
- # Cache folder ID
102
- def get_folder_id(folder_name):
103
- if sf is None:
104
- return None
105
- try:
106
- query = f"SELECT Id FROM Folder WHERE Name = '{folder_name}' AND Type = 'Report'"
107
- result = sf.query(query)
108
- if result['totalSize'] > 0:
109
- folder_id = result['records'][0]['Id']
110
- logging.info(f"Found folder ID for '{folder_name}': {folder_id}")
111
- return folder_id
112
- else:
113
- logging.error(f"Folder '{folder_name}' not found in Salesforce.")
114
- return None
115
- except Exception as e:
116
- logging.error(f"Failed to fetch folder ID for '{folder_name}': {str(e)}")
117
- return None
118
-
119
- LABOPS_REPORTS_FOLDER_ID = get_folder_id('LabOps Reports')
120
-
121
- # Salesforce report creation
122
- def create_salesforce_reports(df):
123
- if sf is None or not LABOPS_REPORTS_FOLDER_ID:
124
- return
125
- try:
126
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
127
- reports = [
128
- {
129
- "reportMetadata": {
130
- "name": f"SmartLog_Usage_Report_{timestamp}",
131
- "developerName": f"SmartLog_Usage_Report_{timestamp}",
132
- "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
133
- "reportFormat": "TABULAR",
134
- "reportBooleanFilter": None,
135
- "reportFilters": [],
136
- "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.Usage_Hours__c"],
137
- "folderId": LABOPS_REPORTS_FOLDER_ID
138
- }
139
- },
140
- {
141
- "reportMetadata": {
142
- "name": f"SmartLog_AMC_Reminders_{timestamp}",
143
- "developerName": f"SmartLog_AMC_Reminders_{timestamp}",
144
- "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
145
- "reportFormat": "TABULAR",
146
- "reportBooleanFilter": None,
147
- "reportFilters": [],
148
- "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.AMC_Date__c"],
149
- "folderId": LABOPS_REPORTS_FOLDER_ID
150
- }
151
- }
152
- ]
153
- for report in reports:
154
- sf.restful('analytics/reports', method='POST', json=report)
155
- logging.info("Salesforce reports created")
156
- except Exception as e:
157
- logging.error(f"Failed to create Salesforce reports: {str(e)}")
158
-
159
  # Save to Salesforce
160
  def save_to_salesforce(df, reminders_df):
161
  if sf is None:
@@ -181,6 +122,7 @@ def save_to_salesforce(df, reminders_df):
181
  amc_date_dt = datetime.strptime(amc_date, '%Y-%m-%d')
182
  if status == "Active" and current_date.date() <= amc_date_dt.date() <= next_30_days.date():
183
  logging.info(f"AMC Reminder for Device ID {row['device_id']}")
 
184
  except:
185
  amc_date_str = None
186
 
@@ -208,7 +150,7 @@ def summarize_logs(df):
208
  total_devices = df["device_id"].nunique()
209
  most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
210
  prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
211
- summary = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
212
  return summary
213
  except Exception as e:
214
  logging.error(f"Summary generation failed: {str(e)}")
@@ -220,8 +162,8 @@ def detect_anomalies(df):
220
  if "usage_hours" not in df.columns or "downtime" not in df.columns:
221
  return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
222
  features = df[["usage_hours", "downtime"]].fillna(0)
223
- if len(features) > 500:
224
- features = features.sample(n=500, random_state=42)
225
  iso_forest = IsolationForest(contamination=0.1, random_state=42)
226
  df["anomaly"] = iso_forest.fit_predict(features)
227
  anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
@@ -254,7 +196,7 @@ def generate_dashboard_insights(df):
254
  total_devices = df["device_id"].nunique()
255
  avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
256
  prompt = f"Insights: {total_devices} devices, avg usage {avg_usage:.2f} hours."
257
- insights = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
258
  return insights
259
  except Exception as e:
260
  logging.error(f"Dashboard insights generation failed: {str(e)}")
@@ -395,32 +337,12 @@ def generate_device_cards(df):
395
  logging.error(f"Failed to generate device cards: {str(e)}")
396
  return f'<p>Error generating device cards: {str(e)}</p>'
397
 
398
- # Generate monthly status
399
- def generate_monthly_status(df, selected_month):
400
- try:
401
- total_devices = df['device_id'].nunique()
402
- total_usage_hours = df['usage_hours'].sum()
403
- total_downtime = df['downtime'].sum()
404
- avg_usage = total_usage_hours / total_devices if total_devices > 0 else 0
405
- avg_downtime = total_downtime / total_devices if total_devices > 0 else 0
406
- return f"""
407
- Monthly Status for {selected_month}:
408
- - Total Devices: {total_devices}
409
- - Total Usage Hours: {total_usage_hours:.2f}
410
- - Total Downtime Hours: {total_downtime:.2f}
411
- - Average Usage per Device: {avg_usage:.2f} hours
412
- - Average Downtime per Device: {avg_downtime:.2f} hours
413
- """
414
- except Exception as e:
415
- logging.error(f"Failed to generate monthly status: {str(e)}")
416
- return f"Failed to generate monthly status: {str(e)}"
417
-
418
  # Generate PDF content
419
- def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, df, selected_month):
420
  if not reportlab_available:
421
  return None
422
  try:
423
- pdf_path = f"monthly_status_report_{selected_month.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
424
  doc = SimpleDocTemplate(pdf_path, pagesize=letter)
425
  styles = getSampleStyleSheet()
426
  story = []
@@ -428,16 +350,10 @@ def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights
428
  def safe_paragraph(text, style):
429
  return Paragraph(str(text).replace('\n', '<br/>'), style) if text else Paragraph("", style)
430
 
431
- story.append(Paragraph("LabOps Monthly Status Report", styles['Title']))
432
  story.append(Paragraph(f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
433
  story.append(Spacer(1, 12))
434
 
435
- if selected_month != "All":
436
- monthly_status = generate_monthly_status(df, selected_month)
437
- story.append(Paragraph("Monthly Status Summary", styles['Heading2']))
438
- story.append(safe_paragraph(monthly_status, styles['Normal']))
439
- story.append(Spacer(1, 12))
440
-
441
  story.append(Paragraph("Summary Report", styles['Heading2']))
442
  story.append(safe_paragraph(summary, styles['Normal']))
443
  story.append(Spacer(1, 12))
@@ -492,7 +408,7 @@ def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights
492
  return None
493
 
494
  # Main processing function
495
- async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, month_filter, last_modified_state):
496
  start_time = time.time()
497
  try:
498
  if not file_obj:
@@ -540,12 +456,6 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
540
  start_date = today + pd.Timedelta(days=days_start)
541
  end_date = today + pd.Timedelta(days=days_end) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
542
  filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
543
- if month_filter and month_filter != "All":
544
- selected_date = pd.to_datetime(month_filter, format="%B %Y")
545
- filtered_df = filtered_df[
546
- (filtered_df['timestamp'].dt.year == selected_date.year) &
547
- (filtered_df['timestamp'].dt.month == selected_date.month)
548
- ]
549
 
550
  if filtered_df.empty:
551
  return "No data after applying filters.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
@@ -564,9 +474,8 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
564
  future_downtime_chart = executor.submit(create_downtime_chart, filtered_df)
565
  future_daily_log_chart = executor.submit(create_daily_log_trends_chart, filtered_df)
566
  future_weekly_uptime_chart = executor.submit(create_weekly_uptime_chart, filtered_df)
567
- future_anomaly_alerts_chart = executor.submit(create_anomaly_alerts_chart, pd.DataFrame())
568
  future_device_cards = executor.submit(generate_device_cards, filtered_df)
569
- future_reports = executor.submit(create_salesforce_reports, filtered_df)
570
 
571
  summary = f"Step 1: Summary Report\n{future_summary.result()}"
572
  anomalies, anomalies_df = future_anomalies.result()
@@ -582,7 +491,7 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
582
  device_cards = future_device_cards.result()
583
 
584
  save_to_salesforce(filtered_df, reminders_df)
585
- pdf_file = generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, filtered_df, month_filter)
586
 
587
  elapsed_time = time.time() - start_time
588
  logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
@@ -595,9 +504,9 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
595
  return f"Error: {str(e)}", pd.DataFrame(), None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state
596
 
597
  # Update filters
598
- def update_filters(file_obj):
599
- if not file_obj:
600
- return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
601
  try:
602
  with open(file_obj.name, 'rb') as f:
603
  csv_content = f.read().decode('utf-8')
@@ -606,12 +515,11 @@ def update_filters(file_obj):
606
 
607
  lab_site_options = ['All'] + [site for site in df['lab_site'].dropna().astype(str).unique().tolist() if site.strip()] if 'lab_site' in df.columns else ['All']
608
  equipment_type_options = ['All'] + [equip for equip in df['equipment_type'].dropna().astype(str).unique().tolist() if equip.strip()] if 'equipment_type' in df.columns else ['All']
609
- month_options = ['All'] + sorted(df['timestamp'].dt.strftime('%B %Y').dropna().unique().tolist()) if 'timestamp' in df.columns else ['All']
610
 
611
- return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), gr.update(choices=month_options, value='All')
612
  except Exception as e:
613
  logging.error(f"Failed to update filters: {str(e)}")
614
- return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
615
 
616
  # Gradio Interface
617
  try:
@@ -632,6 +540,7 @@ try:
632
  gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard with the latest data.")
633
 
634
  last_modified_state = gr.State(value=None)
 
635
 
636
  with gr.Row():
637
  with gr.Column(scale=1):
@@ -641,7 +550,6 @@ try:
641
  lab_site_filter = gr.Dropdown(label="Lab Site", choices=['All'], value='All', interactive=True)
642
  equipment_type_filter = gr.Dropdown(label="Equipment Type", choices=['All'], value='All', interactive=True)
643
  date_range_filter = gr.Slider(label="Date Range (Days from Today)", minimum=-365, maximum=0, step=1, value=[-30, 0])
644
- month_filter = gr.Dropdown(label="Select Month for Report", choices=['All'], value='All', interactive=True)
645
  submit_button = gr.Button("Analyze", variant="primary")
646
 
647
  with gr.Column(scale=2):
@@ -679,18 +587,18 @@ try:
679
  insights_output = gr.Markdown()
680
  with gr.Group(elem_classes="dashboard-section"):
681
  gr.Markdown("### Export Report")
682
- pdf_output = gr.File(label="Download Monthly Status Report as PDF")
683
 
684
  file_input.change(
685
  fn=update_filters,
686
- inputs=[file_input],
687
- outputs=[lab_site_filter, equipment_type_filter, month_filter],
688
  queue=False
689
  )
690
 
691
  submit_button.click(
692
  fn=process_logs,
693
- inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, month_filter, last_modified_state],
694
  outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
695
  )
696
 
 
45
  logging.info("Preloading Hugging Face model...")
46
  try:
47
  device = 0 if torch.cuda.is_available() else -1
 
48
  summarizer = pipeline(
49
  "summarization",
50
  model="t5-small",
51
  device=device,
52
+ max_length=30, # Reduced for faster processing
53
  min_length=10,
54
  num_beams=2
55
  )
 
97
  }
98
  }
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  # Save to Salesforce
101
  def save_to_salesforce(df, reminders_df):
102
  if sf is None:
 
122
  amc_date_dt = datetime.strptime(amc_date, '%Y-%m-%d')
123
  if status == "Active" and current_date.date() <= amc_date_dt.date() <= next_30_days.date():
124
  logging.info(f"AMC Reminder for Device ID {row['device_id']}")
125
+ amc_date_str = amc_date
126
  except:
127
  amc_date_str = None
128
 
 
150
  total_devices = df["device_id"].nunique()
151
  most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
152
  prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
153
+ summary = summarizer(prompt, max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
154
  return summary
155
  except Exception as e:
156
  logging.error(f"Summary generation failed: {str(e)}")
 
162
  if "usage_hours" not in df.columns or "downtime" not in df.columns:
163
  return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
164
  features = df[["usage_hours", "downtime"]].fillna(0)
165
+ if len(features) > 200: # Reduced sample size
166
+ features = features.sample(n=200, random_state=42)
167
  iso_forest = IsolationForest(contamination=0.1, random_state=42)
168
  df["anomaly"] = iso_forest.fit_predict(features)
169
  anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
 
196
  total_devices = df["device_id"].nunique()
197
  avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
198
  prompt = f"Insights: {total_devices} devices, avg usage {avg_usage:.2f} hours."
199
+ insights = summarizer(prompt, max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
200
  return insights
201
  except Exception as e:
202
  logging.error(f"Dashboard insights generation failed: {str(e)}")
 
337
  logging.error(f"Failed to generate device cards: {str(e)}")
338
  return f'<p>Error generating device cards: {str(e)}</p>'
339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  # Generate PDF content
341
+ def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart):
342
  if not reportlab_available:
343
  return None
344
  try:
345
+ pdf_path = f"status_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
346
  doc = SimpleDocTemplate(pdf_path, pagesize=letter)
347
  styles = getSampleStyleSheet()
348
  story = []
 
350
  def safe_paragraph(text, style):
351
  return Paragraph(str(text).replace('\n', '<br/>'), style) if text else Paragraph("", style)
352
 
353
+ story.append(Paragraph("LabOps Status Report", styles['Title']))
354
  story.append(Paragraph(f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
355
  story.append(Spacer(1, 12))
356
 
 
 
 
 
 
 
357
  story.append(Paragraph("Summary Report", styles['Heading2']))
358
  story.append(safe_paragraph(summary, styles['Normal']))
359
  story.append(Spacer(1, 12))
 
408
  return None
409
 
410
  # Main processing function
411
+ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, last_modified_state):
412
  start_time = time.time()
413
  try:
414
  if not file_obj:
 
456
  start_date = today + pd.Timedelta(days=days_start)
457
  end_date = today + pd.Timedelta(days=days_end) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
458
  filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
 
 
 
 
 
 
459
 
460
  if filtered_df.empty:
461
  return "No data after applying filters.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
 
474
  future_downtime_chart = executor.submit(create_downtime_chart, filtered_df)
475
  future_daily_log_chart = executor.submit(create_daily_log_trends_chart, filtered_df)
476
  future_weekly_uptime_chart = executor.submit(create_weekly_uptime_chart, filtered_df)
477
+ future_anomaly_alerts_chart = executor.submit(create_anomaly_alerts_chart, future_anomalies.result()[1]) # Pass anomalies_df
478
  future_device_cards = executor.submit(generate_device_cards, filtered_df)
 
479
 
480
  summary = f"Step 1: Summary Report\n{future_summary.result()}"
481
  anomalies, anomalies_df = future_anomalies.result()
 
491
  device_cards = future_device_cards.result()
492
 
493
  save_to_salesforce(filtered_df, reminders_df)
494
+ pdf_file = generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart)
495
 
496
  elapsed_time = time.time() - start_time
497
  logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
 
504
  return f"Error: {str(e)}", pd.DataFrame(), None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state
505
 
506
  # Update filters
507
+ def update_filters(file_obj, current_file_state):
508
+ if not file_obj or file_obj.name == current_file_state:
509
+ return gr.update(), gr.update(), current_file_state
510
  try:
511
  with open(file_obj.name, 'rb') as f:
512
  csv_content = f.read().decode('utf-8')
 
515
 
516
  lab_site_options = ['All'] + [site for site in df['lab_site'].dropna().astype(str).unique().tolist() if site.strip()] if 'lab_site' in df.columns else ['All']
517
  equipment_type_options = ['All'] + [equip for equip in df['equipment_type'].dropna().astype(str).unique().tolist() if equip.strip()] if 'equipment_type' in df.columns else ['All']
 
518
 
519
+ return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), file_obj.name
520
  except Exception as e:
521
  logging.error(f"Failed to update filters: {str(e)}")
522
+ return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), current_file_state
523
 
524
  # Gradio Interface
525
  try:
 
540
  gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard with the latest data.")
541
 
542
  last_modified_state = gr.State(value=None)
543
+ current_file_state = gr.State(value=None)
544
 
545
  with gr.Row():
546
  with gr.Column(scale=1):
 
550
  lab_site_filter = gr.Dropdown(label="Lab Site", choices=['All'], value='All', interactive=True)
551
  equipment_type_filter = gr.Dropdown(label="Equipment Type", choices=['All'], value='All', interactive=True)
552
  date_range_filter = gr.Slider(label="Date Range (Days from Today)", minimum=-365, maximum=0, step=1, value=[-30, 0])
 
553
  submit_button = gr.Button("Analyze", variant="primary")
554
 
555
  with gr.Column(scale=2):
 
587
  insights_output = gr.Markdown()
588
  with gr.Group(elem_classes="dashboard-section"):
589
  gr.Markdown("### Export Report")
590
+ pdf_output = gr.File(label="Download Status Report as PDF")
591
 
592
  file_input.change(
593
  fn=update_filters,
594
+ inputs=[file_input, current_file_state],
595
+ outputs=[lab_site_filter, equipment_type_filter, current_file_state],
596
  queue=False
597
  )
598
 
599
  submit_button.click(
600
  fn=process_logs,
601
+ inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, last_modified_state],
602
  outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
603
  )
604