RathodHarish committed on
Commit 0ee3423 · verified · 1 Parent(s): 88f1133

Update app.py

Files changed (1):
  1. app.py +29 -5
app.py CHANGED
@@ -12,6 +12,7 @@ import os
 import io
 import time
 import uuid
+import functools
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -201,7 +202,7 @@ def save_to_salesforce(df, reminders_df):
         records.append(record)
 
     if records:
-        batch_size = 100  # Reduced batch size for faster processing
+        batch_size = 100
         for i in range(0, len(records), batch_size):
            batch = records[i:i + batch_size]
            try:
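Note: this hunk only drops the explanatory comment; the fixed batch_size of 100 stays. A minimal, self-contained sketch of the same chunk-and-upload pattern; the `sf` client (simple_salesforce) and the object name `Equipment_Log__c` are assumptions for illustration, not taken from this commit:

    import logging

    def upload_in_batches(sf, records, batch_size=100):
        """Send records to Salesforce in fixed-size chunks, logging failures."""
        for i in range(0, len(records), batch_size):
            batch = records[i:i + batch_size]
            try:
                sf.bulk.Equipment_Log__c.insert(batch)  # hypothetical object name
            except Exception as e:
                logging.error(f"Batch starting at record {i} failed: {e}")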
@@ -238,9 +239,9 @@ def detect_anomalies(df):
     if "usage_hours" not in df.columns or "downtime" not in df.columns:
         return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
     features = df[["usage_hours", "downtime"]].fillna(0)
-    if len(features) > 200:  # Reduced sample size for faster processing
+    if len(features) > 200:
         features = features.sample(n=200, random_state=42)
-    iso_forest = IsolationForest(contamination=0.1, random_state=42, n_estimators=50)  # Reduced n_estimators
+    iso_forest = IsolationForest(contamination=0.1, random_state=42, n_estimators=50)
     df["anomaly"] = iso_forest.fit_predict(features)
     anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
     if anomalies.empty:
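Note: when the sampling branch fires, `features` shrinks to 200 rows, so assigning `iso_forest.fit_predict(features)` to `df["anomaly"]` would raise a length-mismatch error on larger inputs. A minimal sketch of one way to keep the speed win while still labeling every row (an assumption about intent, not the committed code; `df` and `features` are the names from the function above):

    from sklearn.ensemble import IsolationForest

    # Fit on a sample for speed, but predict for the full frame so the
    # labels line up with df's index.
    sample = features.sample(n=min(len(features), 200), random_state=42)
    iso_forest = IsolationForest(contamination=0.1, random_state=42, n_estimators=50)
    iso_forest.fit(sample)
    df["anomaly"] = iso_forest.predict(features)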
@@ -285,7 +286,22 @@ def generate_dashboard_insights(df):
         logging.error(f"Dashboard insights generation failed: {str(e)}")
         return f"Dashboard insights generation failed: {str(e)}"
 
+# Cache DataFrame processing
+def cache_dataframe(func):
+    @functools.wraps(func)
+    def wrapper(df, *args, **kwargs):
+        cache_key = f"{id(df)}_{func.__name__}"
+        if not hasattr(wrapper, 'cache'):
+            wrapper.cache = {}
+        if cache_key in wrapper.cache:
+            return wrapper.cache[cache_key]
+        result = func(df, *args, **kwargs)
+        wrapper.cache[cache_key] = result
+        return result
+    return wrapper
+
 # Create usage chart
+@cache_dataframe
 def create_usage_chart(df):
     try:
         if df.empty:
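Note: keying the cache on `id(df)` is fragile: CPython reuses object ids after garbage collection, and mutating the same DataFrame in place leaves its id unchanged, so the cache can serve stale or mismatched chart data. A hedged alternative keyed on contents; `pd.util.hash_pandas_object` is a real pandas utility, while the helper name is mine:

    import pandas as pd

    def df_cache_key(df: pd.DataFrame, func_name: str) -> str:
        # Hash the values and index so the key changes whenever the data does.
        digest = pd.util.hash_pandas_object(df, index=True).sum()
        return f"{func_name}_{digest}"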
@@ -307,6 +323,7 @@ def create_usage_chart(df):
         return None
 
 # Create downtime chart
+@cache_dataframe
 def create_downtime_chart(df):
     try:
         downtime_data = df.groupby("device_id")["downtime"].sum().reset_index()
@@ -326,6 +343,7 @@ def create_downtime_chart(df):
         return None
 
 # Create daily log trends chart
+@cache_dataframe
 def create_daily_log_trends_chart(df):
     try:
         df['date'] = df['timestamp'].dt.date
@@ -344,6 +362,7 @@ def create_daily_log_trends_chart(df):
         return None
 
 # Create weekly uptime chart
+@cache_dataframe
 def create_weekly_uptime_chart(df):
     try:
         df['week'] = df['timestamp'].dt.isocalendar().week
@@ -368,6 +387,7 @@ def create_weekly_uptime_chart(df):
         return None
 
 # Create anomaly alerts chart
+@cache_dataframe
 def create_anomaly_alerts_chart(anomalies_df):
     try:
         if anomalies_df.empty:
@@ -516,6 +536,10 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
             "amc_date": "string"
         }
         df = pd.read_csv(file_path, dtype=dtypes, usecols=required_columns)
+        if len(df) > 10000:  # Early exit for large datasets
+            df = df.sample(n=10000, random_state=42)
+            logging.warning("Dataset too large, sampled to 10,000 rows")
+
         missing_columns = [col for col in required_columns if col not in df.columns]
         if missing_columns:
             return f"Missing columns: {missing_columns}", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
@@ -548,7 +572,7 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
         preview_html = preview_df.to_html(index=False, classes='table table-striped', border=0)
 
         # Run tasks concurrently
-        with ThreadPoolExecutor(max_workers=8) as executor:  # Increased workers for better parallelization
+        with ThreadPoolExecutor(max_workers=8) as executor:
             future_summary = executor.submit(summarize_logs, filtered_df)
             future_anomalies = executor.submit(detect_anomalies, filtered_df)
             future_amc = executor.submit(check_amc_reminders, filtered_df, datetime.now())
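Note: `executor.submit` returns Future objects immediately; each task only blocks when its `.result()` is called. A minimal sketch of the collection step, assuming `detect_anomalies` keeps the (message, DataFrame) return shape shown in the earlier hunk; the rest of `process_logs` is not visible in this diff:

    # Block until each task finishes and unpack its return value.
    summary = future_summary.result()
    anomaly_msg, anomalies_df = future_anomalies.result()
    amc_reminders = future_amc.result()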
@@ -681,7 +705,7 @@ try:
    submit_button.click(
        fn=process_logs,
        inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, last_modified_state],
-       outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_chart, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
+       outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
    )
 
    logging.info("Gradio interface initialized successfully")
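Note: the only change here renames `anomaly_alerts_chart` to `anomaly_alerts_output` in the outputs list, presumably to match the name under which the Gradio component is actually defined; referencing an undefined name would raise a NameError when the interface is built. A hedged illustration of the kind of definition the rename would point at (the component type and label are assumptions, not shown in this diff):

    import gradio as gr

    # Hypothetical component definition elsewhere in app.py.
    anomaly_alerts_output = gr.Plot(label="Anomaly Alerts")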
 