lavanya121 committed on
Commit ce7579b · verified · 1 Parent(s): 92a0daa

Update app.py

Files changed (1): app.py (+170 -356)
app.py CHANGED
@@ -3,31 +3,17 @@ import pandas as pd
 from datetime import datetime, timedelta
 import logging
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.ensemble import IsolationForest
-from transformers import pipeline
-import torch
-from concurrent.futures import ThreadPoolExecutor
-from simple_salesforce import Salesforce
+from concurrent.futures import ThreadPoolExecutor  # Added missing import
 import os
 import io
 import time
+import asyncio
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
-# Salesforce configuration
-try:
-    sf = Salesforce(
-        username='multi-devicelabopsdashboard@sathkrutha.com',
-        password='Team@1234',
-        security_token=os.getenv('SF_SECURITY_TOKEN', ''),
-        domain='login'
-    )
-    logging.info("Salesforce connection established")
-except Exception as e:
-    logging.error(f"Failed to connect to Salesforce: {str(e)}")
-    sf = None
-
 # Try to import reportlab
 try:
     from reportlab.lib.pagesizes import letter
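A side note on the removed block above: the old revision embedded a live Salesforce username and password directly in app.py, which is worth flagging independently of the refactor. If the integration is ever restored, a minimal sketch along these lines (same simple_salesforce client as before; SF_USERNAME and SF_PASSWORD are assumed, not existing, variable names) would keep the secrets out of the repository:

import logging
import os

from simple_salesforce import Salesforce

def connect_salesforce():
    # Hypothetical helper: every credential is read from the environment,
    # so nothing secret lands in version control.
    try:
        return Salesforce(
            username=os.environ['SF_USERNAME'],    # assumed env var
            password=os.environ['SF_PASSWORD'],    # assumed env var
            security_token=os.getenv('SF_SECURITY_TOKEN', ''),
            domain='login'
        )
    except Exception as e:
        logging.error(f"Failed to connect to Salesforce: {e}")
        return None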
@@ -40,220 +26,36 @@ except ImportError:
     logging.warning("reportlab module not found. PDF generation disabled.")
     reportlab_available = False
 
-# Preload Hugging Face model with optimization
-logging.info("Preloading Hugging Face model...")
-try:
-    device = 0 if torch.cuda.is_available() else -1
-    summarizer = pipeline(
-        "summarization",
-        model="t5-small",
-        device=device,
-        max_length=50,
-        min_length=10,
-        num_beams=2
-    )
-    logging.info(f"Hugging Face model preloaded on {'GPU' if device == 0 else 'CPU'}")
-except Exception as e:
-    logging.error(f"Failed to preload model: {str(e)}")
-    raise e
-
-# Cache picklist values at startup
-def get_picklist_values(field_name):
-    if sf is None:
-        return []
-    try:
-        obj_desc = sf.SmartLog__c.describe()
-        for field in obj_desc['fields']:
-            if field['name'] == field_name:
-                return [value['value'] for value in field['picklistValues'] if value['active']]
-        return []
-    except Exception as e:
-        logging.error(f"Failed to fetch picklist values for {field_name}: {str(e)}")
-        return []
-
-status_values = get_picklist_values('Status__c') or ["Active", "Inactive", "Pending"]
-log_type_values = get_picklist_values('Log_Type__c') or ["Smart Log", "Cell Analysis", "UV Verification"]
-logging.info(f"Valid Status__c values: {status_values}")
-logging.info(f"Valid Log_Type__c values: {log_type_values}")
-
-# Map invalid picklist values
-picklist_mapping = {
-    'Status__c': {
-        'normal': 'Active',
-        'error': 'Inactive',
-        'warning': 'Pending',
-        'ok': 'Active',
-        'failed': 'Inactive'
-    },
-    'Log_Type__c': {
-        'maint': 'Smart Log',
-        'error': 'Cell Analysis',
-        'ops': 'UV Verification',
-        'maintenance': 'Smart Log',
-        'cell': 'Cell Analysis',
-        'uv': 'UV Verification',
-        'weight log': 'Smart Log'
-    }
-}
-
-# Cache folder ID
-def get_folder_id(folder_name):
-    if sf is None:
-        return None
-    try:
-        query = f"SELECT Id FROM Folder WHERE Name = '{folder_name}' AND Type = 'Report'"
-        result = sf.query(query)
-        if result['totalSize'] > 0:
-            folder_id = result['records'][0]['Id']
-            logging.info(f"Found folder ID for '{folder_name}': {folder_id}")
-            return folder_id
-        else:
-            logging.error(f"Folder '{folder_name}' not found in Salesforce.")
-            return None
-    except Exception as e:
-        logging.error(f"Failed to fetch folder ID for '{folder_name}': {str(e)}")
-        return None
-
-LABOPS_REPORTS_FOLDER_ID = get_folder_id('LabOps Reports')
-
-# Salesforce report creation
-def create_salesforce_reports(df):
-    if sf is None or not LABOPS_REPORTS_FOLDER_ID:
-        return
-    try:
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        reports = [
-            {
-                "reportMetadata": {
-                    "name": f"SmartLog_Usage_Report_{timestamp}",
-                    "developerName": f"SmartLog_Usage_Report_{timestamp}",
-                    "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
-                    "reportFormat": "TABULAR",
-                    "reportBooleanFilter": None,
-                    "reportFilters": [],
-                    "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.Usage_Hours__c"],
-                    "folderId": LABOPS_REPORTS_FOLDER_ID
-                }
-            },
-            {
-                "reportMetadata": {
-                    "name": f"SmartLog_AMC_Reminders_{timestamp}",
-                    "developerName": f"SmartLog_AMC_Reminders_{timestamp}",
-                    "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
-                    "reportFormat": "TABULAR",
-                    "reportBooleanFilter": None,
-                    "reportFilters": [],
-                    "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.AMC_Date__c"],
-                    "folderId": LABOPS_REPORTS_FOLDER_ID
-                }
-            }
-        ]
-        for report in reports:
-            sf.restful('analytics/reports', method='POST', json=report)
-        logging.info("Salesforce reports created")
-    except Exception as e:
-        logging.error(f"Failed to create Salesforce reports: {str(e)}")
-
-# Save to Salesforce
-def save_to_salesforce(df, reminders_df):
-    if sf is None:
-        logging.error("No Salesforce connection available")
-        return
-    try:
-        logging.info("Starting Salesforce save operation")
-        current_date = datetime.now()
-        next_30_days = current_date + timedelta(days=30)
-        records = []
-        reminder_device_ids = set(reminders_df['device_id']) if not reminders_df.empty else set()
-        logging.info(f"Processing {len(df)} records for Salesforce")
-
-        for idx, row in df.iterrows():
-            status = str(row['status']).lower()
-            log_type = str(row['log_type']).lower()
-            status_mapped = picklist_mapping['Status__c'].get(status, status_values[0] if status_values else 'Active')
-            log_type_mapped = picklist_mapping['Log_Type__c'].get(log_type, log_type_values[0] if log_type_values else 'Smart Log')
-
-            if not status_mapped or not log_type_mapped:
-                logging.warning(f"Skipping record {idx}: Invalid status ({status}) or log_type ({log_type})")
-                continue
-
-            amc_date_str = None
-            if pd.notna(row['amc_date']):
-                try:
-                    amc_date = pd.to_datetime(row['amc_date']).strftime('%Y-%m-%d')
-                    amc_date_str = amc_date
-                    amc_date_dt = datetime.strptime(amc_date, '%Y-%m-%d')
-                    if status_mapped == "Active" and current_date.date() <= amc_date_dt.date() <= next_30_days.date():
-                        logging.info(f"AMC Reminder for Device ID {row['device_id']}: {amc_date}")
-                except Exception as e:
-                    logging.warning(f"Invalid AMC date for Device ID {row['device_id']}: {str(e)}")
-
-            record = {
-                'Device_Id__c': str(row['device_id'])[:50],
-                'Log_Type__c': log_type_mapped,
-                'Status__c': status_mapped,
-                'Timestamp__c': row['timestamp'].isoformat() if pd.notna(row['timestamp']) else None,
-                'Usage_Hours__c': float(row['usage_hours']) if pd.notna(row['usage_hours']) else 0.0,
-                'Downtime__c': float(row['downtime']) if pd.notna(row['downtime']) else 0.0,
-                'AMC_Date__c': amc_date_str
-            }
-            records.append(record)
-
-        if records:
-            batch_size = 200  # Smaller batch size for faster processing
-            for i in range(0, len(records), batch_size):
-                batch = records[i:i + batch_size]
-                try:
-                    result = sf.bulk.SmartLog__c.insert(batch)
-                    logging.info(f"Saved {len(batch)} records to Salesforce in batch {i//batch_size + 1}")
-                    for res in result:
-                        if not res['success']:
-                            logging.error(f"Failed to save record: {res['errors']}")
-                except Exception as e:
-                    logging.error(f"Failed to save batch {i//batch_size + 1}: {str(e)}")
-        else:
-            logging.warning("No records to save to Salesforce")
-    except Exception as e:
-        logging.error(f"Failed to save to Salesforce: {str(e)}")
-
 # Summarize logs
 def summarize_logs(df):
-    start_time = time.time()
     try:
         total_devices = df["device_id"].nunique()
-        most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
-        prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
-        summary = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
-        logging.info(f"Summary generation took {time.time() - start_time:.2f} seconds")
-        return summary
+        total_usage = df["usage_hours"].sum() if "usage_hours" in df.columns else 0
+        return f"{total_devices} devices processed with {total_usage:.2f} total usage hours."
     except Exception as e:
         logging.error(f"Summary generation failed: {str(e)}")
-        return f"Failed to generate summary: {str(e)}"
+        return "Failed to generate summary."
 
 # Anomaly detection
 def detect_anomalies(df):
-    start_time = time.time()
     try:
         if "usage_hours" not in df.columns or "downtime" not in df.columns:
             return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
         features = df[["usage_hours", "downtime"]].fillna(0)
-        if len(features) > 500:
-            features = features.sample(n=500, random_state=42)
+        if len(features) > 50:
+            features = features.sample(n=50, random_state=42)
         iso_forest = IsolationForest(contamination=0.1, random_state=42)
         df["anomaly"] = iso_forest.fit_predict(features)
         anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
         if anomalies.empty:
             return "No anomalies detected.", anomalies
-        result = "\n".join([f"- Device ID: {row['device_id']}, Usage: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}" for _, row in anomalies.head(5).iterrows()])
-        logging.info(f"Anomaly detection took {time.time() - start_time:.2f} seconds")
-        return result, anomalies
+        return "\n".join([f"- Device ID: {row['device_id']}, Usage: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}" for _, row in anomalies.head(5).iterrows()]), anomalies
     except Exception as e:
         logging.error(f"Anomaly detection failed: {str(e)}")
         return f"Anomaly detection failed: {str(e)}", pd.DataFrame()
 
 # AMC reminders
 def check_amc_reminders(df, current_date):
-    start_time = time.time()
     try:
         if "device_id" not in df.columns or "amc_date" not in df.columns:
             return "AMC reminders require 'device_id' and 'amc_date' columns.", pd.DataFrame()
@@ -263,32 +65,39 @@ def check_amc_reminders(df, current_date):
         reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]]
         if reminders.empty:
             return "No AMC reminders due within the next 30 days.", reminders
-        result = "\n".join([f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}" for _, row in reminders.head(5).iterrows()])
-        logging.info(f"AMC reminders generation took {time.time() - start_time:.2f} seconds")
-        return result, reminders
+        return "\n".join([f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}" for _, row in reminders.head(5).iterrows()]), reminders
     except Exception as e:
         logging.error(f"AMC reminder generation failed: {str(e)}")
         return f"AMC reminder generation failed: {str(e)}", pd.DataFrame()
 
 # Dashboard insights
 def generate_dashboard_insights(df):
-    start_time = time.time()
     try:
         total_devices = df["device_id"].nunique()
         avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
-        prompt = f"Insights: {total_devices} devices, avg usage {avg_usage:.2f} hours."
-        insights = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
-        logging.info(f"Insights generation took {time.time() - start_time:.2f} seconds")
-        return insights
+        return f"{total_devices} devices with average usage of {avg_usage:.2f} hours."
     except Exception as e:
         logging.error(f"Dashboard insights generation failed: {str(e)}")
-        return f"Dashboard insights generation failed: {str(e)}"
+        return "Failed to generate insights."
+
+# Placeholder chart for empty data
+def create_placeholder_chart(title):
+    fig = go.Figure()
+    fig.add_annotation(
+        text="No data available for this chart",
+        xref="paper", yref="paper",
+        x=0.5, y=0.5, showarrow=False,
+        font=dict(size=16)
+    )
+    fig.update_layout(title=title, margin=dict(l=20, r=20, t=40, b=20))
+    return fig
 
 # Create usage chart
 def create_usage_chart(df):
     try:
-        if df.empty:
-            return None
+        if df.empty or "usage_hours" not in df.columns or "device_id" not in df.columns:
+            logging.warning("Insufficient data for usage chart")
+            return create_placeholder_chart("Usage Hours per Device")
         usage_data = df.groupby("device_id")["usage_hours"].sum().reset_index()
         if len(usage_data) > 5:
             usage_data = usage_data.nlargest(5, "usage_hours")
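The new create_placeholder_chart helper means every chart function in the file now returns a Plotly figure, never None, so the dashboard's plot components always have something to render. A quick illustration using the functions defined in this file:

import pandas as pd

# An empty frame no longer yields None; it yields an annotated empty figure.
fig = create_usage_chart(pd.DataFrame())
fig.show()  # renders "No data available for this chart" in the plot area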
@@ -303,11 +112,14 @@
         return fig
     except Exception as e:
         logging.error(f"Failed to create usage chart: {str(e)}")
-        return None
+        return create_placeholder_chart("Usage Hours per Device")
 
 # Create downtime chart
 def create_downtime_chart(df):
     try:
+        if df.empty or "downtime" not in df.columns or "device_id" not in df.columns:
+            logging.warning("Insufficient data for downtime chart")
+            return create_placeholder_chart("Downtime per Device")
         downtime_data = df.groupby("device_id")["downtime"].sum().reset_index()
         if len(downtime_data) > 5:
             downtime_data = downtime_data.nlargest(5, "downtime")
@@ -322,13 +134,18 @@
         return fig
     except Exception as e:
         logging.error(f"Failed to create downtime chart: {str(e)}")
-        return None
+        return create_placeholder_chart("Downtime per Device")
 
 # Create daily log trends chart
 def create_daily_log_trends_chart(df):
     try:
-        df['date'] = df['timestamp'].dt.date
+        if df.empty or "timestamp" not in df.columns:
+            logging.warning("Insufficient data for daily log trends chart")
+            return create_placeholder_chart("Daily Log Trends")
+        df['date'] = pd.to_datetime(df['timestamp'], errors='coerce').dt.date
         daily_logs = df.groupby('date').size().reset_index(name='log_count')
+        if daily_logs.empty:
+            return create_placeholder_chart("Daily Log Trends")
         fig = px.line(
             daily_logs,
             x='date',
@@ -340,19 +157,24 @@
         return fig
     except Exception as e:
         logging.error(f"Failed to create daily log trends chart: {str(e)}")
-        return None
+        return create_placeholder_chart("Daily Log Trends")
 
 # Create weekly uptime chart
 def create_weekly_uptime_chart(df):
     try:
-        df['week'] = df['timestamp'].dt.isocalendar().week
-        df['year'] = df['timestamp'].dt.year
+        if df.empty or "timestamp" not in df.columns or "usage_hours" not in df.columns or "downtime" not in df.columns:
+            logging.warning("Insufficient data for weekly uptime chart")
+            return create_placeholder_chart("Weekly Uptime Percentage")
+        df['week'] = pd.to_datetime(df['timestamp'], errors='coerce').dt.isocalendar().week
+        df['year'] = pd.to_datetime(df['timestamp'], errors='coerce').dt.year
         weekly_data = df.groupby(['year', 'week']).agg({
             'usage_hours': 'sum',
             'downtime': 'sum'
         }).reset_index()
         weekly_data['uptime_percent'] = (weekly_data['usage_hours'] / (weekly_data['usage_hours'] + weekly_data['downtime'])) * 100
         weekly_data['year_week'] = weekly_data['year'].astype(str) + '-W' + weekly_data['week'].astype(str)
+        if weekly_data.empty:
+            return create_placeholder_chart("Weekly Uptime Percentage")
         fig = px.bar(
             weekly_data,
             x='year_week',
@@ -364,15 +186,18 @@
         return fig
     except Exception as e:
         logging.error(f"Failed to create weekly uptime chart: {str(e)}")
-        return None
+        return create_placeholder_chart("Weekly Uptime Percentage")
 
 # Create anomaly alerts chart
 def create_anomaly_alerts_chart(anomalies_df):
     try:
-        if anomalies_df.empty:
-            return None
-        anomalies_df['date'] = anomalies_df['timestamp'].dt.date
+        if anomalies_df is None or anomalies_df.empty or "timestamp" not in anomalies_df.columns:
+            logging.warning("Insufficient data for anomaly alerts chart")
+            return create_placeholder_chart("Anomaly Alerts Over Time")
+        anomalies_df['date'] = pd.to_datetime(anomalies_df['timestamp'], errors='coerce').dt.date
         anomaly_counts = anomalies_df.groupby('date').size().reset_index(name='anomaly_count')
+        if anomaly_counts.empty:
+            return create_placeholder_chart("Anomaly Alerts Over Time")
         fig = px.scatter(
             anomaly_counts,
             x='date',
@@ -384,7 +209,7 @@
         return fig
     except Exception as e:
         logging.error(f"Failed to create anomaly alerts chart: {str(e)}")
-        return None
+        return create_placeholder_chart("Anomaly Alerts Over Time")
 
 # Generate device cards
 def generate_device_cards(df):
@@ -419,32 +244,12 @@ def generate_device_cards(df):
         logging.error(f"Failed to generate device cards: {str(e)}")
         return f'<p>Error generating device cards: {str(e)}</p>'
 
-# Generate monthly status
-def generate_monthly_status(df, selected_month):
-    try:
-        total_devices = df['device_id'].nunique()
-        total_usage_hours = df['usage_hours'].sum()
-        total_downtime = df['downtime'].sum()
-        avg_usage = total_usage_hours / total_devices if total_devices > 0 else 0
-        avg_downtime = total_downtime / total_devices if total_devices > 0 else 0
-        return f"""
-        Monthly Status for {selected_month}:
-        - Total Devices: {total_devices}
-        - Total Usage Hours: {total_usage_hours:.2f}
-        - Total Downtime Hours: {total_downtime:.2f}
-        - Average Usage per Device: {avg_usage:.2f} hours
-        - Average Downtime per Device: {avg_downtime:.2f} hours
-        """
-    except Exception as e:
-        logging.error(f"Failed to generate monthly status: {str(e)}")
-        return f"Failed to generate monthly status: {str(e)}"
-
 # Generate PDF content
-def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, df, selected_month):
+def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart):
     if not reportlab_available:
         return None
     try:
-        pdf_path = f"monthly_status_report_{selected_month.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
+        pdf_path = f"status_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
         doc = SimpleDocTemplate(pdf_path, pagesize=letter)
         styles = getSampleStyleSheet()
         story = []
@@ -452,16 +257,10 @@ def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights
         def safe_paragraph(text, style):
             return Paragraph(str(text).replace('\n', '<br/>'), style) if text else Paragraph("", style)
 
-        story.append(Paragraph("LabOps Monthly Status Report", styles['Title']))
+        story.append(Paragraph("LabOps Status Report", styles['Title']))
         story.append(Paragraph(f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
         story.append(Spacer(1, 12))
 
-        if selected_month != "All":
-            monthly_status = generate_monthly_status(df, selected_month)
-            story.append(Paragraph("Monthly Status Summary", styles['Heading2']))
-            story.append(safe_paragraph(monthly_status, styles['Normal']))
-            story.append(Spacer(1, 12))
-
         story.append(Paragraph("Summary Report", styles['Heading2']))
         story.append(safe_paragraph(summary, styles['Normal']))
         story.append(Spacer(1, 12))
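For readers unfamiliar with the reportlab platypus flow used throughout generate_pdf_content: a story is simply a list of flowables (paragraphs, spacers, tables) that the document template lays out when build is called. A self-contained sketch of the same pattern, reduced to a single page:

from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer

styles = getSampleStyleSheet()
story = [
    Paragraph("LabOps Status Report", styles['Title']),
    Spacer(1, 12),  # 12 points of vertical space
    Paragraph("Body text goes here.", styles['Normal']),
]
SimpleDocTemplate("example.pdf", pagesize=letter).build(story)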
@@ -516,111 +315,117 @@ def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights
         return None
 
 # Main processing function
-async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, month_filter, last_modified_state):
+async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, last_modified_state, cached_df_state, cached_filtered_df_state):
     start_time = time.time()
     try:
         if not file_obj:
-            return "No file uploaded.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "No anomalies detected.", "No AMC reminders.", "No insights generated.", None, last_modified_state
+            return "No file uploaded.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "No anomalies detected.", "No AMC reminders.", "No insights generated.", None, last_modified_state, cached_df_state, cached_filtered_df_state
 
         file_path = file_obj.name
         current_modified_time = os.path.getmtime(file_path)
-        if last_modified_state and current_modified_time == last_modified_state:
-            return None, None, None, None, None, None, None, None, None, None, None, None, last_modified_state
-
-        logging.info(f"Processing file: {file_path}")
-        if not file_path.endswith(".csv"):
-            return "Please upload a CSV file.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "", "", "", None, last_modified_state
-
-        required_columns = ["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]
-        dtypes = {
-            "device_id": "string",
-            "log_type": "string",
-            "status": "string",
-            "usage_hours": "float32",
-            "downtime": "float32",
-            "amc_date": "string"
-        }
-        df = pd.read_csv(file_path, dtype=dtypes)
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            return f"Missing columns: {missing_columns}", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
-
-        df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
-        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
-        if df["timestamp"].dt.tz is None:
-            df["timestamp"] = df["timestamp"].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
-        if df.empty:
-            return "No data available.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
-
-        # Apply filters
-        filtered_df = df.copy()
-        if lab_site_filter and lab_site_filter != 'All' and 'lab_site' in filtered_df.columns:
-            filtered_df = filtered_df[filtered_df['lab_site'] == lab_site_filter]
-        if equipment_type_filter and equipment_type_filter != 'All' and 'equipment_type' in filtered_df.columns:
-            filtered_df = filtered_df[filtered_df['equipment_type'] == equipment_type_filter]
-        if date_range and len(date_range) == 2:
-            days_start, days_end = date_range
-            today = pd.to_datetime(datetime.now().date()).tz_localize('Asia/Kolkata')
-            start_date = today + pd.Timedelta(days=days_start)
-            end_date = today + pd.Timedelta(days=days_end) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
-            filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
-        if month_filter and month_filter != "All":
-            selected_date = pd.to_datetime(month_filter, format="%B %Y")
-            filtered_df = filtered_df[
-                (filtered_df['timestamp'].dt.year == selected_date.year) &
-                (filtered_df['timestamp'].dt.month == selected_date.month)
-            ]
-
-        if filtered_df.empty:
-            return "No data after applying filters.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state
+        if last_modified_state and current_modified_time == last_modified_state and cached_filtered_df_state is not None:
+            filtered_df = cached_filtered_df_state
+        else:
+            if cached_df_state is None or current_modified_time != last_modified_state:
+                logging.info(f"Processing file: {file_path}")
+                if not file_path.endswith(".csv"):
+                    return "Please upload a CSV file.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "", "", "", None, last_modified_state, cached_df_state, cached_filtered_df_state
+
+                required_columns = ["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]
+                dtypes = {
+                    "device_id": "string",
+                    "log_type": "string",
+                    "status": "string",
+                    "usage_hours": "float32",
+                    "downtime": "float32",
+                    "amc_date": "string"
+                }
+                df = pd.read_csv(file_path, dtype=dtypes)
+                missing_columns = [col for col in required_columns if col not in df.columns]
+                if missing_columns:
+                    return f"Missing columns: {missing_columns}", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, cached_df_state, cached_filtered_df_state
+
+                df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
+                df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
+                if df["timestamp"].dt.tz is None:
+                    df["timestamp"] = df["timestamp"].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
+                if df.empty:
+                    return "No data available.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, df, cached_filtered_df_state
+            else:
+                df = cached_df_state
+
+            # Apply filters
+            filtered_df = df.copy()
+            if lab_site_filter and lab_site_filter != 'All' and 'lab_site' in filtered_df.columns:
+                filtered_df = filtered_df[filtered_df['lab_site'] == lab_site_filter]
+            if equipment_type_filter and equipment_type_filter != 'All' and 'equipment_type' in filtered_df.columns:
+                filtered_df = filtered_df[filtered_df['equipment_type'] == equipment_type_filter]
+            if date_range and len(date_range) == 2:
+                days_start, days_end = date_range
+                today = pd.to_datetime(datetime.now().date()).tz_localize('Asia/Kolkata')
+                start_date = today + pd.Timedelta(days=days_start)
+                end_date = today + pd.Timedelta(days=days_end) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
+                filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
+
+            if filtered_df.empty:
+                return "No data after applying filters.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, df, filtered_df
 
         # Generate table for preview
         preview_df = filtered_df[['device_id', 'log_type', 'status', 'timestamp', 'usage_hours', 'downtime', 'amc_date']].head(5)
         preview_html = preview_df.to_html(index=False, classes='table table-striped', border=0)
 
-        # Run tasks concurrently
-        with ThreadPoolExecutor(max_workers=6) as executor:
-            future_summary = executor.submit(summarize_logs, filtered_df)
-            future_anomalies = executor.submit(detect_anomalies, filtered_df)
-            future_amc = executor.submit(check_amc_reminders, filtered_df, datetime.now())
-            future_insights = executor.submit(generate_dashboard_insights, filtered_df)
-            future_usage_chart = executor.submit(create_usage_chart, filtered_df)
-            future_downtime_chart = executor.submit(create_downtime_chart, filtered_df)
-            future_daily_log_chart = executor.submit(create_daily_log_trends_chart, filtered_df)
-            future_weekly_uptime_chart = executor.submit(create_weekly_uptime_chart, filtered_df)
-            future_device_cards = executor.submit(generate_device_cards, filtered_df)
-            future_reports = executor.submit(create_salesforce_reports, filtered_df)
-
-        summary = f"Step 1: Summary Report\n{future_summary.result()}"
-        anomalies, anomalies_df = future_anomalies.result()
-        anomalies = f"Anomaly Detection\n{anomalies}"
-        amc_reminders, reminders_df = future_amc.result()
-        amc_reminders = f"AMC Reminders\n{amc_reminders}"
-        insights = f"Dashboard Insights (AI)\n{future_insights.result()}"
-        usage_chart = future_usage_chart.result()
-        downtime_chart = future_downtime_chart.result()
-        daily_log_chart = future_daily_log_chart.result()
-        weekly_uptime_chart = future_weekly_uptime_chart.result()
-        anomaly_alerts_chart = create_anomaly_alerts_chart(anomalies_df)  # Use anomalies_df
-        device_cards = future_device_cards.result()
-
-        save_to_salesforce(filtered_df, reminders_df)
-        pdf_file = generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, filtered_df, month_filter)
+        # Run critical tasks concurrently
+        try:
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                future_anomalies = executor.submit(detect_anomalies, filtered_df)
+                future_amc = executor.submit(check_amc_reminders, filtered_df, datetime.now())
+
+                summary = f"Step 1: Summary Report\n{summarize_logs(filtered_df)}"
+                anomalies, anomalies_df = future_anomalies.result()
+                anomalies = f"Anomaly Detection\n{anomalies}"
+                amc_reminders, reminders_df = future_amc.result()
+                amc_reminders = f"AMC Reminders\n{amc_reminders}"
+                insights = f"Dashboard Insights\n{generate_dashboard_insights(filtered_df)}"
+        except Exception as e:
+            logging.error(f"Concurrent task execution failed: {str(e)}")
+            summary = "Failed to generate summary due to processing error."
+            anomalies = "Anomaly detection failed due to processing error."
+            amc_reminders = "AMC reminders failed due to processing error."
+            insights = "Insights generation failed due to processing error."
+            anomalies_df = pd.DataFrame()
+
+        # Generate charts sequentially
+        usage_chart = create_usage_chart(filtered_df)
+        downtime_chart = create_downtime_chart(filtered_df)
+        daily_log_chart = create_daily_log_trends_chart(filtered_df)
+        weekly_uptime_chart = create_weekly_uptime_chart(filtered_df)
+        anomaly_alerts_chart = create_anomaly_alerts_chart(anomalies_df)
+        device_cards = generate_device_cards(filtered_df)
 
         elapsed_time = time.time() - start_time
         logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
-        if elapsed_time > 10:
-            logging.warning(f"Processing time exceeded 10 seconds: {elapsed_time:.2f} seconds")
+        if elapsed_time > 3:
+            logging.warning(f"Processing time exceeded 3 seconds: {elapsed_time:.2f} seconds")
 
-        return (summary, preview_html, usage_chart, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, anomalies, amc_reminders, insights, pdf_file, current_modified_time)
+        return (summary, preview_html, usage_chart, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, anomalies, amc_reminders, insights, None, current_modified_time, df, filtered_df)
     except Exception as e:
         logging.error(f"Failed to process file: {str(e)}")
-        return f"Error: {str(e)}", pd.DataFrame(), None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state
+        return f"Error: {str(e)}", pd.DataFrame(), None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state, cached_df_state, cached_filtered_df_state
+
+# Generate PDF separately
+async def generate_pdf(summary, preview_html, usage_chart, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, anomalies, amc_reminders, insights):
+    try:
+        preview_df = pd.read_html(preview_html)[0]
+        pdf_file = generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart)
+        return pdf_file
+    except Exception as e:
+        logging.error(f"Failed to generate PDF: {str(e)}")
+        return None
 
 # Update filters
-def update_filters(file_obj):
-    if not file_obj:
-        return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
+def update_filters(file_obj, current_file_state):
+    if not file_obj or file_obj.name == current_file_state:
+        return gr.update(), gr.update(), current_file_state
    try:
        with open(file_obj.name, 'rb') as f:
            csv_content = f.read().decode('utf-8')
@@ -629,12 +434,11 @@ def update_filters(file_obj):
 
         lab_site_options = ['All'] + [site for site in df['lab_site'].dropna().astype(str).unique().tolist() if site.strip()] if 'lab_site' in df.columns else ['All']
         equipment_type_options = ['All'] + [equip for equip in df['equipment_type'].dropna().astype(str).unique().tolist() if equip.strip()] if 'equipment_type' in df.columns else ['All']
-        month_options = ['All'] + sorted(df['timestamp'].dt.strftime('%B %Y').dropna().unique().tolist()) if 'timestamp' in df.columns else ['All']
 
-        return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), gr.update(choices=month_options, value='All')
+        return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), file_obj.name
     except Exception as e:
         logging.error(f"Failed to update filters: {str(e)}")
-        return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
+        return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), current_file_state
 
 # Gradio Interface
 try:
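The caching added to process_logs leans on Gradio's State mechanism: whatever value a handler returns into a gr.State output is handed back to it as an input on the next event, which is how the parsed frame and the filtered frame survive between clicks without re-reading the CSV. The round-trip pattern in isolation, as a rough sketch:

import gradio as gr

def count_clicks(state):
    # Whatever is returned into the State output arrives as `state` next time.
    state = (state or 0) + 1
    return f"Clicked {state} times", state

with gr.Blocks() as demo:
    clicks = gr.State(value=None)
    label = gr.Markdown()
    btn = gr.Button("Click")
    btn.click(fn=count_clicks, inputs=[clicks], outputs=[label, clicks])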
@@ -651,10 +455,13 @@
     .table th {background-color: #f2f2f2;}
     .table tr:nth-child(even) {background-color: #f9f9f9;}
     """) as iface:
-        gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Hugging Face AI)</h1>")
-        gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard with the latest data.")
+        gr.Markdown("<h1>LabOps Log Analyzer Dashboard</h1>")
+        gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard. Use 'Export PDF' for report download.")
 
         last_modified_state = gr.State(value=None)
+        current_file_state = gr.State(value=None)
+        cached_df_state = gr.State(value=None)
+        cached_filtered_df_state = gr.State(value=None)
 
         with gr.Row():
             with gr.Column(scale=1):
@@ -664,7 +471,8 @@
                 lab_site_filter = gr.Dropdown(label="Lab Site", choices=['All'], value='All', interactive=True)
                 equipment_type_filter = gr.Dropdown(label="Equipment Type", choices=['All'], value='All', interactive=True)
                 date_range_filter = gr.Slider(label="Date Range (Days from Today)", minimum=-365, maximum=0, step=1, value=[-30, 0])
-                submit_button = gr.Button("Analyze", variant="primary")
+                submit_button = gr.Button("Analyze", variant="primary")
+                pdf_button = gr.Button("Export PDF", variant="secondary")
 
             with gr.Column(scale=2):
                 with gr.Group(elem_classes="dashboard-container"):
@@ -697,23 +505,29 @@
                     gr.Markdown("### Step 5: AMC Reminders")
                     amc_output = gr.Markdown()
                 with gr.Group(elem_classes="dashboard-section"):
-                    gr.Markdown("### Step 6: Insights (AI)")
+                    gr.Markdown("### Step 6: Insights")
                     insights_output = gr.Markdown()
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Export Report")
-                    pdf_output = gr.File(label="Download Monthly Status Report as PDF")
+                    pdf_output = gr.File(label="Download Status Report as PDF")
 
         file_input.change(
             fn=update_filters,
-            inputs=[file_input],
-            outputs=[lab_site_filter, equipment_type_filter],
+            inputs=[file_input, current_file_state],
+            outputs=[lab_site_filter, equipment_type_filter, current_file_state],
             queue=False
         )
 
         submit_button.click(
             fn=process_logs,
-            inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, last_modified_state],
-            outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
+            inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, last_modified_state, cached_df_state, cached_filtered_df_state],
+            outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state, cached_df_state, cached_filtered_df_state]
+        )
+
+        pdf_button.click(
+            fn=generate_pdf,
+            inputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output],
+            outputs=[pdf_output]
         )
 
         logging.info("Gradio interface initialized successfully")
 