lavanya121 committed on
Commit
dcea1b4
·
verified ·
1 Parent(s): dd6837c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +290 -135
app.py CHANGED
@@ -1,163 +1,318 @@
1
from flask import Flask, request, jsonify
from simple_salesforce import Salesforce
import pandas as pd
from datetime import datetime
import logging
from sklearn.ensemble import IsolationForest
from transformers import pipeline
import torch
import os
import time
import requests
from requests.exceptions import Timeout

# Configure logging: write to both a file and stdout so container logs and
# local runs capture the same output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()
    ]
)

# Initialize Flask app
app = Flask(__name__)

# Salesforce credentials, read from the environment.  The placeholder defaults
# make a missing configuration obvious rather than failing silently.
SF_USERNAME = os.getenv('SF_USERNAME', 'your_username')
SF_PASSWORD = os.getenv('SF_PASSWORD', 'your_password')
SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN', 'your_token')
SF_INSTANCE_URL = os.getenv('SF_INSTANCE_URL', 'https://login.salesforce.com')

# Lazily initialised singletons: populated by connect_to_salesforce() and
# load_huggingface_model() respectively; None means "not yet available".
sf = None
summarizer = None

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: confirms the Flask process is up and serving."""
    return jsonify({"status": "App is running"}), 200
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
def connect_to_salesforce():
    """Establish the module-global Salesforce session.

    Performs a short preflight GET against the instance URL (retried via the
    mounted HTTPAdapter) so an unreachable host fails fast, then authenticates.
    On any failure the global ``sf`` is reset to None so callers can detect
    the missing connection.
    """
    global sf
    logging.info("Connecting to Salesforce...")
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=3)
        session.mount('https://', adapter)
        # Preflight connectivity check.  Close the response explicitly so the
        # pooled connection is released instead of leaking (the original
        # discarded the response object without closing it).
        response = session.request('GET', SF_INSTANCE_URL, timeout=10)
        response.close()

        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL,
            session=session
        )
        logging.info("Connected to Salesforce successfully.")
    except Timeout:
        logging.error("Salesforce connection timed out.")
        sf = None
    except Exception as e:
        logging.error(f"Salesforce connection error: {e}")
        sf = None
63
-
64
def load_huggingface_model():
    """Load the summarization pipeline once; subsequent calls are no-ops.

    Sets the module-global ``summarizer``; leaves it None on failure so
    callers can degrade gracefully.
    """
    global summarizer
    if summarizer is not None:
        return
    logging.info("Loading Hugging Face model...")
    try:
        use_gpu = torch.cuda.is_available()
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=0 if use_gpu else -1,
        )
        logging.info("Model loaded.")
    except Exception as e:
        logging.error(f"Model load error: {e}")
        summarizer = None
75
-
76
def fetch_smartlog_records(lab_site=None, start_date=None, end_date=None, equipment_type=None):
    """Query SmartLog__c with optional filters and return a typed DataFrame.

    Args:
        lab_site: optional Lab_Site__c equality filter.
        start_date / end_date: optional Timestamp__c range bounds (SOQL
            datetime literals, passed through unquoted).
        equipment_type: optional Log_Type__c equality filter.

    Raises:
        Exception: if the global Salesforce session is missing, or any query
            error (logged, then re-raised).
    """
    if sf is None:
        raise Exception("Salesforce connection not established.")

    def _soql_quote(value):
        # Escape backslashes and single quotes so user-supplied filter values
        # cannot break out of the SOQL string literal (injection hardening —
        # the original interpolated raw values into quoted literals).
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    try:
        logging.info("Fetching SmartLog records...")
        query = """
            SELECT Device_Id__c, Log_Type__c, Status__c, Timestamp__c,
                   Usage_Hours__c, Downtime__c, AMC_Date__c
            FROM SmartLog__c
        """
        conditions = []
        if lab_site:
            conditions.append(f"Lab_Site__c = '{_soql_quote(lab_site)}'")
        if start_date:
            conditions.append(f"Timestamp__c >= {start_date}")
        if end_date:
            conditions.append(f"Timestamp__c <= {end_date}")
        if equipment_type:
            conditions.append(f"Log_Type__c = '{_soql_quote(equipment_type)}'")

        if conditions:
            query += " WHERE " + " AND ".join(conditions)

        result = sf.query_all(query)
        records = result['records']
        data = [{
            'device_id': r['Device_Id__c'],
            'log_type': r['Log_Type__c'],
            'status': r['Status__c'],
            'timestamp': r['Timestamp__c'],
            'usage_hours': r['Usage_Hours__c'],
            'downtime': r['Downtime__c'],
            'amc_date': r['AMC_Date__c']
        } for r in records]

        # Pin the column list so an empty result set still yields a frame with
        # the expected columns (the original raised KeyError on df['timestamp']
        # when no records came back).
        df = pd.DataFrame(data, columns=[
            'device_id', 'log_type', 'status', 'timestamp',
            'usage_hours', 'downtime', 'amc_date'
        ])
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
        df['amc_date'] = pd.to_datetime(df['amc_date'], errors='coerce')
        return df
    except Exception as e:
        logging.error(f"Error fetching records: {e}")
        raise e
120
 
121
def summarize_logs(df):
    """Produce a short natural-language summary of the per-device log rows.

    Returns a human-readable string in every case: the model summary on
    success, or a diagnostic message when the model is unavailable, the data
    is too sparse, or summarization fails.
    """
    load_huggingface_model()
    if summarizer is None:
        return "Model not available"

    lines = []
    for _, row in df.iterrows():
        ts = row['timestamp']
        # Timestamps were parsed with errors='coerce', so NaT is possible;
        # the original crashed on NaT.strftime here.
        if pd.notna(ts):
            lines.append(f"Device {row['device_id']} had status {row['status']} on {ts.strftime('%Y-%m-%d')}.\n")
        else:
            lines.append(f"Device {row['device_id']} had status {row['status']} on an unknown date.\n")
    text = "".join(lines)

    if len(text) < 20:
        return "Not enough log data for summarization."

    try:
        # Model context is limited; truncate the prompt to the first 1024 chars.
        summary = summarizer(text[:1024], max_length=100, min_length=30, do_sample=False)[0]['summary_text']
        return summary
    except Exception as e:
        logging.error(f"Summarization failed: {e}")
        return "Error summarizing logs"
139
 
140
@app.route('/summarize', methods=['POST'])
def summarize_endpoint():
    """POST /summarize — fetch filtered SmartLog records and summarize them.

    JSON body (all keys optional): lab_site, start_date, end_date,
    equipment_type.  Returns {"summary": ..., "records_fetched": n} on
    success, or {"error": ...} with HTTP 500 on failure.
    """
    try:
        # silent=True plus the `or {}` fallback tolerates a missing or invalid
        # JSON body (the original's request.json could be None, making the
        # .get() calls raise AttributeError).
        data = request.get_json(silent=True) or {}
        lab_site = data.get("lab_site")
        start_date = data.get("start_date")
        end_date = data.get("end_date")
        equipment_type = data.get("equipment_type")

        df = fetch_smartlog_records(lab_site, start_date, end_date, equipment_type)
        summary = summarize_logs(df)

        return jsonify({
            "summary": summary,
            "records_fetched": len(df)
        })
    except Exception as e:
        logging.error(f"API error: {e}")
        return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
if __name__ == '__main__':
    # Establish the Salesforce session up front so the first request does not
    # pay the connection cost; sf stays None on failure and endpoints report it.
    connect_to_salesforce()
    app.run(host='0.0.0.0', port=5000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pandas as pd
from datetime import datetime
import logging
import os
import plotly.express as px
from sklearn.ensemble import IsolationForest  # For anomaly detection
from transformers import pipeline
import torch  # For GPU availability check
from simple_salesforce import Salesforce  # For Salesforce connection

# Configure logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Salesforce credentials: read from the environment so secrets are not
# hard-coded in source.  The placeholder defaults preserve the previous
# behavior for unconfigured local runs (authentication will simply fail).
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME", "your_username")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD", "your_password")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN", "your_security_token")
SALESFORCE_DOMAIN = os.getenv("SALESFORCE_DOMAIN", "login")  # Use "test" for sandbox, "login" for production

# Preload Hugging Face summarization model at startup so the first request
# does not pay the download/initialization cost.  A failure here is fatal:
# the whole dashboard depends on the summarizer.
logging.info("Preloading Hugging Face model...")
try:
    device = 0 if torch.cuda.is_available() else -1
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)  # Lighter model
    logging.info(f"Hugging Face model preloaded successfully on device: {'GPU' if device == 0 else 'CPU'}")
except Exception as e:
    logging.error(f"Failed to preload model: {str(e)}")
    raise e
29
+
30
# Connect to Salesforce
def connect_to_salesforce():
    """Authenticate against Salesforce and return the live client.

    Uses the module-level SALESFORCE_* credentials.  Any authentication error
    is logged and re-raised so the caller sees the original exception.
    """
    try:
        client = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain=SALESFORCE_DOMAIN,
        )
        logging.info("Successfully connected to Salesforce")
    except Exception as e:
        logging.error(f"Failed to connect to Salesforce: {str(e)}")
        raise e
    return client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
# Fetch data from Salesforce
def fetch_salesforce_data(sf, row_limit=10000, progress=gr.Progress()):
    """Query LabEquipmentLog__c and return a cleaned, typed DataFrame.

    Args:
        sf: authenticated simple_salesforce client.
        row_limit: maximum number of rows to fetch (coerced to int before
            interpolation, so nothing but a number can reach the SOQL text).
        progress: Gradio progress tracker (injected by Gradio at call time).

    Raises:
        Exception: any query/parse error (logged, then re-raised).
    """
    progress(0.05, "Fetching data from Salesforce...")
    try:
        # Query Salesforce for LabEquipmentLog__c object
        query = """
            SELECT Device_ID__c, Log_Type__c, Status__c, Timestamp__c,
                   Usage_Hours__c, Downtime__c, AMC_Date__c
            FROM LabEquipmentLog__c
            LIMIT {}
        """.format(int(row_limit))
        result = sf.query_all(query)
        records = result["records"]

        expected_columns = ["device_id", "log_type", "status", "timestamp",
                            "usage_hours", "downtime", "amc_date"]
        # An empty result previously crashed on df["timestamp"] below; return
        # an empty frame with the expected columns so callers can use .empty.
        if not records:
            logging.info("Fetched 0 records from Salesforce")
            return pd.DataFrame(columns=expected_columns)

        # Convert to DataFrame.  simple_salesforce attaches an 'attributes'
        # metadata dict to every record; drop it so it doesn't leak into the
        # dashboard preview as a junk column.
        df = pd.DataFrame(records)
        df = df.drop(columns=["attributes"], errors="ignore")
        df = df.rename(columns={
            "Device_ID__c": "device_id",
            "Log_Type__c": "log_type",
            "Status__c": "status",
            "Timestamp__c": "timestamp",
            "Usage_Hours__c": "usage_hours",
            "Downtime__c": "downtime",
            "AMC_Date__c": "amc_date"
        })

        # Ensure proper data types.  pd.to_numeric(errors="coerce") turns bad
        # values into NaN; the old astype(..., errors='ignore') is deprecated
        # and silently left the whole column unconverted on any bad value.
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
        df["usage_hours"] = pd.to_numeric(df["usage_hours"], errors="coerce").astype("float32")
        df["downtime"] = pd.to_numeric(df["downtime"], errors="coerce").astype("float32")
        df["device_id"] = df["device_id"].astype("string")

        logging.info(f"Fetched {len(df)} records from Salesforce")
        return df
    except Exception as e:
        logging.error(f"Failed to fetch Salesforce data: {str(e)}")
        raise e
83
 
84
# Format summary prompt and generate report
def summarize_logs(df, progress=gr.Progress()):
    """Build a one-line fleet statistics prompt and run the summarizer on it.

    Returns the model's summary text, or a fixed failure message when
    anything goes wrong (error is logged).
    """
    progress(0.1, "Generating summary report...")
    try:
        total_devices = df["device_id"].nunique()
        if df.empty:
            most_used = "N/A"
        else:
            most_used = df.groupby("device_id")["usage_hours"].sum().idxmax()

        prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
        model_output = summarizer(prompt, max_length=50, min_length=10, do_sample=False)
        summary = model_output[0]["summary_text"]
        logging.info("Summary generated successfully")
        return summary
    except Exception as e:
        logging.error(f"Summary generation failed: {str(e)}")
        return "Failed to generate summary."
98
 
99
# Anomaly Detection using Isolation Forest with sampling for large datasets
def detect_anomalies(df, progress=gr.Progress()):
    """Flag outlier rows by (usage_hours, downtime) with an IsolationForest.

    Returns a markdown bullet list of up to 5 anomalous rows, or a diagnostic
    message.  Operates on a copy: the caller's DataFrame is never mutated.
    """
    progress(0.4, "Detecting anomalies...")
    try:
        if "usage_hours" not in df.columns or "downtime" not in df.columns:
            logging.warning("Required columns for anomaly detection not found")
            return "Anomaly detection requires 'usage_hours' and 'downtime' columns."

        # Work on a copy so the caller's DataFrame never gains the helper
        # 'anomaly' column (the original mutated the input when <=5000 rows),
        # and so writing into the sampled slice avoids SettingWithCopyWarning.
        if len(df) > 5000:
            df = df.sample(n=5000, random_state=42).copy()
            logging.info("Sampled data for anomaly detection to 5,000 rows")
        else:
            df = df.copy()

        features = df[["usage_hours", "downtime"]].fillna(0)
        iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1)
        df["anomaly"] = iso_forest.fit_predict(features)

        # IsolationForest labels outliers as -1.
        anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
        if anomalies.empty:
            return "No anomalies detected."

        anomaly_lines = ["**Detected Anomalies:**"]
        for idx, row in anomalies.head(5).iterrows():
            anomaly_lines.append(f"- Device ID: {row['device_id']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}")
        anomaly_list = "\n".join(anomaly_lines)
        logging.info("Anomalies detected successfully")
        return anomaly_list
    except Exception as e:
        logging.error(f"Anomaly detection failed: {str(e)}")
        return f"Anomaly detection failed: {str(e)}"
128
+
129
# AMC Reminders based on device and AMC date
def check_amc_reminders(df, current_date, progress=gr.Progress()):
    """List devices whose AMC date falls within the next 30 days.

    Args:
        df: log DataFrame with 'device_id' and 'amc_date' columns.
        current_date: reference "today" (anything pd.to_datetime accepts).
        progress: Gradio progress tracker.

    Returns a markdown bullet list of up to 5 reminders, or a diagnostic
    message.  Operates on a copy: the caller's DataFrame is never mutated.
    """
    progress(0.6, "Checking AMC reminders...")
    try:
        if "device_id" not in df.columns or "amc_date" not in df.columns:
            logging.warning("Required columns for AMC reminders not found")
            return "AMC reminders require 'device_id' and 'amc_date' columns."

        # Copy so the caller's DataFrame is not mutated (the original
        # overwrote 'amc_date' and injected a 'days_to_amc' helper column
        # into the shared frame).
        df = df.copy()
        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
        current_date = pd.to_datetime(current_date)

        df["days_to_amc"] = (df["amc_date"] - current_date).dt.days
        reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "amc_date"]]

        if reminders.empty:
            return "No AMC reminders due within the next 30 days."

        reminder_lines = ["**Upcoming AMC Reminders:**"]
        for idx, row in reminders.head(5).iterrows():
            reminder_lines.append(f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}")
        reminder_list = "\n".join(reminder_lines)
        logging.info("AMC reminders generated successfully")
        return reminder_list
    except Exception as e:
        logging.error(f"AMC reminder generation failed: {str(e)}")
        return f"AMC reminder generation failed: {str(e)}"
155
 
156
# Dashboard Insights (AI-generated executive-level insights)
def generate_dashboard_insights(df, progress=gr.Progress()):
    """Ask the summarizer for an executive-level blurb about fleet usage."""
    progress(0.8, "Generating dashboard insights...")
    try:
        device_count = df["device_id"].nunique()
        if "usage_hours" in df.columns:
            mean_usage = df["usage_hours"].mean()
        else:
            mean_usage = 0
        prompt = f"Insights: {device_count} devices, avg usage {mean_usage:.2f} hours."
        model_output = summarizer(prompt, max_length=50, min_length=10, do_sample=False)
        insights = model_output[0]["summary_text"]
        logging.info("Dashboard insights generated successfully")
        return insights
    except Exception as e:
        logging.error(f"Dashboard insights generation failed: {str(e)}")
        return f"Dashboard insights generation failed: {str(e)}"
169
+
170
# Create a bar chart for usage hours per device
def create_usage_chart(df, progress=gr.Progress()):
    """Render a Plotly bar chart of total usage hours for the top devices.

    Aggregates usage per device, keeps the 5 heaviest users when there are
    more, and returns the figure (or None on failure).
    """
    progress(0.9, "Creating usage chart...")
    try:
        per_device = df.groupby("device_id")["usage_hours"].sum().reset_index()
        if len(per_device) > 5:
            per_device = per_device.nlargest(5, "usage_hours")
            logging.info("Limited chart data to top 5 devices")

        custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
        fig = px.bar(
            per_device,
            x="device_id",
            y="usage_hours",
            title="Usage Hours per Device",
            labels={"device_id": "Device ID", "usage_hours": "Usage Hours"},
            color="device_id",
            color_discrete_sequence=custom_colors,
        )
        fig.update_layout(
            title_font_size=16,
            margin=dict(l=20, r=20, t=40, b=20),
            plot_bgcolor="white",
            paper_bgcolor="white",
            font=dict(size=12),
        )
        return fig
    except Exception as e:
        logging.error(f"Failed to create usage chart: {str(e)}")
        return None
200
+
201
# Main Gradio function
async def process_logs(progress=gr.Progress()):
    """Run the full pipeline: fetch -> summary -> preview -> chart ->
    anomalies -> AMC reminders -> insights.

    Returns the six dashboard outputs, in the order wired to the UI:
    (summary_md, preview_md, chart_figure, anomalies_md, amc_md, insights_md).
    """
    try:
        progress(0, "Starting Salesforce data processing...")

        # Connect to Salesforce and fetch data
        sf = connect_to_salesforce()
        df = fetch_salesforce_data(sf, row_limit=10000, progress=progress)

        if df.empty:
            logging.warning("No data retrieved from Salesforce")
            return "No data available from Salesforce.", "No data to preview.", None, "No anomalies detected.", "No AMC reminders.", "No insights generated."

        # Step 1: Summary Report
        progress(0.2, "Generating summary...")
        summary = f"**Step 1: Summary Report** \n{summarize_logs(df, progress)}"

        # Step 2: Log Preview (df is known non-empty here — the empty case
        # already returned above, so the old `if not df.empty` branch was dead)
        progress(0.3, "Previewing logs...")
        preview_lines = ["**Step 2: Log Preview (First 5 Rows)**"]
        for idx, row in df.head().iterrows():
            preview_lines.append(f"**Row {idx + 1}:** Device ID: {row['device_id']}, Timestamp: {row['timestamp']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, AMC Date: {row['amc_date']}, Log Type: {row['log_type']}, Status: {row['status']}")
        preview = "\n".join(preview_lines)

        # Step 3: Usage Chart
        chart = create_usage_chart(df, progress)

        # Step 4: Anomaly Detection — labels renumbered to match the
        # dashboard sections (the old strings said Step 3/4/5 under the
        # Step 4/5/6 headings).
        anomalies = f"**Step 4: Anomaly Detection** \n{detect_anomalies(df, progress)}"

        # Step 5: AMC Reminders
        amc_reminders = f"**Step 5: AMC Reminders** \n{check_amc_reminders(df, datetime.now(), progress)}"

        # Step 6: Dashboard Insights
        insights = f"**Step 6: Dashboard Insights (AI)** \n{generate_dashboard_insights(df, progress)}"

        progress(1.0, "Processing complete!")
        return summary, preview, chart, anomalies, amc_reminders, insights
    except Exception as e:
        logging.error(f"Failed to process Salesforce data: {str(e)}")
        error_msg = f"Failed to process Salesforce data: {str(e)}"
        # Return strings (not None) for the Markdown outputs so the failure is
        # visible in every dashboard section, consistent with the empty-data
        # path above; the chart output alone stays None.
        return error_msg, "Processing failed.", None, "Processing failed.", "Processing failed.", "Processing failed."
245
+
246
# Gradio Interface with Step-by-Step Layout
try:
    logging.info("Initializing Gradio Blocks interface...")
    # Custom CSS tightens vertical spacing inside the dashboard cards; the
    # class names are attached to components below via elem_classes.
    with gr.Blocks(css="""
    .dashboard-container {border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; background-color: #f9f9f9;}
    .dashboard-title {font-size: 24px; font-weight: bold; margin-bottom: 5px;}
    .dashboard-section {margin-bottom: 5px;}
    .dashboard-section h3 {font-size: 18px; margin-bottom: 2px;}
    .dashboard-section p {margin: 1px 0; line-height: 1.2;}
    .dashboard-section li {margin: 1px 0; line-height: 1.2;}
    .dashboard-section ul {margin: 2px 0; padding-left: 20px;}
    """) as iface:
        gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Salesforce + Hugging Face AI)</h1>")
        gr.Markdown("Analyze lab equipment logs fetched from Salesforce.")

        with gr.Row():
            # Left column: the single trigger button.
            with gr.Column(scale=1):
                submit_button = gr.Button("Fetch and Analyze Salesforce Data", variant="primary")

            # Right column: six result sections, one per pipeline stage.
            with gr.Column(scale=2):
                with gr.Group(elem_classes="dashboard-container"):
                    gr.Markdown("<div class='dashboard-title'>Analysis Results (Step-by-Step)</div>")

                    # Step 1: Summary Report
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 1: Summary Report")
                        summary_output = gr.Markdown()

                    # Step 2: Log Preview
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 2: Log Preview")
                        preview_output = gr.Markdown()

                    # Step 3: Usage Chart
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 3: Usage Chart")
                        chart_output = gr.Plot()

                    # Step 4: Anomaly Detection
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 4: Anomaly Detection")
                        anomaly_output = gr.Markdown()

                    # Step 5: AMC Reminders
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 5: AMC Reminders")
                        amc_output = gr.Markdown()

                    # Step 6: Dashboard Insights
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 6: Dashboard Insights (AI)")
                        insights_output = gr.Markdown()

        # Wire the button to the async pipeline; outputs map 1:1 onto the six
        # components declared above, in the order process_logs returns them.
        submit_button.click(
            fn=process_logs,
            inputs=[],
            outputs=[summary_output, preview_output, chart_output, anomaly_output, amc_output, insights_output]
        )

    logging.info("Gradio interface initialized successfully")
except Exception as e:
    logging.error(f"Failed to initialize Gradio interface: {str(e)}")
    raise e
309
+
310
if __name__ == "__main__":
    try:
        logging.info("Launching Gradio interface...")
        # Bind on all interfaces; port 7860 is the Hugging Face Spaces default.
        iface.launch(server_name="0.0.0.0", server_port=7860, debug=True, share=False)
        logging.info("Gradio interface launched successfully")
    except Exception as e:
        logging.error(f"Failed to launch Gradio interface: {str(e)}")
        print(f"Error launching app: {str(e)}")
        raise e