lavanya121 committed
Commit 1565ec3 · verified · 1 Parent(s): 20123f6

Update app.py

Files changed (1):
  1. app.py +175 -174
app.py CHANGED
@@ -1,194 +1,195 @@
- import gradio as gr
  import pandas as pd
- from datetime import datetime
  import logging
- import plotly.express as px
  from sklearn.ensemble import IsolationForest
  from transformers import pipeline

- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

- try:
-     logging.info("Attempting to load Hugging Face model...")
-     summarizer = pipeline("text2text-generation", model="google/flan-t5-base")
-     logging.info("Hugging Face model loaded successfully")
- except Exception as e:
-     logging.error(f"Failed to load model: {str(e)}")
-     raise e

- def summarize_logs(df):
-     try:
-         total_devices = df["device_id"].nunique()
-         avg_uptime = "97%"
-         most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
-         downtime_events = 3
-         prompt = (f"Summarize maintenance and usage logs. There were {total_devices} devices. The most used device was {most_used}.")
-         summary = summarizer(prompt, max_length=200, do_sample=False)[0]["generated_text"]
-         logging.info("Summary generated successfully")
-         return summary
-     except Exception as e:
-         logging.error(f"Summary generation failed: {str(e)}")
-         return "Failed to generate summary."

- def detect_anomalies(df):
-     try:
-         if "usage_hours" not in df.columns or "downtime" not in df.columns:
-             logging.warning("Required columns for anomaly detection not found")
-             return "Anomaly detection requires 'usage_hours' and 'downtime' columns."
-         features = df[["usage_hours", "downtime"]].fillna(0)
-         iso_forest = IsolationForest(contamination=0.1, random_state=42)
-         df["anomaly"] = iso_forest.fit_predict(features)
-         anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
-         if anomalies.empty:
-             return "No anomalies detected."
-         anomaly_lines = ["**Detected Anomalies:**"]
-         for idx, row in anomalies.iterrows():
-             anomaly_lines.append(f"- Device ID: {row['device_id']}")
-             anomaly_lines.append(f" Usage Hours: {row['usage_hours']}")
-             anomaly_lines.append(f" Downtime: {row['downtime']}")
-             anomaly_lines.append(f" Timestamp: {row['timestamp']}")
-             anomaly_lines.append("---")
-         anomaly_list = "\n".join(anomaly_lines)
-         logging.info("Anomalies detected successfully")
-         return anomaly_list
-     except Exception as e:
-         logging.error(f"Anomaly detection failed: {str(e)}")
-         return f"Anomaly detection failed: {str(e)}"

- def check_amc_reminders(df, current_date):
-     try:
-         if "device_id" not in df.columns or "amc_date" not in df.columns:
-             logging.warning("Required columns for AMC reminders not found")
-             return "AMC reminders require 'device_id' and 'amc_date' columns."
-         df["amc_date"] = pd.to_datetime(df["amc_date"])
-         current_date = pd.to_datetime(current_date)
-         df["days_to_amc"] = (df["amc_date"] - current_date).dt.days
-         reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "amc_date"]]
-         if reminders.empty:
-             return "No AMC reminders due within the next 30 days."
-         reminder_lines = ["**Upcoming AMC Reminders:**"]
-         for idx, row in reminders.iterrows():
-             reminder_lines.append(f"- Device ID: {row['device_id']}")
-             reminder_lines.append(f" AMC Date: {row['amc_date']}")
-             reminder_lines.append("---")
-         reminder_list = "\n".join(reminder_lines)
-         logging.info("AMC reminders generated successfully")
-         return reminder_list
-     except Exception as e:
-         logging.error(f"AMC reminder generation failed: {str(e)}")
-         return f"AMC reminder generation failed: {str(e)}"

- def generate_dashboard_insights(df):
      try:
-         total_devices = df["device_id"].nunique()
-         avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
-         prompt = (f"Generate executive-level insights. There were {total_devices} devices with an average usage of {avg_usage:.2f} hours.")
-         insights = summarizer(prompt, max_length=150, do_sample=False)[0]["generated_text"]
-         logging.info("Dashboard insights generated successfully")
-         return insights
      except Exception as e:
-         logging.error(f"Dashboard insights generation failed: {str(e)}")
-         return f"Dashboard insights generation failed: {str(e)}"

- def create_usage_chart(df):
      try:
-         usage_data = df.groupby("device_id")["usage_hours"].sum().reset_index()
-         fig = px.bar(usage_data, x="device_id", y="usage_hours", title="Usage Hours per Device",
-                      labels={"device_id": "Device ID", "usage_hours": "Usage Hours"},
-                      color="usage_hours", color_continuous_scale="Blues")
-         fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20),
-                           plot_bgcolor="white", paper_bgcolor="white", font=dict(size=12))
-         return fig
      except Exception as e:
-         logging.error(f"Failed to create usage chart: {str(e)}")
-         return None

- def process_logs(file_obj):
      try:
-         if file_obj is None:
-             logging.warning("No file uploaded, returning empty results")
-             return "No file uploaded.", "No data to preview.", None, "No anomalies detected.", "No AMC reminders.", "No insights generated."
-         file_name = file_obj.name if hasattr(file_obj, 'name') else file_obj
-         logging.info(f"Processing file: {file_name}")
-         if not file_name.endswith(".csv"):
-             logging.error("Unsupported file format")
-             return "Unsupported file format. Please upload a CSV file.", None, None, None, None, None
-         df = pd.read_csv(file_name)
-         logging.info(f"File loaded successfully with {len(df)} rows")
-         try:
-             df["timestamp"] = pd.to_datetime(df["timestamp"])
-         except Exception as e:
-             logging.error(f"Date conversion failed: {str(e)}")
-             return f"Failed to convert timestamp to datetime: {str(e)}", None, None, None, None, None
-         if df.empty:
-             logging.warning("No data in the file")
-             return "No data available in the file.", "No data to preview.", None, "No anomalies detected.", "No AMC reminders.", "No insights generated."
-         summary = f"**Step 1: Summary Report**\n\n{summarize_logs(df)}\n\n---\n"
-         if not df.empty:
-             preview_lines = ["**Step 2: Log Preview (First 5 Rows)**\n"]
-             for idx, row in df.head().iterrows():
-                 preview_lines.append(f"**Row {idx + 1}:**")
-                 preview_lines.append(f"- Device ID: {row['device_id']}")
-                 preview_lines.append(f"- Timestamp: {row['timestamp']}")
-                 preview_lines.append(f"- Usage Hours: {row['usage_hours']}")
-                 preview_lines.append(f"- Downtime: {row['downtime']}")
-                 preview_lines.append(f"- AMC Date: {row['amc_date']}")
-                 preview_lines.append("---")
-             preview = "\n".join(preview_lines) + "\n---\n"
-         else:
-             preview = "**Step 2: Log Preview**\n\nNo data available.\n\n---\n"
-         chart = create_usage_chart(df)
-         anomalies = f"**Step 3: Anomaly Detection**\n\n{detect_anomalies(df)}\n\n---\n"
-         amc_reminders = f"**Step 4: AMC Reminders**\n\n{check_amc_reminders(df, datetime.now())}\n\n---\n"
-         insights = f"**Step 5: Dashboard Insights (AI)**\n\n{generate_dashboard_insights(df)}\n\n---\n"
-         return summary, preview, chart, anomalies, amc_reminders, insights
      except Exception as e:
-         logging.error(f"Failed to process file: {str(e)}")
-         return f"Failed to process file: {str(e)}", None, None, None, None, None
-
- try:
-     logging.info("Initializing Gradio Blocks interface...")
-     with gr.Blocks(css=""".dashboard-container {border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; background-color: #f9f9f9;} .dashboard-title {font-size: 24px; font-weight: bold; margin-bottom: 10px;} .dashboard-section {margin-bottom: 15px;} .dashboard-section h3 {font-size: 18px; margin-bottom: 5px;}""") as iface:
-         gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Hugging Face AI)</h1>")
-         gr.Markdown("Upload a CSV file containing lab equipment logs to analyze usage.")
-         with gr.Row():
-             with gr.Column(scale=1):
-                 file_input = gr.File(label="Upload Logs (CSV)", file_types=[".csv"])
-                 submit_button = gr.Button("Submit", variant="primary")
-             with gr.Column(scale=2):
-                 with gr.Group(elem_classes="dashboard-container"):
-                     gr.Markdown("<div class='dashboard-title'>Analysis Results (Step-by-Step)</div>")
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 1: Summary Report")
-                         summary_output = gr.Markdown()
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 2: Log Preview")
-                         preview_output = gr.Markdown()
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 3: Usage Chart")
-                         chart_output = gr.Plot()
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 4: Anomaly Detection")
-                         anomaly_output = gr.Markdown()
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 5: AMC Reminders")
-                         amc_output = gr.Markdown()
-                     with gr.Group(elem_classes="dashboard-section"):
-                         gr.Markdown("### Step 6: Dashboard Insights (AI)")
-                         insights_output = gr.Markdown()
-         submit_button.click(fn=process_logs, inputs=[file_input],
-                             outputs=[summary_output, preview_output, chart_output, anomaly_output, amc_output, insights_output])
-     logging.info("Gradio interface initialized successfully")
- except Exception as e:
-     logging.error(f"Failed to initialize Gradio interface: {str(e)}")
-     raise e
-
- if __name__ == "__main__":
      try:
-         logging.info("Launching Gradio interface...")
-         iface.launch(server_name="0.0.0.0", server_port=7860, debug=True, share=False)
-         logging.info("Gradio interface launched successfully")
      except Exception as e:
-         logging.error(f"Failed to launch Gradio interface: {str(e)}")
-         print(f"Error launching app: {str(e)}")
-         raise e
+ from flask import Flask, request, jsonify
+ from simple_salesforce import Salesforce
  import pandas as pd
+ from datetime import datetime, timedelta
  import logging
  from sklearn.ensemble import IsolationForest
  from transformers import pipeline
+ import torch
+ import os
+ import time
+ import requests
+ from requests.exceptions import Timeout

+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.FileHandler('app.log'),
+         logging.StreamHandler()
+     ]
+ )

+ # Initialize Flask app
+ app = Flask(__name__)

+ # Salesforce credentials (use environment variables for security)
+ SF_USERNAME = os.getenv('SF_USERNAME', 'your_salesforce_username')
+ SF_PASSWORD = os.getenv('SF_PASSWORD', 'your_salesforce_password')
+ SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN', 'your_security_token')
+ SF_INSTANCE_URL = os.getenv('SF_INSTANCE_URL', 'https://login.salesforce.com')
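+ # Example shell setup before launch (hypothetical values):
+ #   export SF_USERNAME='labops@example.com'
+ #   export SF_PASSWORD='********'
+ #   export SF_SECURITY_TOKEN='<your API security token>'
+ #   export SF_INSTANCE_URL='https://login.salesforce.com'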

+ # Global variables
+ sf = None
+ summarizer = None

+ # Health check endpoint
+ @app.route('/health', methods=['GET'])
+ def health_check():
+     return jsonify({"status": "App is running"}), 200
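+ # Quick liveness check once the app is up (local deployment assumed;
+ # port 5000 is configured in app.run at the bottom of this file):
+ #   curl http://localhost:5000/health
+ #   -> {"status": "App is running"}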

+ # Connect to Salesforce
+ def connect_to_salesforce():
+     global sf
+     logging.info("Attempting to connect to Salesforce...")
+     start_time = time.time()
      try:
+         # Pre-flight GET against the instance URL on a retry-mounted
+         # session (3 retries, 10 s timeout); raises Timeout if unreachable
+         session = requests.Session()
+         adapter = requests.adapters.HTTPAdapter(max_retries=3)
+         session.mount('https://', adapter)
+         session.request('GET', SF_INSTANCE_URL, timeout=10)
+
+         sf = Salesforce(
+             username=SF_USERNAME,
+             password=SF_PASSWORD,
+             security_token=SF_SECURITY_TOKEN,
+             instance_url=SF_INSTANCE_URL,
+             session=session
+         )
+         logging.info(f"Connected to Salesforce in {time.time() - start_time:.2f} seconds")
+         return True
+     except Timeout:
+         logging.error("Salesforce connection timed out after 10 seconds")
+         sf = None
+         return False
      except Exception as e:
+         logging.error(f"Failed to connect to Salesforce: {str(e)}")
+         sf = None
+         return False
+
+ # Load Hugging Face model
+ def load_huggingface_model():
+     global summarizer
+     if summarizer is None:
+         logging.info("Loading Hugging Face model...")
+         start_time = time.time()
+         try:
+             device = 0 if torch.cuda.is_available() else -1
+             summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
+             logging.info(f"Hugging Face model loaded in {time.time() - start_time:.2f} seconds on device: {'GPU' if device == 0 else 'CPU'}")
+         except Exception as e:
+             logging.error(f"Failed to load Hugging Face model: {str(e)}")
+             summarizer = None

+ # Fetch SmartLog records
+ def fetch_smartlog_records(lab_site=None, start_date=None, end_date=None, equipment_type=None):
+     if sf is None:
+         raise Exception("Salesforce connection not established")
      try:
+         logging.info("Fetching SmartLog records...")
+         query = "SELECT Device_Id__c, Log_Type__c, Status__c, Timestamp__c, Usage_Hours__c, Downtime__c, AMC_Date__c FROM SmartLog__c"
+         conditions = []
+         # SOQL sent over the REST API has no client-side bind variables
+         # (the :name syntax only exists in Apex), so filter values are
+         # inlined: strings are quoted, datetime literals are not.
+         # Callers must pass trusted, correctly formatted values.
+         if lab_site:
+             conditions.append(f"Lab_Site__c = '{lab_site}'")
+         if start_date:
+             conditions.append(f"Timestamp__c >= {start_date}")
+         if end_date:
+             conditions.append(f"Timestamp__c <= {end_date}")
+         if equipment_type:
+             conditions.append(f"Log_Type__c = '{equipment_type}'")
+
+         if conditions:
+             query += " WHERE " + " AND ".join(conditions)
+
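+         # With hypothetical filters lab_site='Lab-A' and
+         # start_date='2025-01-01T00:00:00Z', the final query reads:
+         #   SELECT ... FROM SmartLog__c
+         #   WHERE Lab_Site__c = 'Lab-A' AND Timestamp__c >= 2025-01-01T00:00:00Z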
+         result = sf.query_all(query)
+         records = result['records']
+         data = [{
+             'device_id': r['Device_Id__c'],
+             'log_type': r['Log_Type__c'],
+             'status': r['Status__c'],
+             'timestamp': r['Timestamp__c'],
+             'usage_hours': r['Usage_Hours__c'],
+             'downtime': r['Downtime__c'],
+             'amc_date': r['AMC_Date__c']
+         } for r in records]
+         df = pd.DataFrame(data)
+         if not df.empty:  # an empty result set has no columns to convert
+             df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
+             df['amc_date'] = pd.to_datetime(df['amc_date'], errors='coerce')
+         logging.info(f"Fetched {len(df)} SmartLog records")
+         return df
      except Exception as e:
+         logging.error(f"Failed to fetch SmartLog records: {str(e)}")
+         raise
+
+ # Summarize logs
+ def summarize_logs(df):
+     load_huggingface_model()
+     if summarizer is None:
+         return {"error": "Hugging Face model not loaded"}

      try:
+         # Generate summary statistics
+         total_records = len(df)
+         unique_devices = df['device_id'].nunique()
+         avg_usage_hours = df['usage_hours'].mean()
+         total_downtime = df['downtime'].sum()
+
+         # Create text for summarization
+         summary_text = (
+             f"Analyzed {total_records} SmartLog records from Salesforce. "
+             f"There are {unique_devices} unique devices. "
+             f"Average usage hours per device is {avg_usage_hours:.2f} hours. "
+             f"Total downtime recorded is {total_downtime:.2f} hours. "
+             f"Status distribution: {df['status'].value_counts().to_dict()}. "
+         )
+
+         # Generate summary using Hugging Face model
+         summary = summarizer(summary_text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
+
+         # Detect anomalies using Isolation Forest
+         features = df[['usage_hours', 'downtime']].fillna(0)
+         iso_forest = IsolationForest(contamination=0.1, random_state=42)
+         df['anomaly'] = iso_forest.fit_predict(features)
+         anomalies = df[df['anomaly'] == -1][['device_id', 'usage_hours', 'downtime']].to_dict('records')
+
+         return {
+             "summary": summary,
+             "statistics": {
+                 "total_records": total_records,
+                 "unique_devices": unique_devices,
+                 "avg_usage_hours": avg_usage_hours,
+                 "total_downtime": total_downtime
+             },
+             "anomalies": anomalies
+         }
      except Exception as e:
+         logging.error(f"Failed to summarize logs: {str(e)}")
+         return {"error": str(e)}
+
+ # Main endpoint to fetch and summarize logs
+ @app.route('/summarize', methods=['POST'])
+ def summarize():
+     if not connect_to_salesforce():
+         return jsonify({"error": "Failed to connect to Salesforce"}), 500
+
+     # get_json(silent=True) returns None instead of raising on a missing
+     # or non-JSON body, so all filters simply default to None
+     data = request.get_json(silent=True) or {}
+     lab_site = data.get('lab_site')
+     start_date = data.get('start_date')
+     end_date = data.get('end_date')
+     equipment_type = data.get('equipment_type')
+
      try:
+         df = fetch_smartlog_records(lab_site, start_date, end_date, equipment_type)
+         result = summarize_logs(df)
+         return jsonify(result), 200
      except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ if __name__ == '__main__':
+     app.run(debug=True, host='0.0.0.0', port=5000)
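
A minimal smoke test for the new /summarize endpoint (hypothetical filter values; assumes the app is running locally on the port configured above, and that the first call may be slow while the model downloads):

    import requests

    # Every filter field is optional; an empty JSON body summarizes all records
    payload = {
        "lab_site": "Lab-A",
        "start_date": "2025-01-01T00:00:00Z",
        "end_date": "2025-01-31T23:59:59Z",
        "equipment_type": "Centrifuge",
    }
    resp = requests.post("http://localhost:5000/summarize", json=payload, timeout=120)
    print(resp.status_code)  # 200 on success, 500 on Salesforce or fetch errors
    print(resp.json())       # {"summary": ..., "statistics": {...}, "anomalies": [...]}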