lavanya121 committed on
Commit
dcea1b4
·
verified ·
1 Parent(s): dd6837c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +290 -135
app.py CHANGED
@@ -1,163 +1,318 @@
1
from flask import Flask, request, jsonify
from simple_salesforce import Salesforce
import pandas as pd
from datetime import datetime
import logging
from sklearn.ensemble import IsolationForest
from transformers import pipeline
import torch
import os
import time
import requests
from requests.exceptions import Timeout

# Configure logging: write to both a file and stdout so container logs and
# local runs capture the same output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()
    ]
)

# Initialize Flask app
app = Flask(__name__)

# Salesforce credentials, read from the environment.  The placeholder defaults
# make a missing configuration obvious rather than failing silently.
SF_USERNAME = os.getenv('SF_USERNAME', 'your_username')
SF_PASSWORD = os.getenv('SF_PASSWORD', 'your_password')
SF_SECURITY_TOKEN = os.getenv('SF_SECURITY_TOKEN', 'your_token')
SF_INSTANCE_URL = os.getenv('SF_INSTANCE_URL', 'https://login.salesforce.com')

# Lazily initialised singletons: populated by connect_to_salesforce() and
# load_huggingface_model() respectively; None means "not yet available".
sf = None
summarizer = None

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: confirms the Flask process is up and serving."""
    return jsonify({"status": "App is running"}), 200
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
def connect_to_salesforce():
    """Establish the module-global Salesforce session.

    Performs a short preflight GET against the instance URL (retried via the
    mounted HTTPAdapter) so an unreachable host fails fast, then authenticates.
    On any failure the global ``sf`` is reset to None so callers can detect
    the missing connection.
    """
    global sf
    logging.info("Connecting to Salesforce...")
    try:
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=3)
        session.mount('https://', adapter)
        # Preflight connectivity check.  Close the response explicitly so the
        # pooled connection is released instead of leaking (the original
        # discarded the response object without closing it).
        response = session.request('GET', SF_INSTANCE_URL, timeout=10)
        response.close()

        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_SECURITY_TOKEN,
            instance_url=SF_INSTANCE_URL,
            session=session
        )
        logging.info("Connected to Salesforce successfully.")
    except Timeout:
        logging.error("Salesforce connection timed out.")
        sf = None
    except Exception as e:
        logging.error(f"Salesforce connection error: {e}")
        sf = None
63
-
64
def load_huggingface_model():
    """Load the summarization pipeline once; subsequent calls are no-ops.

    Sets the module-global ``summarizer``; leaves it None on failure so
    callers can degrade gracefully.
    """
    global summarizer
    if summarizer is not None:
        return
    logging.info("Loading Hugging Face model...")
    try:
        use_gpu = torch.cuda.is_available()
        summarizer = pipeline(
            "summarization",
            model="sshleifer/distilbart-cnn-12-6",
            device=0 if use_gpu else -1,
        )
        logging.info("Model loaded.")
    except Exception as e:
        logging.error(f"Model load error: {e}")
        summarizer = None
75
-
76
def fetch_smartlog_records(lab_site=None, start_date=None, end_date=None, equipment_type=None):
    """Query SmartLog__c with optional filters and return a typed DataFrame.

    Args:
        lab_site: optional Lab_Site__c equality filter.
        start_date / end_date: optional Timestamp__c range bounds (SOQL
            datetime literals, passed through unquoted).
        equipment_type: optional Log_Type__c equality filter.

    Raises:
        Exception: if the global Salesforce session is missing, or any query
            error (logged, then re-raised).
    """
    if sf is None:
        raise Exception("Salesforce connection not established.")

    def _soql_quote(value):
        # Escape backslashes and single quotes so user-supplied filter values
        # cannot break out of the SOQL string literal (injection hardening —
        # the original interpolated raw values into quoted literals).
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    try:
        logging.info("Fetching SmartLog records...")
        query = """
            SELECT Device_Id__c, Log_Type__c, Status__c, Timestamp__c,
                   Usage_Hours__c, Downtime__c, AMC_Date__c
            FROM SmartLog__c
        """
        conditions = []
        if lab_site:
            conditions.append(f"Lab_Site__c = '{_soql_quote(lab_site)}'")
        if start_date:
            conditions.append(f"Timestamp__c >= {start_date}")
        if end_date:
            conditions.append(f"Timestamp__c <= {end_date}")
        if equipment_type:
            conditions.append(f"Log_Type__c = '{_soql_quote(equipment_type)}'")

        if conditions:
            query += " WHERE " + " AND ".join(conditions)

        result = sf.query_all(query)
        records = result['records']
        data = [{
            'device_id': r['Device_Id__c'],
            'log_type': r['Log_Type__c'],
            'status': r['Status__c'],
            'timestamp': r['Timestamp__c'],
            'usage_hours': r['Usage_Hours__c'],
            'downtime': r['Downtime__c'],
            'amc_date': r['AMC_Date__c']
        } for r in records]

        # Pin the column list so an empty result set still yields a frame with
        # the expected columns (the original raised KeyError on df['timestamp']
        # when no records came back).
        df = pd.DataFrame(data, columns=[
            'device_id', 'log_type', 'status', 'timestamp',
            'usage_hours', 'downtime', 'amc_date'
        ])
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
        df['amc_date'] = pd.to_datetime(df['amc_date'], errors='coerce')
        return df
    except Exception as e:
        logging.error(f"Error fetching records: {e}")
        raise e
120
 
121
def summarize_logs(df):
    """Produce a short natural-language summary of the per-device log rows.

    Returns a human-readable string in every case: the model summary on
    success, or a diagnostic message when the model is unavailable, the data
    is too sparse, or summarization fails.
    """
    load_huggingface_model()
    if summarizer is None:
        return "Model not available"

    lines = []
    for _, row in df.iterrows():
        ts = row['timestamp']
        # Timestamps were parsed with errors='coerce', so NaT is possible;
        # the original crashed on NaT.strftime here.
        if pd.notna(ts):
            lines.append(f"Device {row['device_id']} had status {row['status']} on {ts.strftime('%Y-%m-%d')}.\n")
        else:
            lines.append(f"Device {row['device_id']} had status {row['status']} on an unknown date.\n")
    text = "".join(lines)

    if len(text) < 20:
        return "Not enough log data for summarization."

    try:
        # Model context is limited; truncate the prompt to the first 1024 chars.
        summary = summarizer(text[:1024], max_length=100, min_length=30, do_sample=False)[0]['summary_text']
        return summary
    except Exception as e:
        logging.error(f"Summarization failed: {e}")
        return "Error summarizing logs"
139
 
140
@app.route('/summarize', methods=['POST'])
def summarize_endpoint():
    """POST /summarize — fetch filtered SmartLog records and summarize them.

    JSON body (all keys optional): lab_site, start_date, end_date,
    equipment_type.  Returns {"summary": ..., "records_fetched": n} on
    success, or {"error": ...} with HTTP 500 on failure.
    """
    try:
        # silent=True plus the `or {}` fallback tolerates a missing or invalid
        # JSON body (the original's request.json could be None, making the
        # .get() calls raise AttributeError).
        data = request.get_json(silent=True) or {}
        lab_site = data.get("lab_site")
        start_date = data.get("start_date")
        end_date = data.get("end_date")
        equipment_type = data.get("equipment_type")

        df = fetch_smartlog_records(lab_site, start_date, end_date, equipment_type)
        summary = summarize_logs(df)

        return jsonify({
            "summary": summary,
            "records_fetched": len(df)
        })
    except Exception as e:
        logging.error(f"API error: {e}")
        return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
if __name__ == '__main__':
    # Establish the Salesforce session up front so the first request does not
    # pay the connection cost; sf stays None on failure and endpoints report it.
    connect_to_salesforce()
    app.run(host='0.0.0.0', port=5000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import pandas as pd
from datetime import datetime
import logging
import os
import plotly.express as px
from sklearn.ensemble import IsolationForest  # For anomaly detection
from transformers import pipeline
import torch  # For GPU availability check
from simple_salesforce import Salesforce  # For Salesforce connection

# Configure logging for debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Salesforce credentials: read from the environment so secrets are not
# hard-coded in source.  The placeholder defaults preserve the previous
# behavior for unconfigured local runs (authentication will simply fail).
SALESFORCE_USERNAME = os.getenv("SALESFORCE_USERNAME", "your_username")
SALESFORCE_PASSWORD = os.getenv("SALESFORCE_PASSWORD", "your_password")
SALESFORCE_SECURITY_TOKEN = os.getenv("SALESFORCE_SECURITY_TOKEN", "your_security_token")
SALESFORCE_DOMAIN = os.getenv("SALESFORCE_DOMAIN", "login")  # Use "test" for sandbox, "login" for production

# Preload Hugging Face summarization model at startup so the first request
# does not pay the download/initialization cost.  A failure here is fatal:
# the whole dashboard depends on the summarizer.
logging.info("Preloading Hugging Face model...")
try:
    device = 0 if torch.cuda.is_available() else -1
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)  # Lighter model
    logging.info(f"Hugging Face model preloaded successfully on device: {'GPU' if device == 0 else 'CPU'}")
except Exception as e:
    logging.error(f"Failed to preload model: {str(e)}")
    raise e
29
+
30
# Connect to Salesforce
def connect_to_salesforce():
    """Authenticate against Salesforce and return the live client.

    Uses the module-level SALESFORCE_* credentials.  Any authentication error
    is logged and re-raised so the caller sees the original exception.
    """
    try:
        client = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
            domain=SALESFORCE_DOMAIN,
        )
        logging.info("Successfully connected to Salesforce")
    except Exception as e:
        logging.error(f"Failed to connect to Salesforce: {str(e)}")
        raise e
    return client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
# Fetch data from Salesforce
def fetch_salesforce_data(sf, row_limit=10000, progress=gr.Progress()):
    """Query LabEquipmentLog__c and return a cleaned, typed DataFrame.

    Args:
        sf: authenticated simple_salesforce client.
        row_limit: maximum number of rows to fetch (coerced to int before
            interpolation, so nothing but a number can reach the SOQL text).
        progress: Gradio progress tracker (injected by Gradio at call time).

    Raises:
        Exception: any query/parse error (logged, then re-raised).
    """
    progress(0.05, "Fetching data from Salesforce...")
    try:
        # Query Salesforce for LabEquipmentLog__c object
        query = """
            SELECT Device_ID__c, Log_Type__c, Status__c, Timestamp__c,
                   Usage_Hours__c, Downtime__c, AMC_Date__c
            FROM LabEquipmentLog__c
            LIMIT {}
        """.format(int(row_limit))
        result = sf.query_all(query)
        records = result["records"]

        expected_columns = ["device_id", "log_type", "status", "timestamp",
                            "usage_hours", "downtime", "amc_date"]
        # An empty result previously crashed on df["timestamp"] below; return
        # an empty frame with the expected columns so callers can use .empty.
        if not records:
            logging.info("Fetched 0 records from Salesforce")
            return pd.DataFrame(columns=expected_columns)

        # Convert to DataFrame.  simple_salesforce attaches an 'attributes'
        # metadata dict to every record; drop it so it doesn't leak into the
        # dashboard preview as a junk column.
        df = pd.DataFrame(records)
        df = df.drop(columns=["attributes"], errors="ignore")
        df = df.rename(columns={
            "Device_ID__c": "device_id",
            "Log_Type__c": "log_type",
            "Status__c": "status",
            "Timestamp__c": "timestamp",
            "Usage_Hours__c": "usage_hours",
            "Downtime__c": "downtime",
            "AMC_Date__c": "amc_date"
        })

        # Ensure proper data types.  pd.to_numeric(errors="coerce") turns bad
        # values into NaN; the old astype(..., errors='ignore') is deprecated
        # and silently left the whole column unconverted on any bad value.
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
        df["usage_hours"] = pd.to_numeric(df["usage_hours"], errors="coerce").astype("float32")
        df["downtime"] = pd.to_numeric(df["downtime"], errors="coerce").astype("float32")
        df["device_id"] = df["device_id"].astype("string")

        logging.info(f"Fetched {len(df)} records from Salesforce")
        return df
    except Exception as e:
        logging.error(f"Failed to fetch Salesforce data: {str(e)}")
        raise e
83
 
84
# Format summary prompt and generate report
def summarize_logs(df, progress=gr.Progress()):
    """Build a one-line fleet statistics prompt and run the summarizer on it.

    Returns the model's summary text, or a fixed failure message when
    anything goes wrong (error is logged).
    """
    progress(0.1, "Generating summary report...")
    try:
        total_devices = df["device_id"].nunique()
        if df.empty:
            most_used = "N/A"
        else:
            most_used = df.groupby("device_id")["usage_hours"].sum().idxmax()

        prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
        model_output = summarizer(prompt, max_length=50, min_length=10, do_sample=False)
        summary = model_output[0]["summary_text"]
        logging.info("Summary generated successfully")
        return summary
    except Exception as e:
        logging.error(f"Summary generation failed: {str(e)}")
        return "Failed to generate summary."
98
 
99
# Anomaly Detection using Isolation Forest with sampling for large datasets
def detect_anomalies(df, progress=gr.Progress()):
    """Flag outlier rows by (usage_hours, downtime) with an IsolationForest.

    Returns a markdown bullet list of up to 5 anomalous rows, or a diagnostic
    message.  Operates on a copy: the caller's DataFrame is never mutated.
    """
    progress(0.4, "Detecting anomalies...")
    try:
        if "usage_hours" not in df.columns or "downtime" not in df.columns:
            logging.warning("Required columns for anomaly detection not found")
            return "Anomaly detection requires 'usage_hours' and 'downtime' columns."

        # Work on a copy so the caller's DataFrame never gains the helper
        # 'anomaly' column (the original mutated the input when <=5000 rows),
        # and so writing into the sampled slice avoids SettingWithCopyWarning.
        if len(df) > 5000:
            df = df.sample(n=5000, random_state=42).copy()
            logging.info("Sampled data for anomaly detection to 5,000 rows")
        else:
            df = df.copy()

        features = df[["usage_hours", "downtime"]].fillna(0)
        iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1)
        df["anomaly"] = iso_forest.fit_predict(features)

        # IsolationForest labels outliers as -1.
        anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
        if anomalies.empty:
            return "No anomalies detected."

        anomaly_lines = ["**Detected Anomalies:**"]
        for idx, row in anomalies.head(5).iterrows():
            anomaly_lines.append(f"- Device ID: {row['device_id']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}")
        anomaly_list = "\n".join(anomaly_lines)
        logging.info("Anomalies detected successfully")
        return anomaly_list
    except Exception as e:
        logging.error(f"Anomaly detection failed: {str(e)}")
        return f"Anomaly detection failed: {str(e)}"
128
+
129
# AMC Reminders based on device and AMC date
def check_amc_reminders(df, current_date, progress=gr.Progress()):
    """List devices whose AMC date falls within the next 30 days.

    Args:
        df: log DataFrame with 'device_id' and 'amc_date' columns.
        current_date: reference "today" (anything pd.to_datetime accepts).
        progress: Gradio progress tracker.

    Returns a markdown bullet list of up to 5 reminders, or a diagnostic
    message.  Operates on a copy: the caller's DataFrame is never mutated.
    """
    progress(0.6, "Checking AMC reminders...")
    try:
        if "device_id" not in df.columns or "amc_date" not in df.columns:
            logging.warning("Required columns for AMC reminders not found")
            return "AMC reminders require 'device_id' and 'amc_date' columns."

        # Copy so the caller's DataFrame is not mutated (the original
        # overwrote 'amc_date' and injected a 'days_to_amc' helper column
        # into the shared frame).
        df = df.copy()
        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
        current_date = pd.to_datetime(current_date)

        df["days_to_amc"] = (df["amc_date"] - current_date).dt.days
        reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "amc_date"]]

        if reminders.empty:
            return "No AMC reminders due within the next 30 days."

        reminder_lines = ["**Upcoming AMC Reminders:**"]
        for idx, row in reminders.head(5).iterrows():
            reminder_lines.append(f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}")
        reminder_list = "\n".join(reminder_lines)
        logging.info("AMC reminders generated successfully")
        return reminder_list
    except Exception as e:
        logging.error(f"AMC reminder generation failed: {str(e)}")
        return f"AMC reminder generation failed: {str(e)}"
155
 
156
# Dashboard Insights (AI-generated executive-level insights)
def generate_dashboard_insights(df, progress=gr.Progress()):
    """Ask the summarizer for an executive-level blurb about fleet usage."""
    progress(0.8, "Generating dashboard insights...")
    try:
        device_count = df["device_id"].nunique()
        if "usage_hours" in df.columns:
            mean_usage = df["usage_hours"].mean()
        else:
            mean_usage = 0
        prompt = f"Insights: {device_count} devices, avg usage {mean_usage:.2f} hours."
        model_output = summarizer(prompt, max_length=50, min_length=10, do_sample=False)
        insights = model_output[0]["summary_text"]
        logging.info("Dashboard insights generated successfully")
        return insights
    except Exception as e:
        logging.error(f"Dashboard insights generation failed: {str(e)}")
        return f"Dashboard insights generation failed: {str(e)}"
169
+
170
# Create a bar chart for usage hours per device
def create_usage_chart(df, progress=gr.Progress()):
    """Render a Plotly bar chart of total usage hours for the top devices.

    Aggregates usage per device, keeps the 5 heaviest users when there are
    more, and returns the figure (or None on failure).
    """
    progress(0.9, "Creating usage chart...")
    try:
        per_device = df.groupby("device_id")["usage_hours"].sum().reset_index()
        if len(per_device) > 5:
            per_device = per_device.nlargest(5, "usage_hours")
            logging.info("Limited chart data to top 5 devices")

        custom_colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
        fig = px.bar(
            per_device,
            x="device_id",
            y="usage_hours",
            title="Usage Hours per Device",
            labels={"device_id": "Device ID", "usage_hours": "Usage Hours"},
            color="device_id",
            color_discrete_sequence=custom_colors,
        )
        fig.update_layout(
            title_font_size=16,
            margin=dict(l=20, r=20, t=40, b=20),
            plot_bgcolor="white",
            paper_bgcolor="white",
            font=dict(size=12),
        )
        return fig
    except Exception as e:
        logging.error(f"Failed to create usage chart: {str(e)}")
        return None
200
+
201
# Main Gradio function
async def process_logs(progress=gr.Progress()):
    """Run the full pipeline: fetch -> summary -> preview -> chart ->
    anomalies -> AMC reminders -> insights.

    Returns the six dashboard outputs, in the order wired to the UI:
    (summary_md, preview_md, chart_figure, anomalies_md, amc_md, insights_md).
    """
    try:
        progress(0, "Starting Salesforce data processing...")

        # Connect to Salesforce and fetch data
        sf = connect_to_salesforce()
        df = fetch_salesforce_data(sf, row_limit=10000, progress=progress)

        if df.empty:
            logging.warning("No data retrieved from Salesforce")
            return "No data available from Salesforce.", "No data to preview.", None, "No anomalies detected.", "No AMC reminders.", "No insights generated."

        # Step 1: Summary Report
        progress(0.2, "Generating summary...")
        summary = f"**Step 1: Summary Report** \n{summarize_logs(df, progress)}"

        # Step 2: Log Preview (df is known non-empty here — the empty case
        # already returned above, so the old `if not df.empty` branch was dead)
        progress(0.3, "Previewing logs...")
        preview_lines = ["**Step 2: Log Preview (First 5 Rows)**"]
        for idx, row in df.head().iterrows():
            preview_lines.append(f"**Row {idx + 1}:** Device ID: {row['device_id']}, Timestamp: {row['timestamp']}, Usage Hours: {row['usage_hours']}, Downtime: {row['downtime']}, AMC Date: {row['amc_date']}, Log Type: {row['log_type']}, Status: {row['status']}")
        preview = "\n".join(preview_lines)

        # Step 3: Usage Chart
        chart = create_usage_chart(df, progress)

        # Step 4: Anomaly Detection — labels renumbered to match the
        # dashboard sections (the old strings said Step 3/4/5 under the
        # Step 4/5/6 headings).
        anomalies = f"**Step 4: Anomaly Detection** \n{detect_anomalies(df, progress)}"

        # Step 5: AMC Reminders
        amc_reminders = f"**Step 5: AMC Reminders** \n{check_amc_reminders(df, datetime.now(), progress)}"

        # Step 6: Dashboard Insights
        insights = f"**Step 6: Dashboard Insights (AI)** \n{generate_dashboard_insights(df, progress)}"

        progress(1.0, "Processing complete!")
        return summary, preview, chart, anomalies, amc_reminders, insights
    except Exception as e:
        logging.error(f"Failed to process Salesforce data: {str(e)}")
        error_msg = f"Failed to process Salesforce data: {str(e)}"
        # Return strings (not None) for the Markdown outputs so the failure is
        # visible in every dashboard section, consistent with the empty-data
        # path above; the chart output alone stays None.
        return error_msg, "Processing failed.", None, "Processing failed.", "Processing failed.", "Processing failed."
245
+
246
# Gradio Interface with Step-by-Step Layout
try:
    logging.info("Initializing Gradio Blocks interface...")
    # Custom CSS tightens vertical spacing inside the dashboard cards; the
    # class names are attached to components below via elem_classes.
    with gr.Blocks(css="""
    .dashboard-container {border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; background-color: #f9f9f9;}
    .dashboard-title {font-size: 24px; font-weight: bold; margin-bottom: 5px;}
    .dashboard-section {margin-bottom: 5px;}
    .dashboard-section h3 {font-size: 18px; margin-bottom: 2px;}
    .dashboard-section p {margin: 1px 0; line-height: 1.2;}
    .dashboard-section li {margin: 1px 0; line-height: 1.2;}
    .dashboard-section ul {margin: 2px 0; padding-left: 20px;}
    """) as iface:
        gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Salesforce + Hugging Face AI)</h1>")
        gr.Markdown("Analyze lab equipment logs fetched from Salesforce.")

        with gr.Row():
            # Left column: the single trigger button.
            with gr.Column(scale=1):
                submit_button = gr.Button("Fetch and Analyze Salesforce Data", variant="primary")

            # Right column: six result sections, one per pipeline stage.
            with gr.Column(scale=2):
                with gr.Group(elem_classes="dashboard-container"):
                    gr.Markdown("<div class='dashboard-title'>Analysis Results (Step-by-Step)</div>")

                    # Step 1: Summary Report
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 1: Summary Report")
                        summary_output = gr.Markdown()

                    # Step 2: Log Preview
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 2: Log Preview")
                        preview_output = gr.Markdown()

                    # Step 3: Usage Chart
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 3: Usage Chart")
                        chart_output = gr.Plot()

                    # Step 4: Anomaly Detection
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 4: Anomaly Detection")
                        anomaly_output = gr.Markdown()

                    # Step 5: AMC Reminders
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 5: AMC Reminders")
                        amc_output = gr.Markdown()

                    # Step 6: Dashboard Insights
                    with gr.Group(elem_classes="dashboard-section"):
                        gr.Markdown("### Step 6: Dashboard Insights (AI)")
                        insights_output = gr.Markdown()

        # Wire the button to the async pipeline; outputs map 1:1 onto the six
        # components declared above, in the order process_logs returns them.
        submit_button.click(
            fn=process_logs,
            inputs=[],
            outputs=[summary_output, preview_output, chart_output, anomaly_output, amc_output, insights_output]
        )

    logging.info("Gradio interface initialized successfully")
except Exception as e:
    logging.error(f"Failed to initialize Gradio interface: {str(e)}")
    raise e
309
+
310
if __name__ == "__main__":
    try:
        logging.info("Launching Gradio interface...")
        # Bind on all interfaces; port 7860 is the Hugging Face Spaces default.
        iface.launch(server_name="0.0.0.0", server_port=7860, debug=True, share=False)
        logging.info("Gradio interface launched successfully")
    except Exception as e:
        logging.error(f"Failed to launch Gradio interface: {str(e)}")
        print(f"Error launching app: {str(e)}")
        raise e