lavanya121 committed on
Commit 5ffa9be · verified · 1 Parent(s): 1550572

Update app.py

Files changed (1)
  1. app.py +61 -157
app.py CHANGED
@@ -1,166 +1,70 @@
- from flask import Flask, request, jsonify
  import pandas as pd
- from transformers import pipeline
- from sklearn.ensemble import IsolationForest
  from datetime import datetime
- import logging
  import json
-
- app = Flask(__name__)
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
- # Initialize Hugging Face model for summarization
- try:
-     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
- except Exception as e:
-     logging.error(f"Failed to initialize summarizer: {str(e)}")
-     summarizer = None
-
- # Initialize anomaly detection model
- anomaly_detector = IsolationForest(contamination=0.1, random_state=42)
-
- def validate_logs(logs):
-     """Validate log data structure."""
-     required_fields = ['device_id', 'log_type', 'status', 'timestamp', 'usage_hours']
-     for log in logs:
-         if not all(field in log for field in required_fields):
-             return False, f"Missing required fields in log: {log}"
-         try:
-             pd.to_datetime(log['timestamp'])
-             float(log['usage_hours'])
-         except (ValueError, TypeError):
-             return False, f"Invalid timestamp or usage_hours in log: {log}"
-     return True, ""
-
- def process_logs(log_data):
-     """Convert JSON logs to DataFrame and preprocess."""
-     try:
-         df = pd.DataFrame(log_data)
-         df['timestamp'] = pd.to_datetime(df['timestamp'])
-         df['usage_hours'] = df['usage_hours'].astype(float)
-         return True, df
-     except Exception as e:
-         return False, f"Error processing logs: {str(e)}"
-
  def detect_anomalies(df):
-     """Detect anomalies in usage hours."""
-     try:
-         X = df[['usage_hours']].values
-         predictions = anomaly_detector.fit_predict(X)
-         anomalies = df[predictions == -1]
-         return True, [
-             {
-                 "device_id": row['device_id'],
                  "issue": "Usage spike",
-                 "detected_on": row['timestamp'].strftime('%Y-%m-%d'),
-                 "severity": "high" if row['usage_hours'] > 10 else "medium"
-             } for _, row in anomalies.iterrows()
-         ]
-     except Exception as e:
-         return False, f"Error detecting anomalies: {str(e)}"
-
- def summarize_usage(df, lab_site, start_date, end_date):
-     """Generate usage summary for a given lab site and date range."""
      try:
-         start_date = pd.to_datetime(start_date)
-         end_date = pd.to_datetime(end_date)
-         mask = (df['timestamp'] >= start_date) & (df['timestamp'] <= end_date)
-         filtered_df = df[mask]
-         if filtered_df.empty:
-             return True, {
-                 "total_devices": 0,
-                 "avg_uptime": "0%",
-                 "downtime_events": 0,
-                 "most_used_device": "None"
-             }
-         total_devices = filtered_df['device_id'].nunique()
-         avg_uptime = 100 * (1 - filtered_df['status'].eq('DOWN').mean())
-         downtime_events = filtered_df['status'].eq('DOWN').sum()
-         most_used = filtered_df.groupby('device_id')['usage_hours'].sum()
-         most_used_device = most_used.idxmax() if not most_used.empty else "None"
-         return True, {
-             "total_devices": total_devices,
-             "avg_uptime": f"{avg_uptime:.1f}%",
-             "downtime_events": downtime_events,
-             "most_used_device": most_used_device
-         }
      except Exception as e:
-         return False, f"Error summarizing usage: {str(e)}"
-
- def generate_maintenance_report(anomalies, df, amc_expiry_threshold):
-     """Generate a natural language maintenance report."""
-     try:
-         if not summarizer:
-             return False, "Summarizer model not initialized"
-         if df.empty:
-             return True, "No data available for report generation"
-         prompt = f"""
-         Generate a maintenance summary for SmartLab-1 from {df['timestamp'].min().strftime('%Y-%m-%d')} to {df['timestamp'].max().strftime('%Y-%m-%d')}:
-         - {len(anomalies)} devices experienced abnormal usage patterns
-         - AMC for Device D004 expires in {amc_expiry_threshold} days
-         - UV Verifier-2 had 2.3 hrs of unplanned downtime
-         """
-         summary = summarizer(prompt, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
-         return True, summary
-     except Exception as e:
-         return False, f"Error generating report: {str(e)}"
-
- @app.route('/api/process_logs', methods=['POST'])
- def process_logs_endpoint():
-     try:
-         data = request.get_json()
-         if not data or 'logs' not in data:
-             logging.error("No logs provided in request")
-             return jsonify({"error": "No logs provided"}), 400
-
-         # Extract and validate inputs
-         logs = data['logs']
-         is_valid, error_msg = validate_logs(logs)
-         if not is_valid:
-             logging.error(error_msg)
-             return jsonify({"error": error_msg}), 400
-
-         lab_site = data.get('lab_site', 'SmartLab-1')
-         start_date = data.get('start_date', '2025-05-01')
-         end_date = data.get('end_date', '2025-05-14')
-         amc_expiry_threshold = data.get('amc_expiry_threshold', 12)
-
-         # Process logs
-         success, result = process_logs(logs)
-         if not success:
-             logging.error(result)
-             return jsonify({"error": result}), 500
-         df = result
-
-         # Detect anomalies
-         success, anomalies = detect_anomalies(df)
-         if not success:
-             logging.error(anomalies)
-             return jsonify({"error": anomalies}), 500
-
-         # Summarize usage
-         success, usage_summary = summarize_usage(df, lab_site, start_date, end_date)
-         if not success:
-             logging.error(usage_summary)
-             return jsonify({"error": usage_summary}), 500
-
-         # Generate maintenance report
-         success, maintenance_report = generate_maintenance_report(anomalies, df, amc_expiry_threshold)
-         if not success:
-             logging.error(maintenance_report)
-             return jsonify({"error": maintenance_report}), 500
-
-         # Prepare response
-         response = {
-             "anomalies": anomalies,
-             "usage_summary": usage_summary,
-             "maintenance_report": maintenance_report
-         }
-         logging.info("Successfully processed logs")
-         return jsonify(response), 200
-     except Exception as e:
-         logging.error(f"Unexpected error: {str(e)}")
-         return jsonify({"error": f"Unexpected error: {str(e)}"}), 500
-
- if __name__ == '__main__':
-     app.run(debug=True, host='0.0.0.0', port=5000)
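For context, the removed Flask service accepted POST requests at /api/process_logs. A hypothetical client call is sketched below; the required log fields come from validate_logs() and the optional keys mirror the endpoint's defaults, but the device IDs and values are purely illustrative:

# Hypothetical client for the removed endpoint (illustrative values only).
import requests

payload = {
    "logs": [
        {"device_id": "D001", "log_type": "usage", "status": "UP",
         "timestamp": "2025-05-02T09:00:00", "usage_hours": 6.5},
        {"device_id": "D004", "log_type": "usage", "status": "DOWN",
         "timestamp": "2025-05-03T09:00:00", "usage_hours": 12.0},
    ],
    "lab_site": "SmartLab-1",
    "start_date": "2025-05-01",
    "end_date": "2025-05-14",
    "amc_expiry_threshold": 12,
}
resp = requests.post("http://localhost:5000/api/process_logs", json=payload)
print(resp.json())  # {"anomalies": [...], "usage_summary": {...}, "maintenance_report": "..."}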
 
+ import gradio as gr
  import pandas as pd
  from datetime import datetime
  import json
+ from transformers import pipeline
+
+ # Load Hugging Face summarization model
+ summarizer = pipeline("text2text-generation", model="google/flan-t5-base")
+
+ # Sample rule-based anomaly detector
  def detect_anomalies(df):
+     anomalies = []
+     for _, row in df.iterrows():
+         if row.get("usage_hours", 0) > 10:  # Example threshold
+             anomalies.append({
+                 "device_id": row["device_id"],
                  "issue": "Usage spike",
+                 "detected_on": row["timestamp"].split("T")[0],
+                 "severity": "high"
+             })
+     return anomalies
+
+ # Format summary prompt and generate report
+ def summarize_logs(df, lab_name, start_date, end_date):
+     # Simple aggregation
+     total_devices = df["device_id"].nunique()
+     avg_uptime = "97%"  # Placeholder
+     most_used = df.groupby("device_id")["usage_hours"].sum().idxmax()
+     downtime_events = 3  # Placeholder
+
+     prompt = (
+         f"Summarize maintenance and usage logs for lab {lab_name} "
+         f"from {start_date} to {end_date}. "
+         f"There were {total_devices} devices. "
+         f"The most used device was {most_used}."
+     )
+     summary = summarizer(prompt, max_length=200, do_sample=False)[0]["generated_text"]
+     return summary
+
+ # Main Gradio function
+ def process_logs(file_obj, lab_site, start_date, end_date):
      try:
+         df = pd.read_json(file_obj.name) if file_obj.name.endswith(".json") else pd.read_csv(file_obj.name)
      except Exception as e:
+         return f"Failed to read file: {str(e)}", None, None
+
+     anomalies = detect_anomalies(df)
+     summary = summarize_logs(df, lab_site, start_date, end_date)
+
+     return summary, anomalies, df.head().to_markdown()
+
+ # Gradio Interface
+ iface = gr.Interface(
+     fn=process_logs,
+     inputs=[
+         gr.File(label="Upload Logs (CSV or JSON)"),
+         gr.Textbox(label="Lab Site"),
+         gr.Textbox(label="Start Date (YYYY-MM-DD)"),
+         gr.Textbox(label="End Date (YYYY-MM-DD)")
+     ],
+     outputs=[
+         gr.Textbox(label="Summary Report"),
+         gr.JSON(label="Anomalies"),
+         gr.Markdown(label="Preview of Logs")
+     ],
+     title="LabOps Log Analyzer (Hugging Face AI)"
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
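To sanity-check the new rule-based detector without launching the UI, a minimal sketch like the following could be run in the same file (the sample rows are invented; the second one exceeds the 10-hour threshold hard-coded in detect_anomalies):

# Minimal smoke test for detect_anomalies (invented sample data).
sample_logs = [
    {"device_id": "D001", "timestamp": "2025-05-02T09:00:00", "usage_hours": 6.5},
    {"device_id": "D002", "timestamp": "2025-05-03T14:00:00", "usage_hours": 12.0},  # triggers the > 10 hrs rule
]
sample_df = pd.DataFrame(sample_logs)
print(detect_anomalies(sample_df))
# Expected: [{'device_id': 'D002', 'issue': 'Usage spike', 'detected_on': '2025-05-03', 'severity': 'high'}]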