lavanya121 committed on
Commit
1550572
·
verified ·
1 Parent(s): 746f32c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -51
app.py CHANGED
@@ -2,86 +2,153 @@ from flask import Flask, request, jsonify
2
  import pandas as pd
3
  from transformers import pipeline
4
  from sklearn.ensemble import IsolationForest
5
- from datetime import datetime, timedelta
6
- import json
7
  import logging
 
8
 
9
  app = Flask(__name__)
10
- logging.basicConfig(level=logging.INFO)
11
 
12
- # Initialize Hugging Face models
13
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
14
- text_generator = pipeline("text-generation", model="gpt2")
 
 
 
15
 
16
- # Sample anomaly detection model
17
  anomaly_detector = IsolationForest(contamination=0.1, random_state=42)
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def process_logs(log_data):
20
- """Convert JSON logs to DataFrame and preprocess for analysis."""
21
- df = pd.DataFrame(log_data)
22
- df['timestamp'] = pd.to_datetime(df['timestamp'])
23
- df['usage_hours'] = df['usage_hours'].astype(float)
24
- return df
 
 
 
25
 
26
  def detect_anomalies(df):
27
- """Detect anomalies in usage hours using IsolationForest."""
28
- X = df[['usage_hours']].values
29
- predictions = anomaly_detector.fit_predict(X)
30
- anomalies = df[predictions == -1]
31
- return [
32
- {
33
- "device_id": row['device_id'],
34
- "issue": "Usage spike",
35
- "detected_on": row['timestamp'].strftime('%Y-%m-%d'),
36
- "severity": "high" if row['usage_hours'] > 10 else "medium"
37
- } for _, row in anomalies.iterrows()
38
- ]
 
 
 
39
 
40
  def summarize_usage(df, lab_site, start_date, end_date):
41
  """Generate usage summary for a given lab site and date range."""
42
- mask = (df['timestamp'] >= start_date) & (df['timestamp'] <= end_date)
43
- filtered_df = df[mask]
44
- total_devices = filtered_df['device_id'].nunique()
45
- avg_uptime = 100 * (1 - filtered_df['status'].eq('DOWN').mean())
46
- downtime_events = filtered_df['status'].eq('DOWN').sum()
47
- most_used_device = filtered_df.groupby('device_id')['usage_hours'].sum().idxmax()
48
- return {
49
- "total_devices": total_devices,
50
- "avg_uptime": f"{avg_uptime:.1f}%",
51
- "downtime_events": downtime_events,
52
- "most_used_device": most_used_device
53
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def generate_maintenance_report(anomalies, df, amc_expiry_threshold):
56
  """Generate a natural language maintenance report."""
57
- prompt = f"""
58
- Generate a maintenance summary for SmartLab-1 from {df['timestamp'].min().strftime('%Y-%m-%d')} to {df['timestamp'].max().strftime('%Y-%m-%d')}:
59
- - {len(anomalies)} devices experienced abnormal usage patterns
60
- - AMC for Device D004 expires in {amc_expiry_threshold} days
61
- - UV Verifier-2 had 2.3 hrs of unplanned downtime
62
- """
63
- summary = summarizer(prompt, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
64
- return summary
 
 
 
 
 
 
 
65
 
66
  @app.route('/api/process_logs', methods=['POST'])
67
  def process_logs_endpoint():
68
  try:
69
  data = request.get_json()
70
  if not data or 'logs' not in data:
 
71
  return jsonify({"error": "No logs provided"}), 400
72
 
73
- # Extract inputs
74
  logs = data['logs']
 
 
 
 
 
75
  lab_site = data.get('lab_site', 'SmartLab-1')
76
  start_date = data.get('start_date', '2025-05-01')
77
  end_date = data.get('end_date', '2025-05-14')
78
  amc_expiry_threshold = data.get('amc_expiry_threshold', 12)
79
 
80
  # Process logs
81
- df = process_logs(logs)
82
- anomalies = detect_anomalies(df)
83
- usage_summary = summarize_usage(df, lab_site, start_date, end_date)
84
- maintenance_report = generate_maintenance_report(anomalies, df, amc_expiry_threshold)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  # Prepare response
87
  response = {
@@ -89,10 +156,11 @@ def process_logs_endpoint():
89
  "usage_summary": usage_summary,
90
  "maintenance_report": maintenance_report
91
  }
 
92
  return jsonify(response), 200
93
  except Exception as e:
94
- logging.error(f"Error processing logs: {str(e)}")
95
- return jsonify({"error": str(e)}), 500
96
 
97
  if __name__ == '__main__':
98
  app.run(debug=True, host='0.0.0.0', port=5000)
 
2
  import pandas as pd
3
  from transformers import pipeline
4
  from sklearn.ensemble import IsolationForest
5
+ from datetime import datetime
 
6
  import logging
7
+ import json
8
 
app = Flask(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Initialize Hugging Face model for summarization.
# If loading fails the app still starts; `summarizer` is left as None so that
# code using it can detect the missing model instead of crashing at import.
try:
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
except Exception as e:
    # logging.exception keeps the traceback, which a bare message would lose.
    logging.exception("Failed to initialize summarizer: %s", e)
    summarizer = None

# Initialize anomaly detection model
anomaly_detector = IsolationForest(contamination=0.1, random_state=42)
21
 
def validate_logs(logs):
    """Validate the structure of incoming log records.

    Args:
        logs: Expected to be a non-empty list of dicts, each containing the
            keys ``device_id``, ``log_type``, ``status``, ``timestamp`` and
            ``usage_hours``.

    Returns:
        tuple[bool, str]: ``(True, "")`` when every record is acceptable,
        otherwise ``(False, message)`` describing the first problem found.
    """
    import math  # local import: only needed for the finiteness check below

    required_fields = ('device_id', 'log_type', 'status', 'timestamp', 'usage_hours')
    invalid_value_msg = "Invalid timestamp or usage_hours in log: {}"

    # An empty or non-list payload would otherwise pass validation and only
    # fail later with an unhelpful error.
    if not isinstance(logs, list) or not logs:
        return False, "Logs must be a non-empty list"

    for log in logs:
        # `field in log` on a string does substring matching and on a number
        # raises TypeError, so insist on a mapping first.
        if not isinstance(log, dict):
            return False, f"Log entry must be an object: {log}"
        if not all(field in log for field in required_fields):
            return False, f"Missing required fields in log: {log}"
        try:
            timestamp = pd.to_datetime(log['timestamp'])
            usage_hours = float(log['usage_hours'])
            # A null timestamp parses without raising, and "nan"/"inf"
            # convert to float without raising; neither is usable downstream.
            if pd.isna(timestamp) or not math.isfinite(usage_hours):
                return False, invalid_value_msg.format(log)
        except (ValueError, TypeError):
            return False, invalid_value_msg.format(log)
    return True, ""
34
+
def process_logs(log_data):
    """Convert JSON log records into a DataFrame ready for analysis.

    ``timestamp`` is parsed to datetimes and normalized to timezone-naive UTC,
    and ``usage_hours`` is cast to float.

    Args:
        log_data: Records accepted by ``pd.DataFrame`` (typically a list of
            dicts) containing at least ``timestamp`` and ``usage_hours``.

    Returns:
        tuple: ``(True, DataFrame)`` on success, otherwise
        ``(False, "Error processing logs: <reason>")``.
    """
    try:
        df = pd.DataFrame(log_data)
        # utc=True accepts naive and offset-aware values alike (naive values
        # are taken as UTC). Dropping the tz afterwards keeps the column
        # comparable with naive Timestamps such as parsed date-range bounds.
        parsed = pd.to_datetime(df['timestamp'], utc=True)
        df['timestamp'] = parsed.dt.tz_localize(None)
        df['usage_hours'] = df['usage_hours'].astype(float)
        return True, df
    except Exception as e:
        # Errors are reported through the (success, payload) tuple rather
        # than raised, so the caller can turn them into a response.
        return False, f"Error processing logs: {str(e)}"
44
 
def detect_anomalies(df):
    """Detect anomalous usage-hour readings.

    Fits the module-level ``anomaly_detector`` on the ``usage_hours`` column
    and reports every row it labels ``-1``.

    Args:
        df: DataFrame with ``device_id``, ``timestamp`` (datetime) and
            ``usage_hours`` columns.

    Returns:
        tuple: ``(True, list_of_anomaly_dicts)`` on success, where each dict
        has ``device_id``, ``issue``, ``detected_on`` and ``severity``;
        otherwise ``(False, "Error detecting anomalies: <reason>")``.
    """
    try:
        # Nothing to fit on: report "no anomalies" instead of letting the
        # estimator raise on zero samples.
        if df.empty:
            return True, []
        X = df[['usage_hours']].values
        predictions = anomaly_detector.fit_predict(X)
        anomalies = df[predictions == -1]
        return True, [
            {
                "device_id": row['device_id'],
                "issue": "Usage spike",
                "detected_on": row['timestamp'].strftime('%Y-%m-%d'),
                "severity": "high" if row['usage_hours'] > 10 else "medium",
            }
            for _, row in anomalies.iterrows()
        ]
    except Exception as e:
        return False, f"Error detecting anomalies: {str(e)}"
61
 
def summarize_usage(df, lab_site, start_date, end_date):
    """Generate usage summary for a given lab site and date range.

    Args:
        df: DataFrame with ``device_id``, ``status``, ``timestamp`` and
            ``usage_hours`` columns.
        lab_site: Lab site label. Accepted for interface compatibility; the
            summary is not currently filtered by it.
        start_date: Inclusive lower bound, anything ``pd.to_datetime`` parses.
        end_date: Inclusive upper bound, anything ``pd.to_datetime`` parses.
            NOTE(review): a date-only value parses to midnight, so records
            later on that day fall outside the range; confirm this is intended.

    Returns:
        tuple: ``(True, summary_dict)`` with keys ``total_devices``,
        ``avg_uptime``, ``downtime_events`` and ``most_used_device``, or
        ``(False, "Error summarizing usage: <reason>")``.
    """
    try:
        start_date = pd.to_datetime(start_date)
        end_date = pd.to_datetime(end_date)
        mask = (df['timestamp'] >= start_date) & (df['timestamp'] <= end_date)
        filtered_df = df[mask]
        if filtered_df.empty:
            return True, {
                "total_devices": 0,
                "avg_uptime": "0%",
                "downtime_events": 0,
                "most_used_device": "None",
            }

        is_down = filtered_df['status'].eq('DOWN')
        avg_uptime = 100 * (1 - is_down.mean())
        # pandas reductions return numpy scalars, which the JSON encoder
        # rejects; convert to plain ints before they reach the response.
        downtime_events = int(is_down.sum())
        total_devices = int(filtered_df['device_id'].nunique())

        usage_by_device = filtered_df.groupby('device_id')['usage_hours'].sum()
        if usage_by_device.empty:
            most_used_device = "None"
        else:
            most_used_device = usage_by_device.idxmax()
            # Numeric device ids come back as numpy scalars; unwrap them.
            if hasattr(most_used_device, 'item'):
                most_used_device = most_used_device.item()

        return True, {
            "total_devices": total_devices,
            "avg_uptime": f"{avg_uptime:.1f}%",
            "downtime_events": downtime_events,
            "most_used_device": most_used_device,
        }
    except Exception as e:
        return False, f"Error summarizing usage: {str(e)}"
89
 
def generate_maintenance_report(anomalies, df, amc_expiry_threshold):
    """Generate a natural language maintenance report.

    Builds a short prompt from the data's date range, the number of affected
    devices and the AMC threshold, then passes it to the module-level
    ``summarizer`` pipeline.

    Args:
        anomalies: List of anomaly dicts; the ``device_id`` key is used to
            count affected devices.
        df: DataFrame whose ``timestamp`` column gives the reporting period.
        amc_expiry_threshold: Number of days inserted into the AMC line.

    Returns:
        tuple: ``(True, report_text)`` on success (including a fixed message
        when ``df`` is empty), otherwise ``(False, reason)``.
    """
    try:
        if not summarizer:
            return False, "Summarizer model not initialized"
        if df.empty:
            return True, "No data available for report generation"

        # The prompt speaks of "devices", so count distinct device ids rather
        # than anomaly records (one device can be flagged several times).
        device_ids = {
            entry['device_id']
            for entry in anomalies
            if isinstance(entry, dict) and 'device_id' in entry
        }
        affected_devices = len(device_ids) if device_ids else len(anomalies)

        period_start = df['timestamp'].min().strftime('%Y-%m-%d')
        period_end = df['timestamp'].max().strftime('%Y-%m-%d')
        # NOTE(review): the site name, device D004 and the UV Verifier-2
        # downtime figure are hard-coded and not derived from the input data.
        prompt = "\n".join([
            f"Generate a maintenance summary for SmartLab-1 from {period_start} to {period_end}:",
            f"- {affected_devices} devices experienced abnormal usage patterns",
            f"- AMC for Device D004 expires in {amc_expiry_threshold} days",
            "- UV Verifier-2 had 2.3 hrs of unplanned downtime",
        ])
        summary = summarizer(prompt, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
        return True, summary
    except Exception as e:
        return False, f"Error generating report: {str(e)}"
107
 
108
  @app.route('/api/process_logs', methods=['POST'])
109
  def process_logs_endpoint():
110
  try:
111
  data = request.get_json()
112
  if not data or 'logs' not in data:
113
+ logging.error("No logs provided in request")
114
  return jsonify({"error": "No logs provided"}), 400
115
 
116
+ # Extract and validate inputs
117
  logs = data['logs']
118
+ is_valid, error_msg = validate_logs(logs)
119
+ if not is_valid:
120
+ logging.error(error_msg)
121
+ return jsonify({"error": error_msg}), 400
122
+
123
  lab_site = data.get('lab_site', 'SmartLab-1')
124
  start_date = data.get('start_date', '2025-05-01')
125
  end_date = data.get('end_date', '2025-05-14')
126
  amc_expiry_threshold = data.get('amc_expiry_threshold', 12)
127
 
128
  # Process logs
129
+ success, result = process_logs(logs)
130
+ if not success:
131
+ logging.error(result)
132
+ return jsonify({"error": result}), 500
133
+ df = result
134
+
135
+ # Detect anomalies
136
+ success, anomalies = detect_anomalies(df)
137
+ if not success:
138
+ logging.error(anomalies)
139
+ return jsonify({"error": anomalies}), 500
140
+
141
+ # Summarize usage
142
+ success, usage_summary = summarize_usage(df, lab_site, start_date, end_date)
143
+ if not success:
144
+ logging.error(usage_summary)
145
+ return jsonify({"error": usage_summary}), 500
146
+
147
+ # Generate maintenance report
148
+ success, maintenance_report = generate_maintenance_report(anomalies, df, amc_expiry_threshold)
149
+ if not success:
150
+ logging.error(maintenance_report)
151
+ return jsonify({"error": maintenance_report}), 500
152
 
153
  # Prepare response
154
  response = {
 
156
  "usage_summary": usage_summary,
157
  "maintenance_report": maintenance_report
158
  }
159
+ logging.info("Successfully processed logs")
160
  return jsonify(response), 200
161
  except Exception as e:
162
+ logging.error(f"Unexpected error: {str(e)}")
163
+ return jsonify({"error": f"Unexpected error: {str(e)}"}), 500
164
 
if __name__ == '__main__':
    import os

    # The interactive debugger allows arbitrary code execution, so it must not
    # be on by default while the server listens on every interface. Enable it
    # explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)