dhruv575 commited on
Commit
c31b74d
·
1 Parent(s): f3a4783
controllers/incident_controller.py CHANGED
@@ -5,46 +5,128 @@ from models.incident import Incident
5
  from models.workflow import Workflow
6
  from models.log import Log
7
  from utils.celery_tasks import process_incident_forms
 
 
 
 
 
 
 
8
 
9
  # Configure logging
10
  logger = logging.getLogger(__name__)
11
 
12
  def get_incident(current_user, incident_id):
13
  """Get incident by ID"""
14
- incident = Incident.find_by_id(incident_id)
15
- if not incident:
16
- return jsonify({'message': 'Incident not found'}), 404
17
-
18
- # Check if user has access to this incident
19
- if str(incident.department_id) != str(current_user.department_id):
20
- return jsonify({'message': 'Access denied to incidents from other departments'}), 403
21
-
22
- return jsonify({'incident': incident.to_dict()}), 200
23
-
 
 
 
 
 
 
 
24
  def delete_incident(current_user, incident_id):
25
- """Delete an incident"""
26
- incident = Incident.find_by_id(incident_id)
27
- if not incident:
28
- return jsonify({'message': 'Incident not found'}), 404
29
-
30
- # Check if user has access to this incident
31
- if str(incident.department_id) != str(current_user.department_id):
32
- return jsonify({'message': 'Access denied to incidents from other departments'}), 403
33
-
34
- # Additional check: only incident owner or admin can delete
35
- if str(incident.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
36
- return jsonify({'message': 'Only the incident owner or department admin can delete incidents'}), 403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Delete the incident
39
- if incident.delete():
40
- return jsonify({'message': 'Incident deleted successfully'}), 200
41
- else:
42
- return jsonify({'message': 'Failed to delete incident'}), 500
43
 
44
  def get_user_incidents(current_user):
45
  """Get all incidents for the current user"""
46
- incidents = Incident.find_by_user(current_user._id)
47
- return jsonify({'incidents': [incident.to_dict() for incident in incidents]}), 200
 
 
 
 
 
 
 
 
 
48
 
49
  def get_department_incidents(current_user):
50
  """Get all incidents for the user's department"""
@@ -115,4 +197,75 @@ def reprocess_incident(current_user, incident_id):
115
  except Exception as e:
116
  logger.error(f"Error reprocessing incident {incident_id}: {str(e)}")
117
  incident.update_status("failed")
118
- return jsonify({'message': f'Error reprocessing incident: {str(e)}'}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from models.workflow import Workflow
6
  from models.log import Log
7
  from utils.celery_tasks import process_incident_forms
8
+ from models.user import User
9
+ from models.department import Department
10
+ from utils.pdf_utils import fill_markdown_form, extract_required_data, save_filled_form
11
+ from db import get_gridfs
12
+ from bson.objectid import ObjectId
13
+ import os
14
+ import openai
15
 
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
def get_incident(current_user, incident_id):
    """Return a single incident as JSON, enforcing access control.

    Access is granted when the requester is an admin, owns the incident,
    or belongs to the incident's department. Returns 404 for an unknown
    id, 403 on denial, and 500 on unexpected errors.
    """
    try:
        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Non-admins must either own the incident or share its department.
        is_owner = str(incident.user_id) == str(current_user._id)
        same_department = str(incident.department_id) == str(current_user.department_id)
        if not (current_user.is_admin or is_owner or same_department):
            return jsonify({'message': 'You do not have permission to access this incident'}), 403

        return jsonify({'incident': incident.to_dict()}), 200
    except Exception as e:
        logger.error(f"Error getting incident: {str(e)}")
        return jsonify({'message': f'Error getting incident: {str(e)}'}), 500
37
+
38
def delete_incident(current_user, incident_id):
    """Delete an incident by ID.

    Only an admin or the incident's owner may delete it. Returns 404 for
    an unknown id, 403 on denial, 200 on success, 500 on failure.
    """
    try:
        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Deletion is restricted to admins and the incident's owner.
        if not (current_user.is_admin or str(incident.user_id) == str(current_user._id)):
            return jsonify({'message': 'You do not have permission to delete this incident'}), 403

        if incident.delete():
            return jsonify({'message': 'Incident deleted successfully'}), 200
        return jsonify({'message': 'Failed to delete incident'}), 500
    except Exception as e:
        logger.error(f"Error deleting incident: {str(e)}")
        return jsonify({'message': f'Error deleting incident: {str(e)}'}), 500
59
+
60
def process_incident_sync(current_user, incident_id):
    """Synchronously run the LLM form-filling pipeline for one incident.

    Steps: permission check, workflow/template lookup, data extraction
    from the incident's activity text, markdown form fill, then status
    update. Returns the filled markdown alongside the updated incident.
    """
    try:
        # Fail fast when the LLM backend is not configured.
        api_key = os.environ.get('OPENAI_API_KEY')
        if not api_key:
            logger.error("OPENAI_API_KEY environment variable is not set")
            return jsonify({'message': 'OpenAI API key not configured'}), 500
        openai.api_key = api_key

        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Admins, the owner, and department members may trigger processing.
        allowed = (
            current_user.is_admin
            or str(incident.user_id) == str(current_user._id)
            or str(incident.department_id) == str(current_user.department_id)
        )
        if not allowed:
            return jsonify({'message': 'You do not have permission to process this incident'}), 403

        workflow = Workflow.find_by_id(incident.workflow_id)
        if workflow is None:
            return jsonify({'message': 'Workflow not found for this incident'}), 404
        if not workflow.markdown_template:
            return jsonify({'message': 'No form template found for this workflow'}), 404

        # Pull the structured fields the workflow needs out of the free text.
        logger.info(f"Extracting required data for incident {incident_id}")
        required_data = extract_required_data(incident.activity_text, workflow.data_requirements)
        incident.extracted_data = required_data

        # Fill the workflow's markdown template with the extracted values.
        logger.info(f"Filling form for incident {incident_id}")
        filled_markdown = fill_markdown_form(workflow.markdown_template, required_data, incident.activity_text)

        incident.status = "completed"
        if not incident.save():
            return jsonify({'message': 'Failed to update incident'}), 500

        return jsonify({
            'message': 'Incident processed successfully',
            'incident': incident.to_dict(),
            'filled_markdown': filled_markdown
        }), 200

    except Exception as e:
        logger.error(f"Error processing incident {incident_id}: {str(e)}")
        return jsonify({'message': f'Error processing incident: {str(e)}'}), 500
 
 
116
 
117
def get_user_incidents(current_user):
    """Return every incident belonging to the requesting user as JSON."""
    try:
        found = Incident.find_by_user(current_user._id)
        payload = [item.to_dict() for item in found]
        return jsonify({'incidents': payload}), 200
    except Exception as e:
        logger.error(f"Error getting user incidents: {str(e)}")
        return jsonify({'message': f'Error getting user incidents: {str(e)}'}), 500
130
 
131
  def get_department_incidents(current_user):
132
  """Get all incidents for the user's department"""
 
197
  except Exception as e:
198
  logger.error(f"Error reprocessing incident {incident_id}: {str(e)}")
199
  incident.update_status("failed")
200
+ return jsonify({'message': f'Error reprocessing incident: {str(e)}'}), 500
201
+
202
def create_incident_from_activity(current_user):
    """Create an incident from activity data and a workflow ID.

    Expects a JSON body with 'activity' (dict with 'activity'/'text' keys),
    'workflow_id', 'date' (YYYY-MM-DD string or date object), and 'log_text'.
    Optionally accepts 'log_id'; when absent, a new Log is created from
    'log_text'. Returns 201 with the created incident, 400 for bad input,
    403/404 for access/lookup failures, and 500 on unexpected errors.
    """
    try:
        data = request.get_json()

        # get_json() returns None for a missing/invalid JSON body; report a
        # client error instead of letting the field check raise into a 500.
        if not data:
            return jsonify({'message': 'Missing JSON request body'}), 400

        # Check if required fields are present
        required_fields = ['activity', 'workflow_id', 'date', 'log_text']
        for field in required_fields:
            if field not in data:
                return jsonify({'message': f'Missing required field: {field}'}), 400

        # Validate date format (accept an already-parsed date object as-is)
        try:
            if isinstance(data['date'], str):
                date = datetime.strptime(data['date'], '%Y-%m-%d').date()
            else:
                date = data['date']
        except ValueError:
            return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400

        # Check if workflow exists and user has access
        workflow = Workflow.find_by_id(data['workflow_id'])
        if not workflow:
            return jsonify({'message': 'Workflow not found'}), 404

        # Users may only attach incidents to their own department's workflows.
        if str(workflow.department_id) != str(current_user.department_id):
            return jsonify({'message': 'Access denied to workflows from other departments'}), 403

        # Create a log entry first if one was not supplied.
        log_id = data.get('log_id')
        if not log_id:
            log = Log(
                user_id=current_user._id,
                department_id=current_user.department_id,
                log_date=date,
                log_text=data['log_text']
            )
            if log.save():
                log_id = log._id
            else:
                # Best-effort: the incident can still be created without a
                # log, but the failure should not pass silently.
                logger.warning("Failed to save log entry while creating incident from activity")

        # Create the incident
        incident = Incident(
            department_id=current_user.department_id,
            user_id=current_user._id,
            workflow_id=ObjectId(data['workflow_id']),
            description=data['activity'].get('activity', 'No description'),
            date=date,
            activity_text=data['activity'].get('text', ''),
            log_id=log_id,
            status="pending"
        )

        if not incident.save():
            return jsonify({'message': 'Failed to create incident'}), 500

        # Link the incident back to its log, if any.
        if log_id:
            log = Log.find_by_id(log_id)
            if log:
                log.add_incident(incident._id)

        return jsonify({
            'message': 'Incident created successfully',
            'incident': incident.to_dict()
        }), 201

    except Exception as e:
        logger.error(f"Error creating incident from activity: {str(e)}")
        return jsonify({'message': f'Error creating incident: {str(e)}'}), 500
controllers/log_controller.py CHANGED
@@ -1,6 +1,7 @@
1
  from flask import jsonify, request
2
  import logging
3
  import os
 
4
  from datetime import datetime
5
  import uuid
6
  import pytesseract
@@ -14,8 +15,7 @@ from models.user import User
14
  from models.department import Department
15
  from models.workflow import Workflow
16
  from models.incident import Incident
17
- from utils.celery_tasks import process_log_document
18
- from utils.pdf_utils import pdf_to_text
19
  from db import get_gridfs
20
  from bson.objectid import ObjectId
21
 
@@ -23,7 +23,7 @@ from bson.objectid import ObjectId
23
  logger = logging.getLogger(__name__)
24
 
25
  def upload_log(current_user):
26
- """Upload a new log file"""
27
  if 'file' not in request.files:
28
  return jsonify({'message': 'No file part'}), 400
29
 
@@ -44,47 +44,31 @@ def upload_log(current_user):
44
  # Parse the date string
45
  log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
46
 
47
- # Upload file to GridFS
48
- fs = get_gridfs()
49
- file_id = fs.put(
50
- file.read(),
51
- filename=file.filename,
52
- content_type='application/pdf',
53
- metadata={
54
- 'user_id': str(current_user._id),
55
- 'department_id': str(current_user.department_id),
56
- 'log_date': log_date_str,
57
- 'upload_date': datetime.now()
58
- }
59
- )
60
 
61
- # Create the file URL for retrieval
62
- log_file_url = f"/api/logs/files/{file_id}"
 
63
 
64
- # Create new log entry
65
  log = Log(
66
  user_id=current_user._id,
67
  department_id=current_user.department_id,
68
  log_date=log_date,
69
- log_file=log_file_url
70
  )
71
 
72
  if log.save():
73
- # Start asynchronous processing using Celery
74
- process_log_document.delay(str(log._id))
75
 
76
  return jsonify({
77
- 'message': 'Log uploaded successfully and processing started',
78
- 'log': log.to_dict()
 
79
  }), 201
80
  else:
81
- # Clean up GridFS file if log save fails
82
- try:
83
- fs.delete(file_id)
84
- logger.info(f"Deleted file {file_id} from GridFS after failed log save")
85
- except Exception as del_e:
86
- logger.error(f"Failed to delete GridFS file {file_id} after DB error: {del_e}")
87
-
88
  return jsonify({'message': 'Failed to save log entry'}), 500
89
 
90
  except ValueError:
@@ -93,6 +77,176 @@ def upload_log(current_user):
93
  logger.error(f"Error uploading log: {str(e)}")
94
  return jsonify({'message': f'Error uploading log: {str(e)}'}), 500
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def get_log(current_user, log_id):
97
  """Get log by ID"""
98
  log = Log.find_by_id(log_id)
@@ -119,17 +273,6 @@ def delete_log(current_user, log_id):
119
  if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
120
  return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
121
 
122
- # Delete the log file from GridFS if it exists
123
- if log.log_file and '/files/' in log.log_file:
124
- try:
125
- # Extract file_id from URL
126
- file_id = log.log_file.split('/')[-1]
127
- fs = get_gridfs()
128
- fs.delete(ObjectId(file_id))
129
- logger.info(f"Deleted file {file_id} from GridFS")
130
- except Exception as e:
131
- logger.error(f"Error deleting log file from GridFS: {str(e)}")
132
-
133
  # Delete associated incidents if they exist
134
  for incident_id in log.incidents:
135
  incident = Incident.find_by_id(incident_id)
@@ -177,4 +320,117 @@ def get_logs_by_date_range(current_user):
177
  return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400
178
  except Exception as e:
179
  logger.error(f"Error fetching logs by date range: {str(e)}")
180
- return jsonify({'message': f'Error fetching logs: {str(e)}'}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from flask import jsonify, request
2
  import logging
3
  import os
4
+ import json
5
  from datetime import datetime
6
  import uuid
7
  import pytesseract
 
15
  from models.department import Department
16
  from models.workflow import Workflow
17
  from models.incident import Incident
18
+ from utils.pdf_utils import pdf_to_text, extract_activities
 
19
  from db import get_gridfs
20
  from bson.objectid import ObjectId
21
 
 
23
  logger = logging.getLogger(__name__)
24
 
25
  def upload_log(current_user):
26
+ """Upload a new log file, extract text using OCR, and save only the text"""
27
  if 'file' not in request.files:
28
  return jsonify({'message': 'No file part'}), 400
29
 
 
44
  # Parse the date string
45
  log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
46
 
47
+ # Read the file content
48
+ file_content = file.read()
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # Extract text from PDF using OCR
51
+ logger.info(f"Extracting text from PDF using OCR")
52
+ extracted_text = pdf_to_text(file_content, is_bytes=True)
53
 
54
+ # Create new log entry with the extracted text instead of file reference
55
  log = Log(
56
  user_id=current_user._id,
57
  department_id=current_user.department_id,
58
  log_date=log_date,
59
+ log_text=extracted_text # Store the extracted text, not a file reference
60
  )
61
 
62
  if log.save():
63
+ # Process log synchronously
64
+ result = process_log_sync(str(log._id))
65
 
66
  return jsonify({
67
+ 'message': 'Log uploaded and processed successfully',
68
+ 'log': log.to_dict(),
69
+ 'incidents_created': result.get('incidents_created', 0)
70
  }), 201
71
  else:
 
 
 
 
 
 
 
72
  return jsonify({'message': 'Failed to save log entry'}), 500
73
 
74
  except ValueError:
 
77
  logger.error(f"Error uploading log: {str(e)}")
78
  return jsonify({'message': f'Error uploading log: {str(e)}'}), 500
79
 
80
def process_log_sync(log_id):
    """Process a log document synchronously.

    Uses the log's stored OCR text, asks the LLM to split it into
    activities, classifies each against the department's workflows, and
    creates an incident per classified activity. Returns a status dict
    (never raises).
    """
    try:
        # NOTE(review): this module's visible imports do not show `openai`;
        # confirm it is imported at file level before relying on this path.
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return {"status": "error", "message": "OpenAI API key not configured"}

        openai.api_key = os.environ.get('OPENAI_API_KEY')

        log = Log.find_by_id(log_id)
        if not log:
            logger.error(f"Log not found: {log_id}")
            return {"status": "error", "message": "Log not found"}

        # The OCR text was stored at upload time; no PDF re-processing needed.
        logger.info(f"Using stored text for log {log_id}")
        extracted_text = log.log_text

        # Extract activities using the LLM, then parse the JSON reply.
        logger.info(f"Extracting activities for log {log_id}")
        activities_json = extract_activities(extracted_text)
        activities = json.loads(activities_json).get('activities', [])

        logger.info(f"Classifying activities and creating incidents for log {log_id}")

        # Nothing to classify against without department workflows.
        workflows = Workflow.find_by_department(log.department_id)
        if not workflows:
            logger.warning(f"No workflows defined for department {log.department_id}")
            return {"status": "completed", "message": "No workflows to process", "incidents_created": 0}

        workflow_info = [
            {"id": str(wf._id), "title": wf.title, "description": wf.description}
            for wf in workflows
        ]

        classified_activities = []
        created_incidents = 0

        for activity in activities:
            matched_workflow = classify_activity(activity, workflow_info)
            if not matched_workflow:
                continue

            logger.info(f"Creating incident for activity: {activity['activity']}")
            incident = Incident(
                department_id=log.department_id,
                user_id=log.user_id,
                workflow_id=ObjectId(matched_workflow),
                description=activity['activity'],
                date=log.log_date,
                activity_text=activity['text'],
                log_id=log._id,
                status="completed"  # Mark as completed since we're processing synchronously
            )

            if incident.save():
                log.add_incident(incident._id)
                created_incidents += 1
                classified_activities.append({
                    "activity": activity,
                    "workflow_id": matched_workflow,
                    "incident_id": str(incident._id)
                })

        return {
            "status": "completed",
            "message": "Log processing completed",
            "incidents_created": created_incidents,
            "classified_activities": classified_activities
        }

    except Exception as e:
        logger.error(f"Error processing log {log_id}: {str(e)}")
        return {"status": "error", "message": str(e)}
175
+
176
def classify_activity(activity, workflow_info):
    """
    Classify an activity against available workflows.

    Asks the LLM to match the activity to one of the workflows described
    in `workflow_info` (list of dicts with 'id', 'title', 'description').
    Returns the matched workflow id, or None for mundane/unmatched
    activities and on any error.
    """
    try:
        # Check if OpenAI API key is set
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return None

        # Set OpenAI API key
        openai.api_key = os.environ.get('OPENAI_API_KEY')

        # Prepare prompt for OpenAI
        workflows_text = "\n".join([
            f"Workflow {i+1}: {w['title']} - {w['description']}"
            for i, w in enumerate(workflow_info)
        ])

        prompt = f"""
        I need to classify a law enforcement activity into one of our defined workflows,
        or determine if it's a routine/mundane activity that doesn't match any workflow.

        Here are the available workflows:
        {workflows_text}

        Here is the activity:
        Activity: {activity['activity']}
        Full Text: {activity['text']}
        Time: {activity.get('time', 'Not specified')}
        Location: {activity.get('location', 'Not specified')}

        Please classify this activity into one of the workflows, or indicate it's mundane.
        Respond with just the workflow ID if it matches, or "mundane" if it doesn't match any workflow.
        """

        # Call OpenAI API
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a law enforcement activity classifier that matches activities to defined workflows."},
                {"role": "user", "content": prompt}
            ]
        )

        # Map the free-text reply onto a workflow id.
        result = response.choices[0].message.content.strip()
        return _match_classification_result(result, workflow_info)

    except Exception as e:
        logger.error(f"Error classifying activity: {str(e)}")
        return None


def _match_classification_result(result, workflow_info):
    """Map a raw LLM reply to a workflow id, or None for mundane/unmatched.

    LLM replies rarely match the requested format exactly, so parsing is
    tolerant: casing/punctuation around "mundane" is ignored, ids and
    titles are matched by substring, and a numeric answer (the prompt
    numbers workflows from 1, e.g. "Workflow 2") is treated as an index
    even when embedded in surrounding text.
    """
    import re

    # Tolerate replies like "Mundane." instead of requiring exact equality.
    if "mundane" in result.lower():
        return None

    # Find the workflow by ID or title substring.
    for workflow in workflow_info:
        if workflow['id'] in result:
            return workflow['id']
        if workflow['title'] in result:
            return workflow['id']

    # Fall back to the first number in the reply as a 1-based index.
    number = re.search(r'\d+', result)
    if number:
        index = int(number.group()) - 1
        if 0 <= index < len(workflow_info):
            return workflow_info[index]['id']

    return None
249
+
250
  def get_log(current_user, log_id):
251
  """Get log by ID"""
252
  log = Log.find_by_id(log_id)
 
273
  if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
274
  return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
275
 
 
 
 
 
 
 
 
 
 
 
 
276
  # Delete associated incidents if they exist
277
  for incident_id in log.incidents:
278
  incident = Incident.find_by_id(incident_id)
 
320
  return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400
321
  except Exception as e:
322
  logger.error(f"Error fetching logs by date range: {str(e)}")
323
+ return jsonify({'message': f'Error fetching logs: {str(e)}'}), 500
324
+
325
def classify_log_activities(current_user):
    """
    Extract and classify activities from an uploaded log PDF without
    creating incidents. Returns the activities, per-activity classification
    results, workflow info, and the OCR'd text for the frontend to handle.
    """
    logger.info(f"classify_log_activities called. Files in request: {request.files.keys()}")

    # Validate the multipart upload before any expensive work.
    if 'file' not in request.files:
        logger.error("No file part in the request")
        return jsonify({'message': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        logger.error("No selected file")
        return jsonify({'message': 'No selected file'}), 400

    if not file.filename.lower().endswith('.pdf'):
        logger.error(f"Invalid file type: {file.filename}")
        return jsonify({'message': 'Only PDF files are allowed'}), 400

    try:
        # Fail fast when the LLM backend is not configured.
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return jsonify({'message': 'OpenAI API key not configured'}), 500

        openai.api_key = os.environ.get('OPENAI_API_KEY')

        pdf_bytes = file.read()
        logger.info(f"Read {len(pdf_bytes)} bytes from file {file.filename}")

        # OCR the PDF into plain text.
        logger.info(f"Starting OCR for uploaded log")
        extracted_text = pdf_to_text(pdf_bytes, is_bytes=True)
        logger.info(f"Extracted {len(extracted_text)} characters of text")

        # Ask the LLM to break the text into discrete activities.
        logger.info(f"Extracting activities from log")
        activities_json = extract_activities(extracted_text)
        activities = json.loads(activities_json).get('activities', [])
        logger.info(f"Extracted {len(activities)} activities")

        logger.info(f"Classifying activities")

        workflows = Workflow.find_by_department(current_user.department_id)
        if not workflows:
            # No workflows: return the raw activities unclassified.
            logger.warning(f"No workflows defined for department {current_user.department_id}")
            return jsonify({
                'message': 'No workflows to classify against',
                'activities': activities,
                'classified_activities': [],
                'extracted_text': extracted_text  # Include the extracted text in the response
            }), 200

        workflow_info = [
            {"id": str(wf._id), "title": wf.title, "description": wf.description}
            for wf in workflows
        ]

        classified_activities = []
        for activity in activities:
            matched_id = classify_activity(activity, workflow_info)
            entry = {"activity": activity}
            if matched_id:
                match = next((w for w in workflow_info if w["id"] == matched_id), None)
                entry["workflow_id"] = matched_id
                entry["workflow_title"] = match["title"] if match else "Unknown workflow"
                entry["classified"] = True
            else:
                entry["classified"] = False
            classified_activities.append(entry)

        logger.info(f"Classification complete. {len(classified_activities)} activities classified.")

        return jsonify({
            'message': 'Log activities extracted and classified',
            'activities': activities,
            'classified_activities': classified_activities,
            'workflows': workflow_info,
            'extracted_text': extracted_text  # Include the extracted text in the response
        }), 200

    except Exception as e:
        logger.error(f"Error classifying log activities: {str(e)}")
        import traceback
        logger.error(traceback.format_exc())
        return jsonify({'message': f'Error classifying log activities: {str(e)}'}), 500
controllers/workflow_controller.py CHANGED
@@ -86,6 +86,12 @@ def update_workflow(current_user, workflow_id):
86
  return jsonify({'message': 'Access denied to workflows from other departments'}), 403
87
 
88
  data = request.get_json()
 
 
 
 
 
 
89
 
90
  # Update fields if provided
91
  if 'title' in data:
 
86
  return jsonify({'message': 'Access denied to workflows from other departments'}), 403
87
 
88
  data = request.get_json()
89
+ logger.info(f"Update workflow request data keys: {data.keys() if data else 'None'}")
90
+ if 'markdown_template' in data:
91
+ template_length = len(data['markdown_template']) if data['markdown_template'] else 0
92
+ logger.info(f"Received markdown_template with length: {template_length}")
93
+ if template_length > 0:
94
+ logger.info(f"First 100 chars: {data['markdown_template'][:100]}")
95
 
96
  # Update fields if provided
97
  if 'title' in data:
models/log.py CHANGED
@@ -5,12 +5,13 @@ from models.user import User
5
  from models.department import Department
6
 
7
  class Log:
8
- def __init__(self, user_id, department_id, log_date, log_file, incidents=None,
9
  _id=None, created_at=None, updated_at=None):
10
  self.user_id = user_id
11
  self.department_id = department_id
12
  self.log_date = log_date
13
- self.log_file = log_file # URL/path to the stored log file
 
14
  self.incidents = incidents or [] # Array of incident IDs
15
  self._id = _id
16
  self.created_at = created_at or datetime.now()
@@ -22,7 +23,8 @@ class Log:
22
  "user_id": str(self.user_id) if self.user_id else None,
23
  "department_id": str(self.department_id) if self.department_id else None,
24
  "log_date": self.log_date,
25
- "log_file": self.log_file,
 
26
  "incidents": [str(incident_id) for incident_id in self.incidents],
27
  "created_at": self.created_at,
28
  "updated_at": self.updated_at
 
5
  from models.department import Department
6
 
7
  class Log:
8
+ def __init__(self, user_id, department_id, log_date, log_text=None, log_file=None, incidents=None,
9
  _id=None, created_at=None, updated_at=None):
10
  self.user_id = user_id
11
  self.department_id = department_id
12
  self.log_date = log_date
13
+ self.log_text = log_text # Extracted text content from the log
14
+ self.log_file = log_file # For backward compatibility
15
  self.incidents = incidents or [] # Array of incident IDs
16
  self._id = _id
17
  self.created_at = created_at or datetime.now()
 
23
  "user_id": str(self.user_id) if self.user_id else None,
24
  "department_id": str(self.department_id) if self.department_id else None,
25
  "log_date": self.log_date,
26
+ "log_text": self.log_text,
27
+ "log_file": self.log_file, # Keep for backward compatibility
28
  "incidents": [str(incident_id) for incident_id in self.incidents],
29
  "created_at": self.created_at,
30
  "updated_at": self.updated_at
routes/incident_routes.py CHANGED
@@ -1,8 +1,9 @@
1
- from flask import Blueprint
2
  from controllers.incident_controller import (
3
  get_incident, delete_incident, get_user_incidents,
4
  get_department_incidents, get_workflow_incidents,
5
- get_incidents_by_date_range, reprocess_incident
 
6
  )
7
  from utils.auth import token_required, admin_required
8
 
@@ -15,7 +16,14 @@ incident_bp.route('/date-range', methods=['POST'])(token_required(get_incidents_
15
  incident_bp.route('/<incident_id>', methods=['GET'])(token_required(get_incident))
16
  incident_bp.route('/<incident_id>', methods=['DELETE'])(token_required(delete_incident))
17
  incident_bp.route('/workflow/<workflow_id>', methods=['GET'])(token_required(get_workflow_incidents))
 
18
 
19
  # Routes that require admin permissions
20
  incident_bp.route('/department', methods=['GET'])(admin_required(get_department_incidents))
21
- incident_bp.route('/<incident_id>/reprocess', methods=['POST'])(admin_required(reprocess_incident))
 
 
 
 
 
 
 
1
+ from flask import Blueprint, request
2
  from controllers.incident_controller import (
3
  get_incident, delete_incident, get_user_incidents,
4
  get_department_incidents, get_workflow_incidents,
5
+ get_incidents_by_date_range, reprocess_incident,
6
+ process_incident_sync, create_incident_from_activity
7
  )
8
  from utils.auth import token_required, admin_required
9
 
 
16
  incident_bp.route('/<incident_id>', methods=['GET'])(token_required(get_incident))
17
  incident_bp.route('/<incident_id>', methods=['DELETE'])(token_required(delete_incident))
18
  incident_bp.route('/workflow/<workflow_id>', methods=['GET'])(token_required(get_workflow_incidents))
19
+ incident_bp.route('/create-from-activity', methods=['POST'])(token_required(create_incident_from_activity))
20
 
21
  # Routes that require admin permissions
22
  incident_bp.route('/department', methods=['GET'])(admin_required(get_department_incidents))
23
+ incident_bp.route('/<incident_id>/reprocess', methods=['POST'])(admin_required(reprocess_incident))
24
+
25
+ @incident_bp.route('/<incident_id>/process', methods=['POST'])
26
+ @token_required
27
+ def process_incident_route(current_user, incident_id):
28
+ """Process an incident's form synchronously"""
29
+ return process_incident_sync(current_user, incident_id)
routes/log_routes.py CHANGED
@@ -1,7 +1,7 @@
1
  from flask import Blueprint, send_file, jsonify, request
2
  from controllers.log_controller import (
3
  upload_log, get_log, delete_log, get_user_logs,
4
- get_department_logs, get_logs_by_date_range
5
  )
6
  from utils.auth import token_required, admin_required
7
  from db import get_gridfs
@@ -9,10 +9,17 @@ from bson.objectid import ObjectId
9
  from io import BytesIO
10
 
11
  # Create blueprint
12
- log_bp = Blueprint('log', __name__)
13
 
14
  # Routes that require authentication
15
- log_bp.route('/', methods=['POST'])(token_required(upload_log))
 
 
 
 
 
 
 
16
  log_bp.route('/user', methods=['GET'])(token_required(get_user_logs))
17
  log_bp.route('/date-range', methods=['POST'])(token_required(get_logs_by_date_range))
18
  log_bp.route('/<log_id>', methods=['GET'])(token_required(get_log))
 
1
from flask import Blueprint, send_file, jsonify, request
from controllers.log_controller import (
    upload_log, get_log, delete_log, get_user_logs,
    get_department_logs, get_logs_by_date_range, classify_log_activities
)
from utils.auth import token_required, admin_required
from db import get_gridfs
from bson.objectid import ObjectId
from io import BytesIO

# Blueprint under which every log endpoint is registered
log_bp = Blueprint('logs', __name__)

# Authenticated upload endpoint. add_url_rule is the explicit form of the
# route() decorator; the endpoint name still comes from the view function.
log_bp.add_url_rule('/upload',
                    view_func=token_required(upload_log),
                    methods=['POST'])
17
+ # Explicitly define the blueprint route with function directly
18
# Explicitly define the blueprint route with function directly
@log_bp.route('/classify', methods=['POST'])
@token_required
def classify_route(current_user):
    """Classify activities in the caller's log data.

    Route-level wrapper only: @token_required resolves the current user,
    and classify_log_activities in the controller layer produces the
    response, which is passed back verbatim.
    """
    return classify_log_activities(current_user)
22
+
23
# Remaining authenticated read endpoints, in explicit add_url_rule form
# (equivalent to log_bp.route(rule, methods=...)(view)).
log_bp.add_url_rule('/user',
                    view_func=token_required(get_user_logs),
                    methods=['GET'])
log_bp.add_url_rule('/date-range',
                    view_func=token_required(get_logs_by_date_range),
                    methods=['POST'])
log_bp.add_url_rule('/<log_id>',
                    view_func=token_required(get_log),
                    methods=['GET'])
utils/pdf_utils.py CHANGED
@@ -16,32 +16,60 @@ from datetime import datetime
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
- def pdf_to_text(pdf_url):
20
- """Extract text from a PDF file using OCR"""
21
- try:
22
- # Download the PDF file
23
- response = requests.get(pdf_url)
24
- if response.status_code != 200:
25
- raise Exception(f"Failed to download PDF: HTTP {response.status_code}")
26
-
27
- pdf_bytes = io.BytesIO(response.content)
28
 
29
- # Create a temporary directory for the PDF pages
 
 
 
 
30
  with tempfile.TemporaryDirectory() as temp_dir:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Convert PDF to images
32
- pages = pdf2image.convert_from_bytes(
33
- pdf_bytes.read(),
34
- dpi=300,
35
- output_folder=temp_dir
36
- )
 
37
 
38
- # Extract text from each page
39
- text = ""
40
- for i, page in enumerate(pages):
41
- logger.info(f"Processing page {i+1}/{len(pages)}")
42
- text += pytesseract.image_to_string(page) + "\n\n"
43
 
44
- return text
45
  except Exception as e:
46
  logger.error(f"Error extracting text from PDF: {str(e)}")
47
  raise
 
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
def pdf_to_text(pdf_source, is_bytes=False):
    """
    Extract text from a PDF using OCR.

    Args:
        pdf_source: Either a URL to a PDF or the PDF content as bytes
        is_bytes: Whether pdf_source is bytes (True) or a URL (False)

    Returns:
        str: Text extracted from every page, each page followed by a
             blank-line separator

    Raises:
        Exception: If the PDF cannot be downloaded, converted, or OCR'd
                   (the error is logged and re-raised)
    """
    try:
        # All intermediate artifacts (the saved PDF and any page images
        # created by pdf2image) live in a throwaway directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            pdf_path = os.path.join(temp_dir, "document.pdf")

            if is_bytes:
                # Caller already holds the raw PDF bytes — write them out.
                with open(pdf_path, 'wb') as f:
                    f.write(pdf_source)
            else:
                # Internal API paths ('/api/...') are served by this app
                # itself, so point them at the local server.
                # NOTE(review): host/port are hard-coded — consider making
                # this configurable.
                if pdf_source.startswith('/api/'):
                    pdf_url = f"http://localhost:5000{pdf_source}"
                else:
                    pdf_url = pdf_source

                logger.info("Downloading PDF from %s", pdf_url)
                # timeout added: without it a dead or stalled server would
                # hang this worker indefinitely.
                response = requests.get(pdf_url, timeout=60)
                if response.status_code != 200:
                    logger.error(f"Failed to download PDF: {response.status_code}")
                    raise Exception(f"Failed to download PDF: {response.status_code}")

                with open(pdf_path, 'wb') as f:
                    f.write(response.content)

            # Rasterize the pages, then OCR each page image.
            logger.info("Converting PDF to images")
            images = pdf2image.convert_from_path(pdf_path)

            logger.info("Extracting text with OCR from %d pages", len(images))
            page_texts = []
            for i, image in enumerate(images):
                logger.info("Processing page %d/%d", i + 1, len(images))
                page_texts.append(pytesseract.image_to_string(image))

            # join instead of repeated += keeps assembly linear; output is
            # identical: each page's text followed by "\n\n".
            return "".join(text + "\n\n" for text in page_texts)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {str(e)}")
        raise
  raise