dhruv575 commited on
Commit
c31b74d
·
1 Parent(s): f3a4783
controllers/incident_controller.py CHANGED
@@ -5,46 +5,128 @@ from models.incident import Incident
5
  from models.workflow import Workflow
6
  from models.log import Log
7
  from utils.celery_tasks import process_incident_forms
 
 
 
 
 
 
 
8
 
9
  # Configure logging
10
  logger = logging.getLogger(__name__)
11
 
12
  def get_incident(current_user, incident_id):
13
  """Get incident by ID"""
14
- incident = Incident.find_by_id(incident_id)
15
- if not incident:
16
- return jsonify({'message': 'Incident not found'}), 404
17
-
18
- # Check if user has access to this incident
19
- if str(incident.department_id) != str(current_user.department_id):
20
- return jsonify({'message': 'Access denied to incidents from other departments'}), 403
21
-
22
- return jsonify({'incident': incident.to_dict()}), 200
23
-
 
 
 
 
 
 
 
24
  def delete_incident(current_user, incident_id):
25
- """Delete an incident"""
26
- incident = Incident.find_by_id(incident_id)
27
- if not incident:
28
- return jsonify({'message': 'Incident not found'}), 404
29
-
30
- # Check if user has access to this incident
31
- if str(incident.department_id) != str(current_user.department_id):
32
- return jsonify({'message': 'Access denied to incidents from other departments'}), 403
33
-
34
- # Additional check: only incident owner or admin can delete
35
- if str(incident.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
36
- return jsonify({'message': 'Only the incident owner or department admin can delete incidents'}), 403
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Delete the incident
39
- if incident.delete():
40
- return jsonify({'message': 'Incident deleted successfully'}), 200
41
- else:
42
- return jsonify({'message': 'Failed to delete incident'}), 500
43
 
44
  def get_user_incidents(current_user):
45
  """Get all incidents for the current user"""
46
- incidents = Incident.find_by_user(current_user._id)
47
- return jsonify({'incidents': [incident.to_dict() for incident in incidents]}), 200
 
 
 
 
 
 
 
 
 
48
 
49
  def get_department_incidents(current_user):
50
  """Get all incidents for the user's department"""
@@ -115,4 +197,75 @@ def reprocess_incident(current_user, incident_id):
115
  except Exception as e:
116
  logger.error(f"Error reprocessing incident {incident_id}: {str(e)}")
117
  incident.update_status("failed")
118
- return jsonify({'message': f'Error reprocessing incident: {str(e)}'}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from models.workflow import Workflow
6
  from models.log import Log
7
  from utils.celery_tasks import process_incident_forms
8
+ from models.user import User
9
+ from models.department import Department
10
+ from utils.pdf_utils import fill_markdown_form, extract_required_data, save_filled_form
11
+ from db import get_gridfs
12
+ from bson.objectid import ObjectId
13
+ import os
14
+ import openai
15
 
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
def get_incident(current_user, incident_id):
    """Return a single incident as JSON, enforcing access control.

    Access is granted when the requester is an admin, owns the incident,
    or belongs to the incident's department. Returns 404 for an unknown
    id, 403 on denial, and 500 on unexpected errors.
    """
    try:
        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Non-admins must either own the incident or share its department.
        is_owner = str(incident.user_id) == str(current_user._id)
        same_department = str(incident.department_id) == str(current_user.department_id)
        if not (current_user.is_admin or is_owner or same_department):
            return jsonify({'message': 'You do not have permission to access this incident'}), 403

        return jsonify({'incident': incident.to_dict()}), 200
    except Exception as e:
        logger.error(f"Error getting incident: {str(e)}")
        return jsonify({'message': f'Error getting incident: {str(e)}'}), 500
37
+
38
def delete_incident(current_user, incident_id):
    """Delete an incident by ID.

    Only an admin or the incident's owner may delete it. Returns 404 for
    an unknown id, 403 on denial, 200 on success, 500 on failure.
    """
    try:
        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Deletion is restricted to admins and the incident's owner.
        if not (current_user.is_admin or str(incident.user_id) == str(current_user._id)):
            return jsonify({'message': 'You do not have permission to delete this incident'}), 403

        if incident.delete():
            return jsonify({'message': 'Incident deleted successfully'}), 200
        return jsonify({'message': 'Failed to delete incident'}), 500
    except Exception as e:
        logger.error(f"Error deleting incident: {str(e)}")
        return jsonify({'message': f'Error deleting incident: {str(e)}'}), 500
59
+
60
def process_incident_sync(current_user, incident_id):
    """Synchronously run the LLM form-filling pipeline for one incident.

    Steps: permission check, workflow/template lookup, data extraction
    from the incident's activity text, markdown form fill, then status
    update. Returns the filled markdown alongside the updated incident.
    """
    try:
        # Fail fast when the LLM backend is not configured.
        api_key = os.environ.get('OPENAI_API_KEY')
        if not api_key:
            logger.error("OPENAI_API_KEY environment variable is not set")
            return jsonify({'message': 'OpenAI API key not configured'}), 500
        openai.api_key = api_key

        incident = Incident.find_by_id(incident_id)
        if incident is None:
            return jsonify({'message': 'Incident not found'}), 404

        # Admins, the owner, and department members may trigger processing.
        allowed = (
            current_user.is_admin
            or str(incident.user_id) == str(current_user._id)
            or str(incident.department_id) == str(current_user.department_id)
        )
        if not allowed:
            return jsonify({'message': 'You do not have permission to process this incident'}), 403

        workflow = Workflow.find_by_id(incident.workflow_id)
        if workflow is None:
            return jsonify({'message': 'Workflow not found for this incident'}), 404
        if not workflow.markdown_template:
            return jsonify({'message': 'No form template found for this workflow'}), 404

        # Pull the structured fields the workflow needs out of the free text.
        logger.info(f"Extracting required data for incident {incident_id}")
        required_data = extract_required_data(incident.activity_text, workflow.data_requirements)
        incident.extracted_data = required_data

        # Fill the workflow's markdown template with the extracted values.
        logger.info(f"Filling form for incident {incident_id}")
        filled_markdown = fill_markdown_form(workflow.markdown_template, required_data, incident.activity_text)

        incident.status = "completed"
        if not incident.save():
            return jsonify({'message': 'Failed to update incident'}), 500

        return jsonify({
            'message': 'Incident processed successfully',
            'incident': incident.to_dict(),
            'filled_markdown': filled_markdown
        }), 200

    except Exception as e:
        logger.error(f"Error processing incident {incident_id}: {str(e)}")
        return jsonify({'message': f'Error processing incident: {str(e)}'}), 500
 
 
116
 
117
def get_user_incidents(current_user):
    """Return every incident belonging to the requesting user as JSON."""
    try:
        found = Incident.find_by_user(current_user._id)
        payload = [item.to_dict() for item in found]
        return jsonify({'incidents': payload}), 200
    except Exception as e:
        logger.error(f"Error getting user incidents: {str(e)}")
        return jsonify({'message': f'Error getting user incidents: {str(e)}'}), 500
130
 
131
  def get_department_incidents(current_user):
132
  """Get all incidents for the user's department"""
 
197
  except Exception as e:
198
  logger.error(f"Error reprocessing incident {incident_id}: {str(e)}")
199
  incident.update_status("failed")
200
+ return jsonify({'message': f'Error reprocessing incident: {str(e)}'}), 500
201
+
202
def create_incident_from_activity(current_user):
    """Create an incident from activity data and a workflow ID.

    Expects a JSON body with 'activity' (dict with 'activity'/'text' keys),
    'workflow_id', 'date' (YYYY-MM-DD string or date object), and 'log_text'.
    Optionally accepts 'log_id'; when absent, a new Log is created from
    'log_text'. Returns 201 with the created incident, 400 for bad input,
    403/404 for access/lookup failures, and 500 on unexpected errors.
    """
    try:
        data = request.get_json()

        # get_json() returns None for a missing/invalid JSON body; report a
        # client error instead of letting the field check raise into a 500.
        if not data:
            return jsonify({'message': 'Missing JSON request body'}), 400

        # Check if required fields are present
        required_fields = ['activity', 'workflow_id', 'date', 'log_text']
        for field in required_fields:
            if field not in data:
                return jsonify({'message': f'Missing required field: {field}'}), 400

        # Validate date format (accept an already-parsed date object as-is)
        try:
            if isinstance(data['date'], str):
                date = datetime.strptime(data['date'], '%Y-%m-%d').date()
            else:
                date = data['date']
        except ValueError:
            return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400

        # Check if workflow exists and user has access
        workflow = Workflow.find_by_id(data['workflow_id'])
        if not workflow:
            return jsonify({'message': 'Workflow not found'}), 404

        # Users may only attach incidents to their own department's workflows.
        if str(workflow.department_id) != str(current_user.department_id):
            return jsonify({'message': 'Access denied to workflows from other departments'}), 403

        # Create a log entry first if one was not supplied.
        log_id = data.get('log_id')
        if not log_id:
            log = Log(
                user_id=current_user._id,
                department_id=current_user.department_id,
                log_date=date,
                log_text=data['log_text']
            )
            if log.save():
                log_id = log._id
            else:
                # Best-effort: the incident can still be created without a
                # log, but the failure should not pass silently.
                logger.warning("Failed to save log entry while creating incident from activity")

        # Create the incident
        incident = Incident(
            department_id=current_user.department_id,
            user_id=current_user._id,
            workflow_id=ObjectId(data['workflow_id']),
            description=data['activity'].get('activity', 'No description'),
            date=date,
            activity_text=data['activity'].get('text', ''),
            log_id=log_id,
            status="pending"
        )

        if not incident.save():
            return jsonify({'message': 'Failed to create incident'}), 500

        # Link the incident back to its log, if any.
        if log_id:
            log = Log.find_by_id(log_id)
            if log:
                log.add_incident(incident._id)

        return jsonify({
            'message': 'Incident created successfully',
            'incident': incident.to_dict()
        }), 201

    except Exception as e:
        logger.error(f"Error creating incident from activity: {str(e)}")
        return jsonify({'message': f'Error creating incident: {str(e)}'}), 500
controllers/log_controller.py CHANGED
@@ -1,6 +1,7 @@
1
  from flask import jsonify, request
2
  import logging
3
  import os
 
4
  from datetime import datetime
5
  import uuid
6
  import pytesseract
@@ -14,8 +15,7 @@ from models.user import User
14
  from models.department import Department
15
  from models.workflow import Workflow
16
  from models.incident import Incident
17
- from utils.celery_tasks import process_log_document
18
- from utils.pdf_utils import pdf_to_text
19
  from db import get_gridfs
20
  from bson.objectid import ObjectId
21
 
@@ -23,7 +23,7 @@ from bson.objectid import ObjectId
23
  logger = logging.getLogger(__name__)
24
 
25
  def upload_log(current_user):
26
- """Upload a new log file"""
27
  if 'file' not in request.files:
28
  return jsonify({'message': 'No file part'}), 400
29
 
@@ -44,47 +44,31 @@ def upload_log(current_user):
44
  # Parse the date string
45
  log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
46
 
47
- # Upload file to GridFS
48
- fs = get_gridfs()
49
- file_id = fs.put(
50
- file.read(),
51
- filename=file.filename,
52
- content_type='application/pdf',
53
- metadata={
54
- 'user_id': str(current_user._id),
55
- 'department_id': str(current_user.department_id),
56
- 'log_date': log_date_str,
57
- 'upload_date': datetime.now()
58
- }
59
- )
60
 
61
- # Create the file URL for retrieval
62
- log_file_url = f"/api/logs/files/{file_id}"
 
63
 
64
- # Create new log entry
65
  log = Log(
66
  user_id=current_user._id,
67
  department_id=current_user.department_id,
68
  log_date=log_date,
69
- log_file=log_file_url
70
  )
71
 
72
  if log.save():
73
- # Start asynchronous processing using Celery
74
- process_log_document.delay(str(log._id))
75
 
76
  return jsonify({
77
- 'message': 'Log uploaded successfully and processing started',
78
- 'log': log.to_dict()
 
79
  }), 201
80
  else:
81
- # Clean up GridFS file if log save fails
82
- try:
83
- fs.delete(file_id)
84
- logger.info(f"Deleted file {file_id} from GridFS after failed log save")
85
- except Exception as del_e:
86
- logger.error(f"Failed to delete GridFS file {file_id} after DB error: {del_e}")
87
-
88
  return jsonify({'message': 'Failed to save log entry'}), 500
89
 
90
  except ValueError:
@@ -93,6 +77,176 @@ def upload_log(current_user):
93
  logger.error(f"Error uploading log: {str(e)}")
94
  return jsonify({'message': f'Error uploading log: {str(e)}'}), 500
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def get_log(current_user, log_id):
97
  """Get log by ID"""
98
  log = Log.find_by_id(log_id)
@@ -119,17 +273,6 @@ def delete_log(current_user, log_id):
119
  if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
120
  return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
121
 
122
- # Delete the log file from GridFS if it exists
123
- if log.log_file and '/files/' in log.log_file:
124
- try:
125
- # Extract file_id from URL
126
- file_id = log.log_file.split('/')[-1]
127
- fs = get_gridfs()
128
- fs.delete(ObjectId(file_id))
129
- logger.info(f"Deleted file {file_id} from GridFS")
130
- except Exception as e:
131
- logger.error(f"Error deleting log file from GridFS: {str(e)}")
132
-
133
  # Delete associated incidents if they exist
134
  for incident_id in log.incidents:
135
  incident = Incident.find_by_id(incident_id)
@@ -177,4 +320,117 @@ def get_logs_by_date_range(current_user):
177
  return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400
178
  except Exception as e:
179
  logger.error(f"Error fetching logs by date range: {str(e)}")
180
- return jsonify({'message': f'Error fetching logs: {str(e)}'}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from flask import jsonify, request
2
  import logging
3
  import os
4
+ import json
5
  from datetime import datetime
6
  import uuid
7
  import pytesseract
 
15
  from models.department import Department
16
  from models.workflow import Workflow
17
  from models.incident import Incident
18
+ from utils.pdf_utils import pdf_to_text, extract_activities
 
19
  from db import get_gridfs
20
  from bson.objectid import ObjectId
21
 
 
23
  logger = logging.getLogger(__name__)
24
 
25
  def upload_log(current_user):
26
+ """Upload a new log file, extract text using OCR, and save only the text"""
27
  if 'file' not in request.files:
28
  return jsonify({'message': 'No file part'}), 400
29
 
 
44
  # Parse the date string
45
  log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
46
 
47
+ # Read the file content
48
+ file_content = file.read()
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # Extract text from PDF using OCR
51
+ logger.info(f"Extracting text from PDF using OCR")
52
+ extracted_text = pdf_to_text(file_content, is_bytes=True)
53
 
54
+ # Create new log entry with the extracted text instead of file reference
55
  log = Log(
56
  user_id=current_user._id,
57
  department_id=current_user.department_id,
58
  log_date=log_date,
59
+ log_text=extracted_text # Store the extracted text, not a file reference
60
  )
61
 
62
  if log.save():
63
+ # Process log synchronously
64
+ result = process_log_sync(str(log._id))
65
 
66
  return jsonify({
67
+ 'message': 'Log uploaded and processed successfully',
68
+ 'log': log.to_dict(),
69
+ 'incidents_created': result.get('incidents_created', 0)
70
  }), 201
71
  else:
 
 
 
 
 
 
 
72
  return jsonify({'message': 'Failed to save log entry'}), 500
73
 
74
  except ValueError:
 
77
  logger.error(f"Error uploading log: {str(e)}")
78
  return jsonify({'message': f'Error uploading log: {str(e)}'}), 500
79
 
80
def process_log_sync(log_id):
    """Process a log document synchronously.

    Uses the log's stored OCR text, asks the LLM to split it into
    activities, classifies each against the department's workflows, and
    creates an incident per classified activity. Returns a status dict
    (never raises).
    """
    try:
        # NOTE(review): this module's visible imports do not show `openai`;
        # confirm it is imported at file level before relying on this path.
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return {"status": "error", "message": "OpenAI API key not configured"}

        openai.api_key = os.environ.get('OPENAI_API_KEY')

        log = Log.find_by_id(log_id)
        if not log:
            logger.error(f"Log not found: {log_id}")
            return {"status": "error", "message": "Log not found"}

        # The OCR text was stored at upload time; no PDF re-processing needed.
        logger.info(f"Using stored text for log {log_id}")
        extracted_text = log.log_text

        # Extract activities using the LLM, then parse the JSON reply.
        logger.info(f"Extracting activities for log {log_id}")
        activities_json = extract_activities(extracted_text)
        activities = json.loads(activities_json).get('activities', [])

        logger.info(f"Classifying activities and creating incidents for log {log_id}")

        # Nothing to classify against without department workflows.
        workflows = Workflow.find_by_department(log.department_id)
        if not workflows:
            logger.warning(f"No workflows defined for department {log.department_id}")
            return {"status": "completed", "message": "No workflows to process", "incidents_created": 0}

        workflow_info = [
            {"id": str(wf._id), "title": wf.title, "description": wf.description}
            for wf in workflows
        ]

        classified_activities = []
        created_incidents = 0

        for activity in activities:
            matched_workflow = classify_activity(activity, workflow_info)
            if not matched_workflow:
                continue

            logger.info(f"Creating incident for activity: {activity['activity']}")
            incident = Incident(
                department_id=log.department_id,
                user_id=log.user_id,
                workflow_id=ObjectId(matched_workflow),
                description=activity['activity'],
                date=log.log_date,
                activity_text=activity['text'],
                log_id=log._id,
                status="completed"  # Mark as completed since we're processing synchronously
            )

            if incident.save():
                log.add_incident(incident._id)
                created_incidents += 1
                classified_activities.append({
                    "activity": activity,
                    "workflow_id": matched_workflow,
                    "incident_id": str(incident._id)
                })

        return {
            "status": "completed",
            "message": "Log processing completed",
            "incidents_created": created_incidents,
            "classified_activities": classified_activities
        }

    except Exception as e:
        logger.error(f"Error processing log {log_id}: {str(e)}")
        return {"status": "error", "message": str(e)}
175
+
176
def classify_activity(activity, workflow_info):
    """
    Classify an activity against available workflows.

    Asks the LLM to match the activity to one of the workflows described
    in `workflow_info` (list of dicts with 'id', 'title', 'description').
    Returns the matched workflow id, or None for mundane/unmatched
    activities and on any error.
    """
    try:
        # Check if OpenAI API key is set
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return None

        # Set OpenAI API key
        openai.api_key = os.environ.get('OPENAI_API_KEY')

        # Prepare prompt for OpenAI
        workflows_text = "\n".join([
            f"Workflow {i+1}: {w['title']} - {w['description']}"
            for i, w in enumerate(workflow_info)
        ])

        prompt = f"""
        I need to classify a law enforcement activity into one of our defined workflows,
        or determine if it's a routine/mundane activity that doesn't match any workflow.

        Here are the available workflows:
        {workflows_text}

        Here is the activity:
        Activity: {activity['activity']}
        Full Text: {activity['text']}
        Time: {activity.get('time', 'Not specified')}
        Location: {activity.get('location', 'Not specified')}

        Please classify this activity into one of the workflows, or indicate it's mundane.
        Respond with just the workflow ID if it matches, or "mundane" if it doesn't match any workflow.
        """

        # Call OpenAI API
        response = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a law enforcement activity classifier that matches activities to defined workflows."},
                {"role": "user", "content": prompt}
            ]
        )

        # Map the free-text reply onto a workflow id.
        result = response.choices[0].message.content.strip()
        return _match_classification_result(result, workflow_info)

    except Exception as e:
        logger.error(f"Error classifying activity: {str(e)}")
        return None


def _match_classification_result(result, workflow_info):
    """Map a raw LLM reply to a workflow id, or None for mundane/unmatched.

    LLM replies rarely match the requested format exactly, so parsing is
    tolerant: casing/punctuation around "mundane" is ignored, ids and
    titles are matched by substring, and a numeric answer (the prompt
    numbers workflows from 1, e.g. "Workflow 2") is treated as an index
    even when embedded in surrounding text.
    """
    import re

    # Tolerate replies like "Mundane." instead of requiring exact equality.
    if "mundane" in result.lower():
        return None

    # Find the workflow by ID or title substring.
    for workflow in workflow_info:
        if workflow['id'] in result:
            return workflow['id']
        if workflow['title'] in result:
            return workflow['id']

    # Fall back to the first number in the reply as a 1-based index.
    number = re.search(r'\d+', result)
    if number:
        index = int(number.group()) - 1
        if 0 <= index < len(workflow_info):
            return workflow_info[index]['id']

    return None
249
+
250
  def get_log(current_user, log_id):
251
  """Get log by ID"""
252
  log = Log.find_by_id(log_id)
 
273
  if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
274
  return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
275
 
 
 
 
 
 
 
 
 
 
 
 
276
  # Delete associated incidents if they exist
277
  for incident_id in log.incidents:
278
  incident = Incident.find_by_id(incident_id)
 
320
  return jsonify({'message': 'Invalid date format. Please use YYYY-MM-DD'}), 400
321
  except Exception as e:
322
  logger.error(f"Error fetching logs by date range: {str(e)}")
323
+ return jsonify({'message': f'Error fetching logs: {str(e)}'}), 500
324
+
325
def classify_log_activities(current_user):
    """
    Extract and classify activities from an uploaded log PDF without
    creating incidents. Returns the activities, per-activity classification
    results, workflow info, and the OCR'd text for the frontend to handle.
    """
    logger.info(f"classify_log_activities called. Files in request: {request.files.keys()}")

    # Validate the multipart upload before any expensive work.
    if 'file' not in request.files:
        logger.error("No file part in the request")
        return jsonify({'message': 'No file part'}), 400

    file = request.files['file']
    if file.filename == '':
        logger.error("No selected file")
        return jsonify({'message': 'No selected file'}), 400

    if not file.filename.lower().endswith('.pdf'):
        logger.error(f"Invalid file type: {file.filename}")
        return jsonify({'message': 'Only PDF files are allowed'}), 400

    try:
        # Fail fast when the LLM backend is not configured.
        if not os.environ.get('OPENAI_API_KEY'):
            logger.error("OPENAI_API_KEY environment variable is not set")
            return jsonify({'message': 'OpenAI API key not configured'}), 500

        openai.api_key = os.environ.get('OPENAI_API_KEY')

        pdf_bytes = file.read()
        logger.info(f"Read {len(pdf_bytes)} bytes from file {file.filename}")

        # OCR the PDF into plain text.
        logger.info(f"Starting OCR for uploaded log")
        extracted_text = pdf_to_text(pdf_bytes, is_bytes=True)
        logger.info(f"Extracted {len(extracted_text)} characters of text")

        # Ask the LLM to break the text into discrete activities.
        logger.info(f"Extracting activities from log")
        activities_json = extract_activities(extracted_text)
        activities = json.loads(activities_json).get('activities', [])
        logger.info(f"Extracted {len(activities)} activities")

        logger.info(f"Classifying activities")

        workflows = Workflow.find_by_department(current_user.department_id)
        if not workflows:
            # No workflows: return the raw activities unclassified.
            logger.warning(f"No workflows defined for department {current_user.department_id}")
            return jsonify({
                'message': 'No workflows to classify against',
                'activities': activities,
                'classified_activities': [],
                'extracted_text': extracted_text  # Include the extracted text in the response
            }), 200

        workflow_info = [
            {"id": str(wf._id), "title": wf.title, "description": wf.description}
            for wf in workflows
        ]

        classified_activities = []
        for activity in activities:
            matched_id = classify_activity(activity, workflow_info)
            entry = {"activity": activity}
            if matched_id:
                match = next((w for w in workflow_info if w["id"] == matched_id), None)
                entry["workflow_id"] = matched_id
                entry["workflow_title"] = match["title"] if match else "Unknown workflow"
                entry["classified"] = True
            else:
                entry["classified"] = False
            classified_activities.append(entry)

        logger.info(f"Classification complete. {len(classified_activities)} activities classified.")

        return jsonify({
            'message': 'Log activities extracted and classified',
            'activities': activities,
            'classified_activities': classified_activities,
            'workflows': workflow_info,
            'extracted_text': extracted_text  # Include the extracted text in the response
        }), 200

    except Exception as e:
        logger.error(f"Error classifying log activities: {str(e)}")
        import traceback
        logger.error(traceback.format_exc())
        return jsonify({'message': f'Error classifying log activities: {str(e)}'}), 500
controllers/workflow_controller.py CHANGED
@@ -86,6 +86,12 @@ def update_workflow(current_user, workflow_id):
86
  return jsonify({'message': 'Access denied to workflows from other departments'}), 403
87
 
88
  data = request.get_json()
 
 
 
 
 
 
89
 
90
  # Update fields if provided
91
  if 'title' in data:
 
86
  return jsonify({'message': 'Access denied to workflows from other departments'}), 403
87
 
88
  data = request.get_json()
89
+ logger.info(f"Update workflow request data keys: {data.keys() if data else 'None'}")
90
+ if 'markdown_template' in data:
91
+ template_length = len(data['markdown_template']) if data['markdown_template'] else 0
92
+ logger.info(f"Received markdown_template with length: {template_length}")
93
+ if template_length > 0:
94
+ logger.info(f"First 100 chars: {data['markdown_template'][:100]}")
95
 
96
  # Update fields if provided
97
  if 'title' in data:
models/log.py CHANGED
@@ -5,12 +5,13 @@ from models.user import User
5
  from models.department import Department
6
 
7
  class Log:
8
- def __init__(self, user_id, department_id, log_date, log_file, incidents=None,
9
  _id=None, created_at=None, updated_at=None):
10
  self.user_id = user_id
11
  self.department_id = department_id
12
  self.log_date = log_date
13
- self.log_file = log_file # URL/path to the stored log file
 
14
  self.incidents = incidents or [] # Array of incident IDs
15
  self._id = _id
16
  self.created_at = created_at or datetime.now()
@@ -22,7 +23,8 @@ class Log:
22
  "user_id": str(self.user_id) if self.user_id else None,
23
  "department_id": str(self.department_id) if self.department_id else None,
24
  "log_date": self.log_date,
25
- "log_file": self.log_file,
 
26
  "incidents": [str(incident_id) for incident_id in self.incidents],
27
  "created_at": self.created_at,
28
  "updated_at": self.updated_at
 
5
  from models.department import Department
6
 
7
  class Log:
8
+ def __init__(self, user_id, department_id, log_date, log_text=None, log_file=None, incidents=None,
9
  _id=None, created_at=None, updated_at=None):
10
  self.user_id = user_id
11
  self.department_id = department_id
12
  self.log_date = log_date
13
+ self.log_text = log_text # Extracted text content from the log
14
+ self.log_file = log_file # For backward compatibility
15
  self.incidents = incidents or [] # Array of incident IDs
16
  self._id = _id
17
  self.created_at = created_at or datetime.now()
 
23
  "user_id": str(self.user_id) if self.user_id else None,
24
  "department_id": str(self.department_id) if self.department_id else None,
25
  "log_date": self.log_date,
26
+ "log_text": self.log_text,
27
+ "log_file": self.log_file, # Keep for backward compatibility
28
  "incidents": [str(incident_id) for incident_id in self.incidents],
29
  "created_at": self.created_at,
30
  "updated_at": self.updated_at
routes/incident_routes.py CHANGED
@@ -1,8 +1,9 @@
1
- from flask import Blueprint
2
  from controllers.incident_controller import (
3
  get_incident, delete_incident, get_user_incidents,
4
  get_department_incidents, get_workflow_incidents,
5
- get_incidents_by_date_range, reprocess_incident
 
6
  )
7
  from utils.auth import token_required, admin_required
8
 
@@ -15,7 +16,14 @@ incident_bp.route('/date-range', methods=['POST'])(token_required(get_incidents_
15
  incident_bp.route('/<incident_id>', methods=['GET'])(token_required(get_incident))
16
  incident_bp.route('/<incident_id>', methods=['DELETE'])(token_required(delete_incident))
17
  incident_bp.route('/workflow/<workflow_id>', methods=['GET'])(token_required(get_workflow_incidents))
 
18
 
19
  # Routes that require admin permissions
20
  incident_bp.route('/department', methods=['GET'])(admin_required(get_department_incidents))
21
- incident_bp.route('/<incident_id>/reprocess', methods=['POST'])(admin_required(reprocess_incident))
 
 
 
 
 
 
 
1
+ from flask import Blueprint, request
2
  from controllers.incident_controller import (
3
  get_incident, delete_incident, get_user_incidents,
4
  get_department_incidents, get_workflow_incidents,
5
+ get_incidents_by_date_range, reprocess_incident,
6
+ process_incident_sync, create_incident_from_activity
7
  )
8
  from utils.auth import token_required, admin_required
9
 
 
16
  incident_bp.route('/<incident_id>', methods=['GET'])(token_required(get_incident))
17
  incident_bp.route('/<incident_id>', methods=['DELETE'])(token_required(delete_incident))
18
  incident_bp.route('/workflow/<workflow_id>', methods=['GET'])(token_required(get_workflow_incidents))
19
+ incident_bp.route('/create-from-activity', methods=['POST'])(token_required(create_incident_from_activity))
20
 
21
  # Routes that require admin permissions
22
  incident_bp.route('/department', methods=['GET'])(admin_required(get_department_incidents))
23
+ incident_bp.route('/<incident_id>/reprocess', methods=['POST'])(admin_required(reprocess_incident))
24
+
25
+ @incident_bp.route('/<incident_id>/process', methods=['POST'])
26
+ @token_required
27
+ def process_incident_route(current_user, incident_id):
28
+ """Process an incident's form synchronously"""
29
+ return process_incident_sync(current_user, incident_id)
routes/log_routes.py CHANGED
@@ -1,7 +1,7 @@
1
  from flask import Blueprint, send_file, jsonify, request
2
  from controllers.log_controller import (
3
  upload_log, get_log, delete_log, get_user_logs,
4
- get_department_logs, get_logs_by_date_range
5
  )
6
  from utils.auth import token_required, admin_required
7
  from db import get_gridfs
@@ -9,10 +9,17 @@ from bson.objectid import ObjectId
9
  from io import BytesIO
10
 
11
  # Create blueprint
12
- log_bp = Blueprint('log', __name__)
13
 
14
  # Routes that require authentication
15
- log_bp.route('/', methods=['POST'])(token_required(upload_log))
 
 
 
 
 
 
 
16
  log_bp.route('/user', methods=['GET'])(token_required(get_user_logs))
17
  log_bp.route('/date-range', methods=['POST'])(token_required(get_logs_by_date_range))
18
  log_bp.route('/<log_id>', methods=['GET'])(token_required(get_log))
 
1
from flask import Blueprint, send_file, jsonify, request
from controllers.log_controller import (
    upload_log, get_log, delete_log, get_user_logs,
    get_department_logs, get_logs_by_date_range, classify_log_activities
)
from utils.auth import token_required, admin_required
from db import get_gridfs
from bson.objectid import ObjectId
from io import BytesIO

# Blueprint under which every log endpoint is registered
log_bp = Blueprint('logs', __name__)

# Authenticated upload endpoint. add_url_rule is the explicit form of the
# route() decorator; the endpoint name still comes from the view function.
log_bp.add_url_rule('/upload',
                    view_func=token_required(upload_log),
                    methods=['POST'])
17
+ # Explicitly define the blueprint route with function directly
18
# Explicitly define the blueprint route with function directly
@log_bp.route('/classify', methods=['POST'])
@token_required
def classify_route(current_user):
    """Classify activities in the caller's log data.

    Route-level wrapper only: @token_required resolves the current user,
    and classify_log_activities in the controller layer produces the
    response, which is passed back verbatim.
    """
    return classify_log_activities(current_user)
22
+
23
# Remaining authenticated read endpoints, in explicit add_url_rule form
# (equivalent to log_bp.route(rule, methods=...)(view)).
log_bp.add_url_rule('/user',
                    view_func=token_required(get_user_logs),
                    methods=['GET'])
log_bp.add_url_rule('/date-range',
                    view_func=token_required(get_logs_by_date_range),
                    methods=['POST'])
log_bp.add_url_rule('/<log_id>',
                    view_func=token_required(get_log),
                    methods=['GET'])
utils/pdf_utils.py CHANGED
@@ -16,32 +16,60 @@ from datetime import datetime
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
- def pdf_to_text(pdf_url):
20
- """Extract text from a PDF file using OCR"""
21
- try:
22
- # Download the PDF file
23
- response = requests.get(pdf_url)
24
- if response.status_code != 200:
25
- raise Exception(f"Failed to download PDF: HTTP {response.status_code}")
26
-
27
- pdf_bytes = io.BytesIO(response.content)
28
 
29
- # Create a temporary directory for the PDF pages
 
 
 
 
30
  with tempfile.TemporaryDirectory() as temp_dir:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Convert PDF to images
32
- pages = pdf2image.convert_from_bytes(
33
- pdf_bytes.read(),
34
- dpi=300,
35
- output_folder=temp_dir
36
- )
 
37
 
38
- # Extract text from each page
39
- text = ""
40
- for i, page in enumerate(pages):
41
- logger.info(f"Processing page {i+1}/{len(pages)}")
42
- text += pytesseract.image_to_string(page) + "\n\n"
43
 
44
- return text
45
  except Exception as e:
46
  logger.error(f"Error extracting text from PDF: {str(e)}")
47
  raise
 
16
  # Configure logging
17
  logger = logging.getLogger(__name__)
18
 
19
def pdf_to_text(pdf_source, is_bytes=False):
    """
    Extract text from a PDF using OCR.

    Args:
        pdf_source: Either a URL to a PDF or the PDF content as bytes
        is_bytes: Whether pdf_source is bytes (True) or a URL (False)

    Returns:
        str: Text extracted from every page, each page followed by a
             blank-line separator

    Raises:
        Exception: If the PDF cannot be downloaded, converted, or OCR'd
                   (the error is logged and re-raised)
    """
    try:
        # All intermediate artifacts (the saved PDF and any page images
        # created by pdf2image) live in a throwaway directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            pdf_path = os.path.join(temp_dir, "document.pdf")

            if is_bytes:
                # Caller already holds the raw PDF bytes — write them out.
                with open(pdf_path, 'wb') as f:
                    f.write(pdf_source)
            else:
                # Internal API paths ('/api/...') are served by this app
                # itself, so point them at the local server.
                # NOTE(review): host/port are hard-coded — consider making
                # this configurable.
                if pdf_source.startswith('/api/'):
                    pdf_url = f"http://localhost:5000{pdf_source}"
                else:
                    pdf_url = pdf_source

                logger.info("Downloading PDF from %s", pdf_url)
                # timeout added: without it a dead or stalled server would
                # hang this worker indefinitely.
                response = requests.get(pdf_url, timeout=60)
                if response.status_code != 200:
                    logger.error(f"Failed to download PDF: {response.status_code}")
                    raise Exception(f"Failed to download PDF: {response.status_code}")

                with open(pdf_path, 'wb') as f:
                    f.write(response.content)

            # Rasterize the pages, then OCR each page image.
            logger.info("Converting PDF to images")
            images = pdf2image.convert_from_path(pdf_path)

            logger.info("Extracting text with OCR from %d pages", len(images))
            page_texts = []
            for i, image in enumerate(images):
                logger.info("Processing page %d/%d", i + 1, len(images))
                page_texts.append(pytesseract.image_to_string(image))

            # join instead of repeated += keeps assembly linear; output is
            # identical: each page's text followed by "\n\n".
            return "".join(text + "\n\n" for text in page_texts)
    except Exception as e:
        logger.error(f"Error extracting text from PDF: {str(e)}")
        raise
  raise