Spaces:

droov
/

enflow-api

Sleeping

App Files Files Community

dhruv575 committed on Apr 26, 2025

Commit

f6249c8

1 Parent(s): aa62fa3

Proper migration to GridFS

Browse files

Files changed (6) hide show

app.py +1 -14
controllers/log_controller.py +32 -18
controllers/workflow_controller.py +9 -19
migrate_cloudinary_to_gridfs.py +127 -0
requirements.txt +10 -10
routes/log_routes.py +39 -3

app.py CHANGED Viewed

@@ -5,8 +5,6 @@ from dotenv import load_dotenv
 import logging
 from logging.handlers import RotatingFileHandler
 import pymongo
-import cloudinary
-import cloudinary.uploader
 import datetime
 import secrets
@@ -67,7 +65,7 @@ app.logger.info('App secrets configured.')
 # Verify essential environment variables
 missing_vars = []
-required_env_vars = ['MONGO_URI', 'CLOUDINARY_CLOUD_NAME', 'CLOUDINARY_API_KEY', 'CLOUDINARY_API_SECRET', 'OPENAI_API_KEY']
 for var in required_env_vars:
     if not os.environ.get(var):
         missing_vars.append(var)
@@ -88,17 +86,6 @@ except Exception as e:
     app.logger.error(f"Failed to connect to MongoDB: {str(e)}")
     # Depending on severity, you might want to exit or prevent app run
-# Configure Cloudinary
-try:
-    cloudinary.config(
-        cloud_name=os.environ.get('CLOUDINARY_CLOUD_NAME'),
-        api_key=os.environ.get('CLOUDINARY_API_KEY'),
-        api_secret=os.environ.get('CLOUDINARY_API_SECRET')
-    )
-    app.logger.info("Cloudinary configured.")
-except Exception as e:
-    app.logger.error(f"Failed to configure Cloudinary: {str(e)}")
 # Configure OpenAI API key
 if 'OPENAI_API_KEY' in os.environ:
     try:

 import logging
 from logging.handlers import RotatingFileHandler
 import pymongo
 import datetime
 import secrets
 # Verify essential environment variables
 missing_vars = []
+required_env_vars = ['MONGO_URI', 'OPENAI_API_KEY']
 for var in required_env_vars:
     if not os.environ.get(var):
         missing_vars.append(var)
     app.logger.error(f"Failed to connect to MongoDB: {str(e)}")
     # Depending on severity, you might want to exit or prevent app run
 # Configure OpenAI API key
 if 'OPENAI_API_KEY' in os.environ:
     try:

controllers/log_controller.py CHANGED Viewed

@@ -1,9 +1,8 @@
 from flask import jsonify, request
 import logging
 import os
-import cloudinary.uploader
-import uuid
 from datetime import datetime
 import pytesseract
 from PIL import Image
 import pdf2image
@@ -17,6 +16,8 @@ from models.workflow import Workflow
 from models.incident import Incident
 from utils.celery_tasks import process_log_document
 from utils.pdf_utils import pdf_to_text
 # Configure logging
 logger = logging.getLogger(__name__)
@@ -43,17 +44,22 @@ def upload_log(current_user):
         # Parse the date string
         log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
-        # Upload file to Cloudinary
-        result = cloudinary.uploader.upload(
-            file,
-            resource_type="raw",
-            folder=f"enflow/logs/{current_user.department_id}/{current_user._id}",
-            public_id=f"{uuid.uuid4()}",
-            format="pdf"
         )
-        # Get the public URL
-        log_file_url = result['secure_url']
         # Create new log entry
         log = Log(
@@ -72,6 +78,13 @@ def upload_log(current_user):
                 'log': log.to_dict()
             }), 201
         else:
             return jsonify({'message': 'Failed to save log entry'}), 500
     except ValueError:
@@ -106,15 +119,16 @@ def delete_log(current_user, log_id):
     if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
         return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
-    # Delete the log file from Cloudinary if it exists
-    if log.log_file and log.log_file.startswith('http'):
         try:
-            # Extract public_id from URL
-            parts = log.log_file.split('/')
-            public_id = parts[-1].split('.')[0]
-            cloudinary.uploader.destroy(public_id)
         except Exception as e:
-            logger.error(f"Error deleting log file from Cloudinary: {str(e)}")
     # Delete associated incidents if they exist
     for incident_id in log.incidents:

 from flask import jsonify, request
 import logging
 import os
 from datetime import datetime
+import uuid
 import pytesseract
 from PIL import Image
 import pdf2image
 from models.incident import Incident
 from utils.celery_tasks import process_log_document
 from utils.pdf_utils import pdf_to_text
+from db import get_gridfs
+from bson.objectid import ObjectId
 # Configure logging
 logger = logging.getLogger(__name__)
         # Parse the date string
         log_date = datetime.strptime(log_date_str, '%Y-%m-%d').date()
+        # Upload file to GridFS
+        fs = get_gridfs()
+        file_id = fs.put(
+            file.read(),
+            filename=file.filename,
+            content_type='application/pdf',
+            metadata={
+                'user_id': str(current_user._id),
+                'department_id': str(current_user.department_id),
+                'log_date': log_date_str,
+                'upload_date': datetime.now()
+            }
         )
+        # Create the file URL for retrieval
+        log_file_url = f"/api/logs/files/{file_id}"
         # Create new log entry
         log = Log(
                 'log': log.to_dict()
             }), 201
         else:
+            # Clean up GridFS file if log save fails
+            try:
+                fs.delete(file_id)
+                logger.info(f"Deleted file {file_id} from GridFS after failed log save")
+            except Exception as del_e:
+                logger.error(f"Failed to delete GridFS file {file_id} after DB error: {del_e}")
             return jsonify({'message': 'Failed to save log entry'}), 500
     except ValueError:
     if str(log.user_id) != str(current_user._id) and current_user.permissions != 'Admin':
         return jsonify({'message': 'Only the log owner or department admin can delete logs'}), 403
+    # Delete the log file from GridFS if it exists
+    if log.log_file and '/files/' in log.log_file:
         try:
+            # Extract file_id from URL
+            file_id = log.log_file.split('/')[-1]
+            fs = get_gridfs()
+            fs.delete(ObjectId(file_id))
+            logger.info(f"Deleted file {file_id} from GridFS")
         except Exception as e:
+            logger.error(f"Error deleting log file from GridFS: {str(e)}")
     # Delete associated incidents if they exist
     for incident_id in log.incidents:

controllers/workflow_controller.py CHANGED Viewed

@@ -3,8 +3,6 @@ from models.workflow import Workflow
 from models.department import Department
 import logging
 import os
-import cloudinary.uploader
-import uuid
 from db import get_gridfs
 from bson.objectid import ObjectId
 from datetime import datetime
@@ -138,15 +136,15 @@ def delete_workflow(current_user, workflow_id):
     # Delete all forms associated with the workflow
     for form_path in workflow.raw_forms:
         try:
-            # If using Cloudinary
-            if form_path.startswith('http'):
-                # Extract public_id from URL - assuming Cloudinary URL format
-                parts = form_path.split('/')
-                public_id = parts[-1].split('.')[0]
-                cloudinary.uploader.destroy(public_id)
-            # If using local storage
-            elif os.path.exists(form_path):
-                os.remove(form_path)
         except Exception as e:
             logger.error(f"Error deleting form {form_path}: {str(e)}")
@@ -333,17 +331,9 @@ def remove_form(current_user, workflow_id):
             file_id = form_url.split('/')[-1]
             # Delete from GridFS
-            from db import get_gridfs
             fs = get_gridfs()
             fs.delete(ObjectId(file_id))
             logger.info(f"Deleted file {file_id} from GridFS")
-        # If using Cloudinary (legacy support)
-        elif form_url.startswith('http'):
-            # Extract public_id from URL - assuming Cloudinary URL format
-            parts = form_url.split('/')
-            public_id = parts[-1].split('.')[0]
-            cloudinary.uploader.destroy(public_id)
-            logger.info(f"Deleted file {public_id} from Cloudinary")
         # Remove form from workflow
         if workflow.remove_form(form_url):

 from models.department import Department
 import logging
 import os
 from db import get_gridfs
 from bson.objectid import ObjectId
 from datetime import datetime
     # Delete all forms associated with the workflow
     for form_path in workflow.raw_forms:
         try:
+            # If it's a GridFS file URL (starts with /api/workflows/{id}/files/{file_id})
+            if '/files/' in form_path:
+                # Extract file_id from URL
+                file_id = form_path.split('/')[-1]
+                # Delete from GridFS
+                fs = get_gridfs()
+                fs.delete(ObjectId(file_id))
+                logger.info(f"Deleted file {file_id} from GridFS")
         except Exception as e:
             logger.error(f"Error deleting form {form_path}: {str(e)}")
             file_id = form_url.split('/')[-1]
             # Delete from GridFS
             fs = get_gridfs()
             fs.delete(ObjectId(file_id))
             logger.info(f"Deleted file {file_id} from GridFS")
         # Remove form from workflow
         if workflow.remove_form(form_url):

migrate_cloudinary_to_gridfs.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import os
+import logging
+import requests
+from urllib.parse import urlparse
+import cloudinary
+import cloudinary.api
+from dotenv import load_dotenv
+from db import get_gridfs, Database, get_workflows_collection
+from models.workflow import Workflow
+from bson.objectid import ObjectId
+# Load environment variables
+load_dotenv()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Configure Cloudinary
+try:
+    cloudinary.config(
+        cloud_name=os.environ.get('CLOUDINARY_CLOUD_NAME'),
+        api_key=os.environ.get('CLOUDINARY_API_KEY'),
+        api_secret=os.environ.get('CLOUDINARY_API_SECRET')
+    )
+    logger.info("Cloudinary configured.")
+except Exception as e:
+    logger.error(f"Failed to configure Cloudinary: {str(e)}")
+    exit(1)
+def is_cloudinary_url(url):
+    """Return True when *url* is an http(s) URL served from a cloudinary.com host.
+    The host name is inspected rather than the raw string, so a URL that only
+    mentions "cloudinary.com" in its path or query is not treated as Cloudinary.
+    Non-string values (e.g. None) and unparsable URLs yield False.
+    """
+    if not isinstance(url, str):
+        return False
+    try:
+        parsed = urlparse(url)
+        host = (parsed.hostname or '').lower()
+    except ValueError:
+        # urlparse rejects some malformed netlocs (e.g. an unbalanced "[")
+        return False
+    if parsed.scheme not in ('http', 'https'):
+        return False
+    return host == 'cloudinary.com' or host.endswith('.cloudinary.com')
+def extract_cloudinary_public_id(url):
+    """Return the last path segment of *url* with its file extension removed.
+    Only the basename of the URL path is used; any preceding path segments
+    are not part of the returned value.
+    """
+    # NOTE(review): if assets were uploaded into a Cloudinary folder, the real
+    # public ID presumably includes that folder prefix - confirm before relying
+    # on this value for API lookups.
+    basename = os.path.basename(urlparse(url).path)
+    stem, _extension = os.path.splitext(basename)
+    return stem
+def _migrate_form(fs, workflow, form_url):
+    """Copy one Cloudinary-hosted form into GridFS and repoint the workflow at it.
+    Returns True when the workflow was saved with the new GridFS URL and False
+    when the download or the save failed. Exceptions from Cloudinary, requests
+    or GridFS propagate to the caller.
+    """
+    # Imported here because the module header only imports cloudinary and
+    # cloudinary.api; the alias avoids rebinding the name "cloudinary" locally.
+    from cloudinary import uploader as cloudinary_uploader
+    # Extract public_id from URL
+    public_id = extract_cloudinary_public_id(form_url)
+    logger.info(f"Migrating Cloudinary file {public_id} to GridFS")
+    # Download file from Cloudinary
+    # NOTE(review): api.resource() is called without a resource_type; confirm
+    # that matches how these forms were originally uploaded.
+    resource = cloudinary.api.resource(public_id)
+    # A timeout keeps one stalled download from hanging the whole migration
+    response = requests.get(resource['secure_url'], timeout=60)
+    if response.status_code != 200:
+        logger.error(f"Failed to download file from Cloudinary: {response.status_code}")
+        return False
+    # Store in GridFS
+    file_id = fs.put(
+        response.content,
+        filename=f"{public_id}.pdf",
+        content_type='application/pdf',
+        metadata={
+            'workflow_id': str(workflow._id),
+            'migrated_from_cloudinary': True,
+            'original_url': form_url
+        }
+    )
+    # Create a URL for retrieving the file
+    gridfs_url = f"/api/workflows/{workflow._id}/files/{file_id}"
+    # Replace the Cloudinary URL in place so the form keeps its position
+    position = workflow.raw_forms.index(form_url)
+    workflow.raw_forms[position] = gridfs_url
+    saved = False
+    try:
+        saved = bool(workflow.save())
+    finally:
+        if not saved:
+            # Roll back: restore the original URL so a later save of this
+            # workflow cannot persist a link to the file deleted below.
+            workflow.raw_forms[position] = form_url
+            fs.delete(file_id)
+    if not saved:
+        logger.error(f"Failed to update workflow {workflow._id} with new GridFS URL")
+        return False
+    logger.info(f"Successfully migrated {form_url} to {gridfs_url}")
+    # Optionally delete from Cloudinary (best effort; the migration already succeeded)
+    try:
+        cloudinary_uploader.destroy(public_id)
+        logger.info(f"Deleted file {public_id} from Cloudinary")
+    except Exception as del_e:
+        logger.warning(f"Could not delete {public_id} from Cloudinary: {str(del_e)}")
+    return True
+def migrate_cloudinary_to_gridfs():
+    """Migrate all Cloudinary files referenced in workflows to GridFS.
+    Every workflow returned by Workflow.get_all() is scanned for Cloudinary URLs
+    in raw_forms; each one is migrated independently, so a failure on one file
+    is logged and counted without stopping the rest. A summary is logged at the end.
+    """
+    logger.info("Starting migration of Cloudinary files to GridFS")
+    # Initialize database connection
+    Database.get_instance()
+    fs = get_gridfs()
+    # Get all workflows
+    workflows = Workflow.get_all()
+    logger.info(f"Found {len(workflows)} workflows to check")
+    migration_count = 0
+    error_count = 0
+    for workflow in workflows:
+        # Check if workflow has Cloudinary forms (tolerate a missing/None list)
+        cloudinary_forms = [form for form in (workflow.raw_forms or []) if is_cloudinary_url(form)]
+        if not cloudinary_forms:
+            continue
+        logger.info(f"Workflow {workflow._id} has {len(cloudinary_forms)} Cloudinary forms to migrate")
+        for form_url in cloudinary_forms:
+            try:
+                migrated = _migrate_form(fs, workflow, form_url)
+            except Exception as e:
+                logger.error(f"Error migrating {form_url}: {str(e)}")
+                migrated = False
+            if migrated:
+                migration_count += 1
+            else:
+                error_count += 1
+    logger.info(f"Migration complete: {migration_count} files migrated, {error_count} errors")
+if __name__ == "__main__":
+    migrate_cloudinary_to_gridfs()

requirements.txt CHANGED Viewed

@@ -1,18 +1,18 @@
-Flask==2.2.3
-python-dotenv==1.0.0
-pymongo==4.5.0
-flask-cors==4.0.0
 bcrypt==4.0.1
 PyJWT==2.8.0
 gunicorn==21.2.0
-cloudinary==1.35.0
-openai==1.6.1
 pytesseract==0.3.10
-Pillow==10.1.0
 python-magic==0.4.27
 Flask-RESTful==0.3.10
-Werkzeug==2.2.3
-celery==5.3.4
 redis==5.0.1
-pdf2image==1.16.3
-requests==2.31.0

+Flask==3.0.2
+python-dotenv==1.0.1
+pymongo==4.7.1
+flask-cors==4.0.1
 bcrypt==4.0.1
 PyJWT==2.8.0
 gunicorn==21.2.0
+openai==1.12.0
 pytesseract==0.3.10
+Pillow==10.2.0
 python-magic==0.4.27
 Flask-RESTful==0.3.10
+Werkzeug==3.0.1  # Flask 3.x requires Werkzeug>=3.0.0; the 2.2.3 pin cannot be resolved with it
+celery==5.3.6
 redis==5.0.1
+pdf2image==1.17.0
+requests==2.31.0
+pytest==8.0.2

routes/log_routes.py CHANGED Viewed

@@ -1,12 +1,15 @@
-from flask import Blueprint
 from controllers.log_controller import (
     upload_log, get_log, delete_log, get_user_logs,
     get_department_logs, get_logs_by_date_range
 )
 from utils.auth import token_required, admin_required
 # Create blueprint
-log_bp = Blueprint('logs', __name__)
 # Routes that require authentication
 log_bp.route('/', methods=['POST'])(token_required(upload_log))
@@ -16,4 +19,37 @@ log_bp.route('/<log_id>', methods=['GET'])(token_required(get_log))
 log_bp.route('/<log_id>', methods=['DELETE'])(token_required(delete_log))
 # Routes that require admin permissions
-log_bp.route('/department', methods=['GET'])(admin_required(get_department_logs))

+from flask import Blueprint, send_file, jsonify
 from controllers.log_controller import (
     upload_log, get_log, delete_log, get_user_logs,
     get_department_logs, get_logs_by_date_range
 )
 from utils.auth import token_required, admin_required
+from db import get_gridfs
+from bson.objectid import ObjectId
+from io import BytesIO
 # Create blueprint
+log_bp = Blueprint('log', __name__)
 # Routes that require authentication
 log_bp.route('/', methods=['POST'])(token_required(upload_log))
 log_bp.route('/<log_id>', methods=['DELETE'])(token_required(delete_log))
 # Routes that require admin permissions
+log_bp.route('/department', methods=['GET'])(admin_required(get_department_logs))
+@log_bp.route('/files/<file_id>', methods=['GET'])
+@token_required
+def get_log_file(current_user, file_id):
+    """Serve a file from GridFS.
+    Responds 404 when file_id is not a valid ObjectId or no such file exists,
+    403 when the file's department_id metadata differs from the caller's
+    department, 500 on any other error, and otherwise streams the stored
+    bytes inline as application/pdf.
+    """
+    try:
+        # A malformed id cannot name a stored file; report it as missing
+        # instead of letting ObjectId() raise into the generic 500 handler.
+        if not ObjectId.is_valid(file_id):
+            return jsonify({'message': 'File not found'}), 404
+        object_id = ObjectId(file_id)
+        fs = get_gridfs()
+        # GridFS.get() raises NoFile rather than returning None, so check
+        # existence explicitly to be able to answer with a 404.
+        if not fs.exists(object_id):
+            return jsonify({'message': 'File not found'}), 404
+        file_obj = fs.get(object_id)
+        # Check if user has access to this file (belongs to their department)
+        # NOTE(review): files stored without department_id metadata are served
+        # to any authenticated user - confirm this is intended.
+        metadata = file_obj.metadata or {}
+        if 'department_id' in metadata:
+            file_department_id = metadata['department_id']
+            if str(file_department_id) != str(current_user.department_id):
+                return jsonify({'message': 'Access denied to files from other departments'}), 403
+        # Create a response with the file data (a fresh BytesIO starts at offset 0)
+        return send_file(
+            BytesIO(file_obj.read()),
+            mimetype='application/pdf',
+            as_attachment=False,
+            download_name=file_obj.filename
+        )
+    except Exception as e:
+        return jsonify({'message': f'Error retrieving file: {str(e)}'}), 500