File size: 4,735 Bytes
f6249c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import logging
import os
from urllib.parse import unquote, urlparse

import cloudinary
import cloudinary.api
import cloudinary.uploader
import requests
from bson.objectid import ObjectId
from dotenv import load_dotenv

from db import get_gridfs, Database, get_workflows_collection
from models.workflow import Workflow

# Load environment variables (including any defined in a local .env file)
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configure Cloudinary
# A variable that is not set is handed to cloudinary.config() as None, so
# report it here; otherwise the problem only surfaces later as a failed
# API call during the migration.
_missing_cloudinary_vars = [
    name
    for name in (
        'CLOUDINARY_CLOUD_NAME',
        'CLOUDINARY_API_KEY',
        'CLOUDINARY_API_SECRET',
    )
    if not os.environ.get(name)
]
if _missing_cloudinary_vars:
    logger.warning(
        "Missing Cloudinary environment variables: %s",
        ", ".join(_missing_cloudinary_vars),
    )

try:
    cloudinary.config(
        cloud_name=os.environ.get('CLOUDINARY_CLOUD_NAME'),
        api_key=os.environ.get('CLOUDINARY_API_KEY'),
        api_secret=os.environ.get('CLOUDINARY_API_SECRET')
    )
    logger.info("Cloudinary configured.")
except Exception as e:
    logger.error(f"Failed to configure Cloudinary: {str(e)}")
    # The bare exit() helper is injected by the site module for interactive
    # sessions and is not guaranteed to exist; SystemExit always is.
    raise SystemExit(1)

def is_cloudinary_url(url):
    """Return True if *url* is an http(s) URL served from a cloudinary.com host.

    Non-string values (for example ``None`` entries in a form list) and
    strings that cannot be parsed as a URL yield False instead of raising.
    The host name itself is checked, so a URL that merely mentions
    ``cloudinary.com`` in its path or query string is not treated as a
    Cloudinary URL.
    """
    if not isinstance(url, str):
        return False

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. broken IPv6 literals).
        return False

    if parsed.scheme not in ('http', 'https'):
        return False

    return host == 'cloudinary.com' or host.endswith('.cloudinary.com')

def extract_cloudinary_public_id(url):
    """Extract the public ID from a Cloudinary delivery URL.

    The public ID is taken to be everything after the delivery-type segment
    (``upload``, ``private`` or ``authenticated``) and the optional version
    segment (``v`` followed by digits), with the file extension removed.
    Folder components are kept, so ``.../upload/v1/forms/a.pdf`` yields
    ``forms/a``. Percent-encoded characters are decoded.

    If the URL has no recognisable delivery-type segment, the extension-less
    file name of the last path segment is returned. An empty path yields
    an empty string.

    NOTE(review): when a URL carries transformation segments but no version
    segment, the transformations cannot be told apart from folders and end
    up in the result.
    """
    segments = [unquote(part) for part in urlparse(url).path.split('/') if part]
    if not segments:
        return ''

    delivery_types = ('upload', 'private', 'authenticated')
    delivery_index = next(
        (i for i, part in enumerate(segments) if part in delivery_types),
        None,
    )

    id_parts = []
    if delivery_index is not None:
        id_parts = segments[delivery_index + 1:]
        # Drop everything up to and including the version marker, which
        # also discards any transformation segments placed before it.
        for i, part in enumerate(id_parts):
            if len(part) > 1 and part[0] == 'v' and part[1:].isdigit():
                id_parts = id_parts[i + 1:]
                break

    if not id_parts:
        # Not a standard delivery URL: fall back to the bare file name.
        id_parts = segments[-1:]

    # Remove the file extension from the final component only
    id_parts[-1] = os.path.splitext(id_parts[-1])[0]
    return '/'.join(id_parts)

def _migrate_form(fs, workflow, form_url, download_timeout):
    """Copy one Cloudinary-hosted form into GridFS and repoint the workflow.

    Args:
        fs: GridFS handle used to store the downloaded file.
        workflow: Workflow whose ``raw_forms`` list contains ``form_url``.
        form_url: Cloudinary URL to migrate.
        download_timeout: Seconds to wait for the download before giving up.

    Returns:
        True if the workflow was saved with the GridFS URL in place of
        ``form_url``; False if the download or the save failed.

    Raises:
        Exception: anything raised by the Cloudinary, HTTP, GridFS or
            workflow calls propagates to the caller. If the failure happens
            while saving, the stored GridFS file and the in-memory
            ``raw_forms`` change are rolled back first.
    """
    # Extract public_id from URL
    public_id = extract_cloudinary_public_id(form_url)
    logger.info(f"Migrating Cloudinary file {public_id} to GridFS")

    # Download file from Cloudinary
    resource = cloudinary.api.resource(public_id)
    response = requests.get(resource['secure_url'], timeout=download_timeout)
    if response.status_code != 200:
        logger.error(f"Failed to download file from Cloudinary: {response.status_code}")
        return False

    # Store in GridFS
    file_id = fs.put(
        response.content,
        filename=f"{public_id}.pdf",
        content_type='application/pdf',
        metadata={
            'workflow_id': str(workflow._id),
            'migrated_from_cloudinary': True,
            'original_url': form_url
        }
    )

    # Create a URL for retrieving the file
    gridfs_url = f"/api/workflows/{workflow._id}/files/{file_id}"

    # Replace the Cloudinary URL in place (every occurrence) so the order of
    # the workflow's forms is preserved and the change can be undone exactly.
    forms = workflow.raw_forms
    positions = [i for i, form in enumerate(forms) if form == form_url]
    for i in positions:
        forms[i] = gridfs_url

    saved = False
    try:
        saved = bool(workflow.save())
    finally:
        if not saved:
            # Undo both halves of the change: otherwise a later save of this
            # workflow would persist a URL pointing at a deleted GridFS file.
            for i in positions:
                forms[i] = form_url
            fs.delete(file_id)

    if not saved:
        logger.error(f"Failed to update workflow {workflow._id} with new GridFS URL")
        return False

    logger.info(f"Successfully migrated {form_url} to {gridfs_url}")

    # Removing the Cloudinary copy is best effort: the migration itself has
    # already succeeded, so a failure here is only a warning.
    try:
        cloudinary.uploader.destroy(public_id)
        logger.info(f"Deleted file {public_id} from Cloudinary")
    except Exception as del_e:
        logger.warning(f"Could not delete {public_id} from Cloudinary: {str(del_e)}")

    return True


def migrate_cloudinary_to_gridfs(download_timeout=60):
    """Migrate all Cloudinary files referenced in workflows to GridFS.

    For every workflow returned by ``Workflow.get_all()``, each Cloudinary
    URL in ``raw_forms`` is downloaded, stored in GridFS, and replaced by a
    ``/api/workflows/<workflow id>/files/<file id>`` URL. Failures are
    logged and counted; they do not stop the remaining files from being
    processed.

    Args:
        download_timeout: Seconds to wait for each Cloudinary download.
            Without a timeout a stalled connection would block the whole
            migration indefinitely.
    """
    logger.info("Starting migration of Cloudinary files to GridFS")

    # Initialize database connection
    Database.get_instance()
    fs = get_gridfs()

    # Get all workflows
    workflows = Workflow.get_all()
    logger.info(f"Found {len(workflows)} workflows to check")

    migration_count = 0
    error_count = 0

    for workflow in workflows:
        # Collect the distinct Cloudinary URLs of this workflow.
        # dict.fromkeys de-duplicates while keeping first-seen order; a
        # missing form list or non-string entries are skipped, not fatal.
        cloudinary_forms = list(dict.fromkeys(
            form
            for form in (workflow.raw_forms or [])
            if isinstance(form, str) and is_cloudinary_url(form)
        ))

        if not cloudinary_forms:
            continue

        logger.info(f"Workflow {workflow._id} has {len(cloudinary_forms)} Cloudinary forms to migrate")

        for form_url in cloudinary_forms:
            try:
                migrated = _migrate_form(fs, workflow, form_url, download_timeout)
            except Exception as e:
                logger.error(f"Error migrating {form_url}: {str(e)}")
                migrated = False

            if migrated:
                migration_count += 1
            else:
                error_count += 1

    logger.info(f"Migration complete: {migration_count} files migrated, {error_count} errors")

# Run the migration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    migrate_cloudinary_to_gridfs()