# Spaces status: Runtime error
| from flask import Flask, request, jsonify, send_file | |
| from flask_cors import CORS | |
| import os | |
| import uuid | |
| import logging | |
| import subprocess | |
| import json | |
| import sys | |
# Create the Flask application object and enable CORS on it with the
# extension's default settings.
app = Flask(__name__)
CORS(app)
# Configure logging: INFO and above goes both to stdout and to a log file
# under /tmp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('/tmp/mineruapi.log'),
    ],
)
# Called without a name, getLogger() returns the root logger, so `logger`
# shares the handlers configured above.
logger = logging.getLogger()
# Constants
UPLOAD_FOLDER = '/tmp/pdf_uploads'   # scratch directory for uploads
OUTPUT_FOLDER = '/tmp/pdf_output'    # one sub-directory per conversion session
ALLOWED_EXTENSIONS = {'pdf'}         # lower-case extensions accepted for upload

# Create the working directories at import time; exist_ok makes this a no-op
# when they are already present.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
# Function to check file extensions
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', compared case-insensitively.
    A name without any '.' is rejected.
    """
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
# Register the landing page at the site root; without a route decorator the
# view is never attached to `app`.
@app.route('/')
def index():
    """Return the static HTML landing page that describes how to call the API."""
    return """
    <html>
    <head>
    <title>MinerU PDF API</title>
    <style>
    body { font-family: Arial; max-width: 800px; margin: 0 auto; padding: 20px; }
    h1 { color: #2c3e50; }
    .info { background: #f8f9fa; padding: 15px; border-radius: 5px; }
    </style>
    </head>
    <body>
    <h1>MinerU PDF API</h1>
    <div class="info">
    <p>This API service converts PDF files to Markdown and JSON.</p>
    <p>Usage: POST a PDF file to /api/convert endpoint.</p>
    <p>Example: <code>curl -X POST -F "file=@your_file.pdf" https://marcosremar2-mineruapi.hf.space/api/convert</code></p>
    </div>
    </body>
    </html>
    """
# NOTE(review): the URL path is not stated anywhere in this file; '/health'
# is assumed by convention -- confirm against deployment probes.
@app.route('/health')
def health():
    """Health check endpoint.

    Returns a JSON object with a fixed "healthy" status and a short message.
    """
    return jsonify({
        "status": "healthy",
        "message": "API is running"
    })
def _sanitize_filename(raw_name):
    """Reduce a client-supplied file name to characters the download route accepts.

    Directory components are dropped (both '/' and Windows-style '\\'
    separators), then every character that is not alphanumeric, '-', '_' or
    '.' is replaced with '_'. This mirrors the whitelist applied by
    download_file, so the URLs returned by convert_pdf are always downloadable.
    """
    name = os.path.basename(raw_name.replace("\\", "/"))
    return "".join(c if c.isalnum() or c in "-_." else "_" for c in name)


# The landing page documents this endpoint as "POST a PDF file to /api/convert".
@app.route('/api/convert', methods=['POST'])
def convert_pdf():
    """PDF conversion endpoint.

    Expects a multipart upload under the form field "file". The upload is
    stored in a fresh per-request session directory below OUTPUT_FOLDER, and a
    placeholder Markdown file and JSON file are written next to it.

    Returns:
        A JSON response. On success it carries the session id, the generated
        Markdown/JSON content and their download URLs. A missing, unnamed or
        non-PDF upload yields HTTP 400; any unexpected exception is logged and
        reported as HTTP 500.
    """
    try:
        # Check if file is in the request
        if 'file' not in request.files:
            return jsonify({
                "success": False,
                "error": "No file part in the request"
            }), 400

        file = request.files['file']

        # Check if file is selected
        if file.filename == '':
            return jsonify({
                "success": False,
                "error": "No file selected"
            }), 400

        # Check if file is a PDF
        if not allowed_file(file.filename):
            return jsonify({
                "success": False,
                "error": "File must be a PDF"
            }), 400

        # Generate session ID; each request gets its own output directory
        session_id = str(uuid.uuid4())
        session_dir = os.path.join(OUTPUT_FOLDER, session_id)
        os.makedirs(session_dir, exist_ok=True)

        # Save the uploaded file under a sanitized name so that the download
        # URLs built below pass download_file's character check.
        filename = _sanitize_filename(file.filename)
        base_filename = os.path.splitext(filename)[0]
        input_path = os.path.join(session_dir, filename)
        file.save(input_path)

        # Create a simple Markdown and JSON from the PDF
        # This is a minimal version that should always work
        markdown_content = f"# {base_filename}\n\n*PDF file: {filename}*\n\nMinerU PDF processing service is running.\n"
        json_content = {
            "title": base_filename,
            "filename": filename,
            "status": "processed",
            "service": "MinerU PDF API"
        }

        # Save the files
        markdown_path = os.path.join(session_dir, f"{base_filename}.md")
        json_path = os.path.join(session_dir, f"{base_filename}.json")
        with open(markdown_path, 'w', encoding='utf-8') as f:
            f.write(markdown_content)
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(json_content, f, indent=2)

        # Return the response
        return jsonify({
            "success": True,
            "session_id": session_id,
            "message": "PDF file processed successfully",
            "markdown_content": markdown_content,
            "json_content": json_content,
            "markdown_url": f"/download/{session_id}/{base_filename}.md",
            "json_url": f"/download/{session_id}/{base_filename}.json"
        })
    except Exception as e:
        # Top-level boundary for the endpoint: record the traceback and turn
        # the failure into a JSON error instead of an HTML 500 page.
        logger.exception("Error in convert_pdf endpoint")
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500
# Matches the "/download/{session_id}/{name}" URLs returned by convert_pdf.
@app.route('/download/<session_id>/<filename>')
def download_file(session_id, filename):
    """Download generated files.

    Args:
        session_id: Name of the session directory below OUTPUT_FOLDER.
        filename: Name of a file inside that session directory.

    Returns:
        The file as an attachment, or a JSON error with HTTP 400 when either
        argument is malformed, or HTTP 404 when no such regular file exists.
    """
    invalid = jsonify({"error": "Invalid filename or session ID"}), 400

    # Simple security check: only alphanumerics, '-', '_' and '.' are allowed.
    if not all(c.isalnum() or c in ['-', '_', '.'] for c in session_id + filename):
        return invalid

    # The whitelist above still lets "." and ".." through, which would walk
    # out of the session directory; reject empty or dot-only path segments.
    if not session_id.strip('.') or not filename.strip('.'):
        return invalid

    # Defence in depth: after resolving symlinks the target must still live
    # below OUTPUT_FOLDER.
    base_dir = os.path.realpath(OUTPUT_FOLDER)
    file_path = os.path.realpath(os.path.join(base_dir, session_id, filename))
    if os.path.commonpath([base_dir, file_path]) != base_dir:
        return invalid

    # isfile rather than exists: a directory cannot be sent as a file.
    if os.path.isfile(file_path):
        return send_file(file_path, as_attachment=True)
    return jsonify({"error": "File not found"}), 404
if __name__ == '__main__':
    # Serve on all interfaces, port 7860, with Flask's debug mode disabled.
    app.run(host='0.0.0.0', port=7860, debug=False)