|
|
from flask import Flask, request, jsonify, send_file |
|
|
from flask_cors import CORS |
|
|
import os |
|
|
import traceback |
|
|
from io import BytesIO |
|
|
import sys |
|
|
|
|
|
|
|
|
# Project-local modules are imported under a guard: if any of them cannot be
# imported, the failure is reported on stderr together with the traceback and
# the ImportError is re-raised so that startup is aborted.
try:
    from gemini_client import GeminiClient
    from document_converter import DocumentConverter
    from latex_processor import OptimizedLaTeXProcessor as LaTeXProcessor
except ImportError as import_failure:
    print("CRITICAL IMPORT ERROR: " + str(import_failure), file=sys.stderr)
    traceback.print_exc()
    raise
|
|
|
|
|
app = Flask(__name__)
CORS(app)

# Bind every service name up front so the request handlers can always
# reference them, even when one of the constructors below fails.
gemini_client = None
latex_processor = None
doc_converter = None

# The Gemini client is set up in its own guard: a missing key or a failing
# constructor must only disable the AI-backed endpoint, not the document
# services that /preview and /add-signature rely on.
try:
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("ERROR: GEMINI_API_KEY not found in environment!", file=sys.stderr)
    else:
        # Deliberately log no part of the key: log output is not a safe
        # place for secret material, not even a prefix.
        print("Initializing Gemini client...", file=sys.stderr)
        gemini_client = GeminiClient(api_key=api_key)
        print("Gemini client initialized successfully", file=sys.stderr)
except Exception as e:
    print(f"CRITICAL INITIALIZATION ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    gemini_client = None

# Document services do not depend on Gemini and are initialized separately.
try:
    latex_processor = LaTeXProcessor()
    doc_converter = DocumentConverter()
    if gemini_client is not None:
        print("All services initialized successfully", file=sys.stderr)
    else:
        print("Document services initialized; AI enhancement unavailable", file=sys.stderr)
except Exception as e:
    print(f"CRITICAL INITIALIZATION ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
|
|
|
|
|
|
|
|
@app.route('/health', methods=['GET'])
def health_check():
    """Report whether the service, and the Gemini client in particular, is usable.

    Returns a JSON object whose ``status`` is ``'healthy'`` when the
    module-level ``gemini_client`` is truthy and ``'degraded'`` otherwise,
    alongside static service metadata and whether ``GEMINI_API_KEY`` is set.
    """
    overall_status = 'degraded'
    if gemini_client:
        overall_status = 'healthy'

    report = {
        'status': overall_status,
        'service': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'gemini_available': gemini_client is not None,
        'api_key_set': bool(os.environ.get('GEMINI_API_KEY')),
        'features': ['latex_output', 'equation_formatting', 'multi_format_support'],
    }
    return jsonify(report)
|
|
|
|
|
|
|
|
@app.route('/enhance', methods=['POST'])
def enhance_document():
    """
    Enhance an uploaded document with Gemini and return it as a .docx attachment.

    Expected form data:
        - file: Document file (.docx, .pdf, or .txt; '.doc' also passes the
          extension check)
        - prompt: (optional) enhancement instructions; the query string is
          consulted first, then the form
        - doc_type: (optional) document type hint (auto, academic, technical,
          business), looked up the same way; defaults to 'auto'

    Responses:
        - 200: the generated document, named 'enhanced_' + original base name + '.docx'
        - 400: no file, empty filename, unsupported extension, or no extractable text
        - 503: the Gemini client is not available
        - 500: the Gemini call or any other step raised; 'details' carries the
          exception text only when FLASK_ENV is 'development'
    """
    def log(message):
        # Every diagnostic line of this handler goes to stderr.
        print(message, file=sys.stderr)

    def dev_details(exc):
        # Exception text is only exposed to clients in development mode.
        if os.getenv('FLASK_ENV') == 'development':
            return str(exc)
        return None

    try:
        log("=== ENHANCE REQUEST STARTED ===")

        # Nothing below can work without the AI client, so check it first.
        if not gemini_client:
            log("ERROR: Gemini client not initialized")
            unavailable = {
                'error': 'AI service not available. Please check API key configuration.'
            }
            return jsonify(unavailable), 503

        # --- Request validation ---
        if 'file' not in request.files:
            log("ERROR: No file in request")
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']
        if upload.filename == '':
            log("ERROR: Empty filename")
            return jsonify({'error': 'Empty filename'}), 400

        log(f"Processing file: {upload.filename}")

        # Query-string values take precedence over form fields of the same name.
        form_prompt = request.form.get('prompt', '')
        user_prompt = request.args.get('prompt', form_prompt)
        form_doc_type = request.form.get('doc_type', 'auto')
        doc_type = request.args.get('doc_type', form_doc_type)

        prompt_excerpt = user_prompt[:100] if user_prompt else 'None'
        log(f"User prompt: {prompt_excerpt}")
        log(f"Doc type: {doc_type}")

        source_name, file_ext = os.path.splitext(upload.filename)
        file_ext = file_ext.lower()
        accepted_extensions = ('.docx', '.pdf', '.txt', '.doc')
        if file_ext not in accepted_extensions:
            log(f"ERROR: Unsupported format: {file_ext}")
            message = f'Unsupported file format: {file_ext}. Please use .docx, .pdf, or .txt'
            return jsonify({'error': message}), 400

        # --- Text extraction ---
        log("Reading file content...")
        raw_bytes = upload.read()
        log(f"File size: {len(raw_bytes)} bytes")

        log("Extracting text...")
        extracted_text = doc_converter.extract_text(raw_bytes, file_ext)
        text_length = len(extracted_text) if extracted_text else 0
        log(f"Extracted text length: {text_length} characters")

        if not extracted_text or len(extracted_text.strip()) < 1:
            log(f"ERROR: Text extraction failed for {upload.filename}")
            no_text = {
                'error': 'Could not extract text from document. Please ensure the file contains readable text.'
            }
            return jsonify(no_text), 400

        # The detection result is only logged; it does not alter the flow.
        log("Detecting mathematical content...")
        has_math = latex_processor.detect_mathematical_content(extracted_text)
        log(f"Has math content: {has_math}")

        # --- AI enhancement (failures get their own 500 response) ---
        log("Calling Gemini API with LaTeX-focused prompt...")
        try:
            enhanced_content = gemini_client.enhance_with_latex_output(
                content=extracted_text,
                instructions=user_prompt,
                doc_type=doc_type,
                include_latex=True,
            )
            log(f"Enhanced content received: {len(enhanced_content)} characters")
        except Exception as gemini_error:
            log(f"GEMINI API ERROR: {str(gemini_error)}")
            traceback.print_exc()
            failure = {
                'error': 'AI enhancement failed. Please try again.',
                'details': dev_details(gemini_error),
            }
            return jsonify(failure), 500

        # --- LaTeX post-processing ---
        log("Processing LaTeX content...")
        latex_result = latex_processor.process_latex_content(enhanced_content)
        # Fall back to the raw AI output when no cleaned version is reported.
        processed_content = latex_result.get('cleaned_content', enhanced_content)
        equations = latex_result.get('equations', [])

        if equations:
            log(f"Extracted {len(equations)} equations from enhanced content")
            for number, equation in enumerate(equations, start=1):
                if equation.get('valid', True):
                    log(f"Equation {number}: {equation.get('type')} - {equation.get('latex', '')[:50]}...")
                else:
                    log(f"WARNING: Equation {number} validation issue: {equation.get('error')}")

        # --- Output document ---
        log("Creating output document...")
        output_format = '.docx'
        output_file = doc_converter.create_document(
            content=processed_content,
            original_format=file_ext,
            output_format=output_format,
            include_latex=True,
        )
        log(f"Output file size: {len(output_file)} bytes")

        output_buffer = BytesIO(output_file)
        output_buffer.seek(0)

        output_filename = f"enhanced_{source_name}{output_format}"
        log(f"Sending file: {output_filename}")
        log("=== ENHANCE REQUEST COMPLETED SUCCESSFULLY ===")

        if output_format == '.docx':
            response_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        else:
            response_mimetype = 'application/pdf'

        return send_file(
            output_buffer,
            mimetype=response_mimetype,
            as_attachment=True,
            download_name=output_filename,
        )

    except Exception as e:
        log("=== ERROR IN ENHANCE REQUEST ===")
        log(f"Error: {str(e)}")
        log(traceback.format_exc())
        unexpected = {
            'error': 'Failed to process document. Please try again.',
            'details': dev_details(e),
        }
        return jsonify(unexpected), 500
|
|
|
|
|
|
|
|
@app.route('/preview', methods=['POST'])
def preview_latex():
    """
    Preview LaTeX equations extracted from an uploaded document.

    Expected form data:
        - file: Document file

    Returns:
        - 200: JSON with 'filename', 'has_math', 'equation_count',
          'equations' (a list of {'type', 'content'} entries) and
          'text_preview' (the first 500 characters followed by '...' when the
          text is longer, otherwise the whole text)
        - 400: no file in the request, or the file has an empty filename
        - 500: any processing failure; the response carries a generic message
          and 'details' holds the exception text only when FLASK_ENV is
          'development'
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # Same guard as the other upload endpoints: a part without a
        # filename carries no usable document.
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        file_ext = os.path.splitext(file.filename)[1].lower()
        file_content = file.read()

        # /enhance treats a None/empty extraction result as possible, so
        # normalize it here instead of failing later on len(None).
        extracted_text = doc_converter.extract_text(file_content, file_ext) or ''

        has_math = latex_processor.detect_mathematical_content(extracted_text)

        # Guard against a None result so the count and listing stay valid.
        equations_list = latex_processor.extract_latex_equations(extracted_text) or []

        if len(extracted_text) > 500:
            text_preview = extracted_text[:500] + '...'
        else:
            text_preview = extracted_text

        return jsonify({
            'filename': file.filename,
            'has_math': has_math,
            'equation_count': len(equations_list),
            'equations': [{'type': eq.get('type'), 'content': eq.get('latex')} for eq in equations_list],
            'text_preview': text_preview
        })

    except Exception as e:
        print(f"Error in preview: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        # Do not hand internal exception text to clients outside development,
        # matching the error responses of the other endpoints.
        return jsonify({
            'error': 'Failed to preview document. Please try again.',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500
|
|
|
|
|
|
|
|
@app.route('/add-signature', methods=['POST'])
def add_signature():
    """Stamp a signature onto an uploaded document and return the result.

    Expected form data:
        - file: the document to sign
        - signature: the signature payload (required)
        - position: (optional) placement, defaults to 'bottom-right'
        - signer_name: (optional) name of the signer

    Responses:
        - 200: the signed document as an attachment named
          'Signed_' + original base name + '.docx'
        - 400: no file, empty filename, or no signature
        - 500: any failure while signing; 'details' carries the exception
          text only when FLASK_ENV is 'development'
    """
    def log(message):
        # Diagnostics for this handler are written to stderr.
        print(message, file=sys.stderr)

    try:
        log("=== SIGNATURE REQUEST STARTED ===")

        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']
        if upload.filename == '':
            return jsonify({'error': 'Empty filename'}), 400

        signature_data = request.form.get('signature')
        if not signature_data:
            return jsonify({'error': 'No signature provided'}), 400

        signing_options = {
            'position': request.form.get('position', 'bottom-right'),
            'signer_name': request.form.get('signer_name'),
        }

        log(f"Adding signature to: {upload.filename}")

        document_bytes = upload.read()
        signed_doc = doc_converter.add_signature(
            file_content=document_bytes,
            signature_data=signature_data,
            **signing_options,
        )

        output_buffer = BytesIO(signed_doc)
        output_buffer.seek(0)

        stem = os.path.splitext(upload.filename)[0]
        output_filename = f"Signed_{stem}.docx"
        log(f"Signature added successfully: {output_filename}")

        docx_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        return send_file(
            output_buffer,
            mimetype=docx_mimetype,
            as_attachment=True,
            download_name=output_filename,
        )

    except Exception as e:
        log("=== ERROR IN SIGNATURE REQUEST ===")
        log(f"Error: {str(e)}")
        log(traceback.format_exc())

        # Exception text is only exposed to clients in development mode.
        details = None
        if os.getenv('FLASK_ENV') == 'development':
            details = str(e)
        return jsonify({'error': 'Failed to sign document', 'details': details}), 500
|
|
|
|
|
|
|
|
@app.route('/', methods=['GET'])
def index():
    """Describe the API: name, version, status, endpoints and capabilities.

    ``status`` is ``'operational'`` when the module-level ``gemini_client``
    is truthy and ``'degraded'`` otherwise; everything else is static.
    """
    endpoints = {
        '/': 'API information (GET)',
        '/health': 'Health check (GET)',
        '/enhance': 'Enhance document with LaTeX (POST with file)',
        '/preview': 'Preview LaTeX equations (POST with file)',
        '/add-signature': 'Add signature to document (POST with file and signature)',
    }
    features = [
        'AI-powered content enhancement',
        'Full LaTeX equation support',
        'Mathematical notation formatting',
        'Scientific document structure',
        'Professional formatting',
        'Multi-format input/output',
        'LaTeX preview and validation',
    ]

    if gemini_client:
        status = 'operational'
    else:
        status = 'degraded'

    description = {
        'name': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'description': 'AI-powered document enhancement with LaTeX support using Google Gemini',
        'status': status,
        'endpoints': endpoints,
        'supported_formats': ['.docx', '.pdf', '.txt'],
        'document_types': ['auto', 'academic', 'technical', 'business'],
        'features': features,
    }
    return jsonify(description)
|
|
|
|
|
|
|
|
# Script entry point: report whether the API key is configured, then start
# the Flask development server on all interfaces.
if __name__ == '__main__':
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("=" * 60, file=sys.stderr)
        print("WARNING: GEMINI_API_KEY environment variable not set!", file=sys.stderr)
        print("Please set it in HuggingFace Spaces Settings → Repository secrets", file=sys.stderr)
        print("=" * 60, file=sys.stderr)
    else:
        # Only confirm presence: no part of the secret belongs in log output,
        # not even a prefix.
        print("API Key found", file=sys.stderr)

    # PORT comes from the environment and falls back to 7860 when unset.
    port = int(os.getenv('PORT', 7860))
    print(f"Starting Flask app on port {port}", file=sys.stderr)
    # Debug mode is enabled only when FLASK_ENV is 'development'.
    app.run(host='0.0.0.0', port=port, debug=os.getenv('FLASK_ENV') == 'development')
|
|
|