vero_ps / app.py
omgy's picture
Update app.py
356b7eb verified
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import os
import traceback
from io import BytesIO
import sys
# Project-local modules are imported under a guard so that a broken or
# missing module is reported loudly on stderr before the process aborts.
try:
    from gemini_client import GeminiClient
    from document_converter import DocumentConverter
    from latex_processor import OptimizedLaTeXProcessor as LaTeXProcessor
except ImportError as import_err:
    print(f"CRITICAL IMPORT ERROR: {import_err!s}", file=sys.stderr)
    traceback.print_exc()
    # Re-raise: the application cannot run without these modules.
    raise
app = Flask(__name__)
CORS(app)

# Module-level service handles used by the request handlers below.
# They are pre-bound to None so that a failed start-up leaves every name
# defined (handlers then see "unavailable" instead of raising NameError).
gemini_client = None
latex_processor = None
doc_converter = None

# Initialize services with error handling
try:
    # The document helpers are constructed without the API key, so build them
    # first: a Gemini failure must not take them down as well.
    latex_processor = LaTeXProcessor()
    doc_converter = DocumentConverter()

    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("ERROR: GEMINI_API_KEY not found in environment!", file=sys.stderr)
    else:
        # Never write any part of the secret to the logs.
        print("Initializing Gemini client (API key present)...", file=sys.stderr)
        gemini_client = GeminiClient(api_key=api_key)
        print("Gemini client initialized successfully", file=sys.stderr)

    print("All services initialized successfully", file=sys.stderr)
except Exception as e:
    print(f"CRITICAL INITIALIZATION ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    # Any start-up failure puts the API into degraded mode.
    gemini_client = None
@app.route('/health', methods=['GET'])
def health_check():
    """Report service status, including whether the Gemini client is usable."""
    overall_status = 'healthy' if gemini_client else 'degraded'
    key_configured = bool(os.getenv('GEMINI_API_KEY'))

    payload = {
        'status': overall_status,
        'service': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'gemini_available': gemini_client is not None,
        'api_key_set': key_configured,
        'features': ['latex_output', 'equation_formatting', 'multi_format_support'],
    }
    return jsonify(payload)
@app.route('/enhance', methods=['POST'])
def enhance_document():
    """
    Enhance an uploaded document with Gemini and return it as a .docx file.

    Expected form data:
    - file: Document file (.docx, .pdf, or .txt)
    - prompt: (optional) User's enhancement instructions
    - doc_type: (optional) Document type hint (auto, academic, technical, business)

    ``prompt`` and ``doc_type`` are read from the query string first and fall
    back to the form body.

    Returns:
    - 200 with the enhanced document as an attachment
    - 400 for a missing, empty, or unsupported upload, or when no text
      could be extracted
    - 503 when the Gemini client is not initialized
    - 500 for AI or processing failures; the ``details`` field is only
      populated when FLASK_ENV == 'development'
    """
    # NOTE(review): '.doc' is accepted although the user-facing message below
    # does not list it -- confirm DocumentConverter handles legacy .doc files.
    supported_extensions = ('.docx', '.pdf', '.txt', '.doc')

    try:
        print("=== ENHANCE REQUEST STARTED ===", file=sys.stderr)

        # Check if Gemini is available
        if not gemini_client:
            print("ERROR: Gemini client not initialized", file=sys.stderr)
            return jsonify({
                'error': 'AI service not available. Please check API key configuration.'
            }), 503

        # Validate file upload
        if 'file' not in request.files:
            print("ERROR: No file in request", file=sys.stderr)
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # The filename is client-controlled: drop any directory components
        # (either separator style) before using it for the extension check
        # and for the download name.
        safe_name = os.path.basename((file.filename or '').replace('\\', '/'))
        if not safe_name:
            print("ERROR: Empty filename", file=sys.stderr)
            return jsonify({'error': 'Empty filename'}), 400
        print(f"Processing file: {file.filename}", file=sys.stderr)

        # Get optional parameters
        user_prompt = request.args.get('prompt', request.form.get('prompt', ''))
        doc_type = request.args.get('doc_type', request.form.get('doc_type', 'auto'))
        print(f"User prompt: {user_prompt[:100] if user_prompt else 'None'}", file=sys.stderr)
        print(f"Doc type: {doc_type}", file=sys.stderr)

        # Validate file extension
        file_ext = os.path.splitext(safe_name)[1].lower()
        if file_ext not in supported_extensions:
            print(f"ERROR: Unsupported format: {file_ext}", file=sys.stderr)
            return jsonify({'error': f'Unsupported file format: {file_ext}. Please use .docx, .pdf, or .txt'}), 400

        # Read file content
        print("Reading file content...", file=sys.stderr)
        file_content = file.read()
        print(f"File size: {len(file_content)} bytes", file=sys.stderr)

        # Extract text from document
        print("Extracting text...", file=sys.stderr)
        extracted_text = doc_converter.extract_text(file_content, file_ext)
        print(f"Extracted text length: {len(extracted_text) if extracted_text else 0} characters", file=sys.stderr)
        if not extracted_text or not extracted_text.strip():
            print(f"ERROR: Text extraction failed for {file.filename}", file=sys.stderr)
            return jsonify({'error': 'Could not extract text from document. Please ensure the file contains readable text.'}), 400

        # Detect mathematical/scientific content (used for logging only)
        print("Detecting mathematical content...", file=sys.stderr)
        has_math = latex_processor.detect_mathematical_content(extracted_text)
        print(f"Has math content: {has_math}", file=sys.stderr)

        # Use LaTeX-focused enhancement method (FORCE LATEX OUTPUT)
        print("Calling Gemini API with LaTeX-focused prompt...", file=sys.stderr)
        try:
            enhanced_content = gemini_client.enhance_with_latex_output(
                content=extracted_text,
                instructions=user_prompt,
                doc_type=doc_type,
                include_latex=True  # FORCE LATEX OUTPUT
            )
            print(f"Enhanced content received: {len(enhanced_content)} characters", file=sys.stderr)
        except Exception as gemini_error:
            print(f"GEMINI API ERROR: {str(gemini_error)}", file=sys.stderr)
            traceback.print_exc()
            return jsonify({
                'error': 'AI enhancement failed. Please try again.',
                'details': str(gemini_error) if os.getenv('FLASK_ENV') == 'development' else None
            }), 500

        # Process and validate LaTeX in the enhanced content
        print("Processing LaTeX content...", file=sys.stderr)
        latex_result = latex_processor.process_latex_content(enhanced_content)
        processed_content = latex_result.get('cleaned_content', enhanced_content)
        equations = latex_result.get('equations', [])

        # Log equation info if any
        if equations:
            print(f"Extracted {len(equations)} equations from enhanced content", file=sys.stderr)
            for number, eq in enumerate(equations, start=1):
                if not eq.get('valid', True):
                    print(f"WARNING: Equation {number} validation issue: {eq.get('error')}", file=sys.stderr)
                else:
                    print(f"Equation {number}: {eq.get('type')} - {eq.get('latex', '')[:50]}...", file=sys.stderr)

        # Convert back to document format
        print("Creating output document...", file=sys.stderr)
        # The output is always .docx regardless of the input format; the PDF
        # mimetype branch below only becomes reachable if this is changed.
        output_format = '.docx'
        # ALWAYS treat processed_content as LaTeX for Pandoc
        output_file = doc_converter.create_document(
            content=processed_content,
            original_format=file_ext,
            output_format=output_format,
            include_latex=True  # FORCE LATEX MODE IN PANDOC
        )
        print(f"Output file size: {len(output_file)} bytes", file=sys.stderr)

        # Prepare response
        output_buffer = BytesIO(output_file)
        output_buffer.seek(0)

        # Determine output filename from the sanitized upload name
        base_name = os.path.splitext(safe_name)[0]
        output_filename = f"enhanced_{base_name}{output_format}"
        print(f"Sending file: {output_filename}", file=sys.stderr)
        print("=== ENHANCE REQUEST COMPLETED SUCCESSFULLY ===", file=sys.stderr)

        return send_file(
            output_buffer,
            mimetype=(
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
                if output_format == '.docx' else
                'application/pdf'
            ),
            as_attachment=True,
            download_name=output_filename
        )
    except Exception as e:
        # Top-level boundary: log the full traceback, return a generic error.
        print(f"=== ERROR IN ENHANCE REQUEST ===", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        # Return JSON error response
        return jsonify({
            'error': 'Failed to process document. Please try again.',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500
@app.route('/preview', methods=['POST'])
def preview_latex():
    """
    Preview LaTeX equations extracted from an uploaded document.

    Expected form data:
    - file: Document file

    Returns:
    - 200 with JSON: filename, has_math, equation_count, equations
      (type/content pairs) and a text preview capped at 500 characters
    - 400 when no file is provided, the filename is empty, or no text
      could be extracted
    - 500 on processing failure; the ``details`` field is only populated
      when FLASK_ENV == 'development'
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # Same empty-upload guard as the other upload endpoints.
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        file_ext = os.path.splitext(file.filename)[1].lower()
        file_content = file.read()

        # Extract text
        extracted_text = doc_converter.extract_text(file_content, file_ext)
        # Nothing to preview; this also avoids calling len() on None below.
        if not extracted_text:
            return jsonify({'error': 'Could not extract text from document. Please ensure the file contains readable text.'}), 400

        # Detect math
        has_math = latex_processor.detect_mathematical_content(extracted_text)
        # Extract existing equations if any
        equations_list = latex_processor.extract_latex_equations(extracted_text)

        if len(extracted_text) > 500:
            text_preview = extracted_text[:500] + '...'
        else:
            text_preview = extracted_text

        return jsonify({
            'filename': file.filename,
            'has_math': has_math,
            'equation_count': len(equations_list),
            'equations': [{'type': eq.get('type'), 'content': eq.get('latex')} for eq in equations_list],
            'text_preview': text_preview
        })
    except Exception as e:
        print(f"Error in preview: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        # Do not expose internal exception text outside development.
        return jsonify({
            'error': 'Failed to preview document',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500
@app.route('/add-signature', methods=['POST'])
def add_signature():
    """
    Attach a signature to an uploaded document and return it as a download.

    Form data read by this handler:
    - file: the document to sign (required)
    - signature: signature payload (required)
    - position: placement hint, defaults to 'bottom-right'
    - signer_name: optional signer name

    The response is sent with a .docx filename and mimetype.
    """
    docx_mimetype = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'

    try:
        print("=== SIGNATURE REQUEST STARTED ===", file=sys.stderr)

        # --- request validation -------------------------------------------
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']
        if upload.filename == '':
            return jsonify({'error': 'Empty filename'}), 400

        signature_payload = request.form.get('signature')
        if not signature_payload:
            return jsonify({'error': 'No signature provided'}), 400

        placement = request.form.get('position', 'bottom-right')
        signer = request.form.get('signer_name')

        # --- signing ------------------------------------------------------
        print(f"Adding signature to: {upload.filename}", file=sys.stderr)
        raw_bytes = upload.read()
        signed_bytes = doc_converter.add_signature(
            file_content=raw_bytes,
            signature_data=signature_payload,
            position=placement,
            signer_name=signer,
        )

        # --- response -----------------------------------------------------
        stream = BytesIO(signed_bytes)
        stream.seek(0)

        stem, _ = os.path.splitext(upload.filename)
        output_filename = f"Signed_{stem}.docx"
        print(f"Signature added successfully: {output_filename}", file=sys.stderr)

        return send_file(
            stream,
            mimetype=docx_mimetype,
            as_attachment=True,
            download_name=output_filename,
        )
    except Exception as exc:
        print(f"=== ERROR IN SIGNATURE REQUEST ===", file=sys.stderr)
        print(f"Error: {str(exc)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)

        # Exception text is only exposed in development.
        in_development = os.getenv('FLASK_ENV') == 'development'
        return jsonify({
            'error': 'Failed to sign document',
            'details': str(exc) if in_development else None,
        }), 500
@app.route('/', methods=['GET'])
def index():
    """Describe the API: name, version, status, endpoints and capabilities."""
    endpoints = {
        '/': 'API information (GET)',
        '/health': 'Health check (GET)',
        '/enhance': 'Enhance document with LaTeX (POST with file)',
        '/preview': 'Preview LaTeX equations (POST with file)',
        '/add-signature': 'Add signature to document (POST with file and signature)',
    }
    features = [
        'AI-powered content enhancement',
        'Full LaTeX equation support',
        'Mathematical notation formatting',
        'Scientific document structure',
        'Professional formatting',
        'Multi-format input/output',
        'LaTeX preview and validation',
    ]
    # Status mirrors whether the Gemini client was initialized.
    status = 'operational' if gemini_client else 'degraded'

    return jsonify({
        'name': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'description': 'AI-powered document enhancement with LaTeX support using Google Gemini',
        'status': status,
        'endpoints': endpoints,
        'supported_formats': ['.docx', '.pdf', '.txt'],
        'document_types': ['auto', 'academic', 'technical', 'business'],
        'features': features,
    })
if __name__ == '__main__':
    # Script entry point: warn about a missing API key, then start the
    # Flask server on $PORT (default 7860).
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("=" * 60, file=sys.stderr)
        print("WARNING: GEMINI_API_KEY environment variable not set!", file=sys.stderr)
        print("Please set it in HuggingFace Spaces Settings → Repository secrets", file=sys.stderr)
        print("=" * 60, file=sys.stderr)
    else:
        # Only confirm presence; never write any part of the secret to logs.
        print("API Key found", file=sys.stderr)

    # Run Flask app
    port = int(os.getenv('PORT', 7860))
    print(f"Starting Flask app on port {port}", file=sys.stderr)
    # NOTE(review): debug mode is enabled when FLASK_ENV == 'development'
    # while binding to all interfaces -- keep that setting out of any
    # publicly reachable deployment.
    app.run(host='0.0.0.0', port=port, debug=os.getenv('FLASK_ENV') == 'development')