"""Flask API for AI-assisted document enhancement with LaTeX support.

Routes defined here:
    GET  /               API information
    GET  /health         Health check
    POST /enhance        Enhance an uploaded document and return a .docx
    POST /preview        Report LaTeX equations found in an uploaded document
    POST /add-signature  Add a signature to an uploaded document

All diagnostic output is written to stderr.
"""
import os
import sys
import traceback
from io import BytesIO

from flask import Flask, request, jsonify, send_file
from flask_cors import CORS

# Import with error handling
try:
    from gemini_client import GeminiClient
    from document_converter import DocumentConverter
    from latex_processor import OptimizedLaTeXProcessor as LaTeXProcessor
except ImportError as e:
    print(f"CRITICAL IMPORT ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    raise

app = Flask(__name__)
CORS(app)

# Extensions accepted by /enhance. Note that '.doc' is accepted here even
# though the user-facing error message below only advertises the other three.
SUPPORTED_EXTENSIONS = ('.docx', '.pdf', '.txt', '.doc')

DOCX_MIMETYPE = (
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
)


def _log(message):
    """Write one diagnostic line to stderr."""
    print(message, file=sys.stderr)


def _error_details(exc):
    """Return ``str(exc)`` when FLASK_ENV is 'development', otherwise None.

    Keeps internal error text out of client responses outside development.
    """
    return str(exc) if os.getenv('FLASK_ENV') == 'development' else None


def _document_services_unavailable():
    """Build the 503 response used when the document services failed to start."""
    _log("ERROR: Document services not initialized")
    return jsonify({
        'error': 'Document processing service not available.'
    }), 503


# Initialize services with error handling.
# The names are bound to None first so that a failure part-way through the
# try block leaves them defined (endpoints check for None) instead of
# raising NameError at request time.
gemini_client = None
latex_processor = None
doc_converter = None

try:
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        _log("ERROR: GEMINI_API_KEY not found in environment!")
    else:
        # Deliberately do not echo any part of the key: logs are not a safe
        # place for secret material.
        _log("Initializing Gemini client (API key present)...")
        gemini_client = GeminiClient(api_key=api_key)
        _log("Gemini client initialized successfully")

    latex_processor = LaTeXProcessor()
    doc_converter = DocumentConverter()
    _log("All services initialized successfully")
except Exception as e:
    _log(f"CRITICAL INITIALIZATION ERROR: {str(e)}")
    traceback.print_exc()
    gemini_client = None


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Returns:
        JSON with service status ('healthy' when the Gemini client exists,
        'degraded' otherwise), version, and feature flags.
    """
    return jsonify({
        'status': 'healthy' if gemini_client else 'degraded',
        'service': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'gemini_available': gemini_client is not None,
        'api_key_set': bool(os.getenv('GEMINI_API_KEY')),
        'features': ['latex_output', 'equation_formatting', 'multi_format_support']
    })


@app.route('/enhance', methods=['POST'])
def enhance_document():
    """
    Enhance document with AI and LaTeX support

    Expected form data:
    - file: Document file (.docx, .pdf, or .txt)
    - prompt: (optional) User's enhancement instructions
    - doc_type: (optional) Document type hint (auto, academic, technical, business)

    ``prompt`` and ``doc_type`` are read from the query string first and fall
    back to the form body.

    Returns:
    - The enhanced document as a .docx attachment on success
    - JSON error with 400 (bad upload), 503 (services unavailable) or
      500 (processing failure)
    """
    try:
        _log("=== ENHANCE REQUEST STARTED ===")

        # Check if Gemini is available
        if not gemini_client:
            _log("ERROR: Gemini client not initialized")
            return jsonify({
                'error': 'AI service not available. Please check API key configuration.'
            }), 503

        if latex_processor is None or doc_converter is None:
            return _document_services_unavailable()

        # Validate file upload
        if 'file' not in request.files:
            _log("ERROR: No file in request")
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # `not` also covers a missing (None) filename, not just ''
        if not file.filename:
            _log("ERROR: Empty filename")
            return jsonify({'error': 'Empty filename'}), 400

        _log(f"Processing file: {file.filename}")

        # Get optional parameters
        user_prompt = request.args.get('prompt', request.form.get('prompt', ''))
        doc_type = request.args.get('doc_type', request.form.get('doc_type', 'auto'))
        _log(f"User prompt: {user_prompt[:100] if user_prompt else 'None'}")
        _log(f"Doc type: {doc_type}")

        # Validate file extension
        file_ext = os.path.splitext(file.filename)[1].lower()
        if file_ext not in SUPPORTED_EXTENSIONS:
            _log(f"ERROR: Unsupported format: {file_ext}")
            return jsonify({
                'error': f'Unsupported file format: {file_ext}. Please use .docx, .pdf, or .txt'
            }), 400

        # Read file content
        _log("Reading file content...")
        file_content = file.read()
        _log(f"File size: {len(file_content)} bytes")

        # Extract text from document
        _log("Extracting text...")
        extracted_text = doc_converter.extract_text(file_content, file_ext)
        _log(f"Extracted text length: {len(extracted_text) if extracted_text else 0} characters")

        if not extracted_text or not extracted_text.strip():
            _log(f"ERROR: Text extraction failed for {file.filename}")
            return jsonify({
                'error': 'Could not extract text from document. Please ensure the file contains readable text.'
            }), 400

        # Detect if document contains mathematical/scientific content (for logging)
        _log("Detecting mathematical content...")
        has_math = latex_processor.detect_mathematical_content(extracted_text)
        _log(f"Has math content: {has_math}")

        # Use LaTeX-focused enhancement method (FORCE LATEX OUTPUT)
        _log("Calling Gemini API with LaTeX-focused prompt...")
        try:
            enhanced_content = gemini_client.enhance_with_latex_output(
                content=extracted_text,
                instructions=user_prompt,
                doc_type=doc_type,
                include_latex=True  # FORCE LATEX OUTPUT
            )
            _log(f"Enhanced content received: {len(enhanced_content)} characters")
        except Exception as gemini_error:
            _log(f"GEMINI API ERROR: {str(gemini_error)}")
            traceback.print_exc()
            return jsonify({
                'error': 'AI enhancement failed. Please try again.',
                'details': _error_details(gemini_error)
            }), 500

        # Process and validate LaTeX in the enhanced content
        _log("Processing LaTeX content...")
        latex_result = latex_processor.process_latex_content(enhanced_content)
        processed_content = latex_result.get('cleaned_content', enhanced_content)
        equations = latex_result.get('equations', [])

        # Log equation info if any
        if equations:
            _log(f"Extracted {len(equations)} equations from enhanced content")
            for number, eq in enumerate(equations, start=1):
                if not eq.get('valid', True):
                    _log(f"WARNING: Equation {number} validation issue: {eq.get('error')}")
                else:
                    _log(f"Equation {number}: {eq.get('type')} - {eq.get('latex', '')[:50]}...")

        # Convert back to document format
        _log("Creating output document...")

        # The output is always .docx, regardless of the input format.
        output_format = '.docx'

        # ALWAYS treat processed_content as LaTeX for Pandoc
        output_file = doc_converter.create_document(
            content=processed_content,
            original_format=file_ext,
            output_format=output_format,
            include_latex=True  # FORCE LATEX MODE IN PANDOC
        )
        _log(f"Output file size: {len(output_file)} bytes")

        # Prepare response
        output_buffer = BytesIO(output_file)
        output_buffer.seek(0)

        # Determine output filename
        base_name = os.path.splitext(file.filename)[0]
        output_filename = f"enhanced_{base_name}{output_format}"

        _log(f"Sending file: {output_filename}")
        _log("=== ENHANCE REQUEST COMPLETED SUCCESSFULLY ===")

        return send_file(
            output_buffer,
            mimetype=(
                DOCX_MIMETYPE if output_format == '.docx' else 'application/pdf'
            ),
            as_attachment=True,
            download_name=output_filename
        )

    except Exception as e:
        # Log error for debugging
        _log("=== ERROR IN ENHANCE REQUEST ===")
        _log(f"Error: {str(e)}")
        _log(traceback.format_exc())

        # Return JSON error response
        return jsonify({
            'error': 'Failed to process document. Please try again.',
            'details': _error_details(e)
        }), 500


@app.route('/preview', methods=['POST'])
def preview_latex():
    """
    Preview LaTeX equations extracted from document

    Expected form data:
    - file: Document file

    Returns:
    - JSON with extracted LaTeX equations, a math-content flag and a text
      preview of at most 500 characters (followed by '...' when truncated)
    - JSON error with 400 (bad upload), 503 (services unavailable) or
      500 (processing failure)
    """
    try:
        if latex_processor is None or doc_converter is None:
            return _document_services_unavailable()

        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        file_ext = os.path.splitext(file.filename)[1].lower()
        file_content = file.read()

        # Extract text. A falsy result (e.g. None) is normalized to '' so the
        # length/slice operations below cannot fail on it.
        extracted_text = doc_converter.extract_text(file_content, file_ext) or ''

        # Detect math
        has_math = latex_processor.detect_mathematical_content(extracted_text)

        # Extract existing equations if any
        equations_list = latex_processor.extract_latex_equations(extracted_text)

        if len(extracted_text) > 500:
            text_preview = extracted_text[:500] + '...'
        else:
            text_preview = extracted_text

        return jsonify({
            'filename': file.filename,
            'has_math': has_math,
            'equation_count': len(equations_list),
            'equations': [
                {'type': eq.get('type'), 'content': eq.get('latex')}
                for eq in equations_list
            ],
            'text_preview': text_preview
        })

    except Exception as e:
        _log(f"Error in preview: {str(e)}")
        _log(traceback.format_exc())
        # Same policy as the other endpoints: raw exception text is only
        # returned to the client in development.
        return jsonify({
            'error': 'Failed to preview document',
            'details': _error_details(e)
        }), 500


@app.route('/add-signature', methods=['POST'])
def add_signature():
    """Add digital signature to document

    Expected form data:
    - file: Document file
    - signature: Signature data (required)
    - position: (optional) Placement, defaults to 'bottom-right'
    - signer_name: (optional) Name of the signer

    Returns:
    - The signed document as a .docx attachment on success
    - JSON error with 400 (bad upload), 503 (services unavailable) or
      500 (processing failure)
    """
    try:
        _log("=== SIGNATURE REQUEST STARTED ===")

        if doc_converter is None:
            return _document_services_unavailable()

        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        signature_data = request.form.get('signature')
        if not signature_data:
            return jsonify({'error': 'No signature provided'}), 400

        position = request.form.get('position', 'bottom-right')
        signer_name = request.form.get('signer_name')

        _log(f"Adding signature to: {file.filename}")

        file_content = file.read()

        signed_doc = doc_converter.add_signature(
            file_content=file_content,
            signature_data=signature_data,
            position=position,
            signer_name=signer_name
        )

        output_buffer = BytesIO(signed_doc)
        output_buffer.seek(0)

        base_name = os.path.splitext(file.filename)[0]
        output_filename = f"Signed_{base_name}.docx"

        _log(f"Signature added successfully: {output_filename}")

        return send_file(
            output_buffer,
            mimetype=DOCX_MIMETYPE,
            as_attachment=True,
            download_name=output_filename
        )

    except Exception as e:
        _log("=== ERROR IN SIGNATURE REQUEST ===")
        _log(f"Error: {str(e)}")
        _log(traceback.format_exc())
        return jsonify({
            'error': 'Failed to sign document',
            'details': _error_details(e)
        }), 500


@app.route('/', methods=['GET'])
def index():
    """Root endpoint with API information

    Returns:
        JSON describing the API: name, version, status ('operational' when
        the Gemini client exists, 'degraded' otherwise), endpoints, supported
        formats, document types and features.
    """
    return jsonify({
        'name': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'description': 'AI-powered document enhancement with LaTeX support using Google Gemini',
        'status': 'operational' if gemini_client else 'degraded',
        'endpoints': {
            '/': 'API information (GET)',
            '/health': 'Health check (GET)',
            '/enhance': 'Enhance document with LaTeX (POST with file)',
            '/preview': 'Preview LaTeX equations (POST with file)',
            '/add-signature': 'Add signature to document (POST with file and signature)'
        },
        'supported_formats': ['.docx', '.pdf', '.txt'],
        'document_types': ['auto', 'academic', 'technical', 'business'],
        'features': [
            'AI-powered content enhancement',
            'Full LaTeX equation support',
            'Mathematical notation formatting',
            'Scientific document structure',
            'Professional formatting',
            'Multi-format input/output',
            'LaTeX preview and validation'
        ]
    })


if __name__ == '__main__':
    # Check for API key
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        _log("=" * 60)
        _log("WARNING: GEMINI_API_KEY environment variable not set!")
        _log("Please set it in HuggingFace Spaces Settings → Repository secrets")
        _log("=" * 60)
    else:
        # Report presence only; never print any part of the key.
        _log("API Key found")

    # Run Flask app
    port = int(os.getenv('PORT', 7860))
    _log(f"Starting Flask app on port {port}")
    app.run(host='0.0.0.0', port=port, debug=os.getenv('FLASK_ENV') == 'development')