File size: 13,263 Bytes
1c4b684
 
 
 
 
1c62c5f
1c4b684
d40f883
1c62c5f
 
 
0359bee
602a57d
1c62c5f
 
 
 
1c4b684
 
1c62c5f
1c4b684
1c62c5f
 
 
 
 
 
 
 
 
 
89d83c3
0359bee
1c62c5f
 
 
 
 
 
1c4b684
89d83c3
1c4b684
 
 
 
1c62c5f
1c4b684
d40f883
1c62c5f
d40f883
 
1c4b684
 
89d83c3
1c4b684
 
 
 
89d83c3
d40f883
 
 
 
1c4b684
 
1c62c5f
89d83c3
1c62c5f
 
 
 
 
 
89d83c3
1c4b684
 
1c62c5f
1c4b684
89d83c3
1c4b684
 
1c62c5f
1c4b684
89d83c3
1c62c5f
89d83c3
1c4b684
 
 
89d83c3
1c62c5f
 
89d83c3
d40f883
1c4b684
 
1c62c5f
 
89d83c3
1c4b684
1c62c5f
1c4b684
1c62c5f
89d83c3
1c4b684
1c62c5f
1c4b684
d40f883
89d83c3
d31dc64
1c62c5f
 
89d83c3
 
1c62c5f
1c4b684
1c62c5f
89d83c3
 
d40f883
1c62c5f
d40f883
 
 
 
89d83c3
d40f883
 
1c62c5f
 
 
 
 
 
 
89d83c3
d40f883
1c62c5f
0e3de38
89d83c3
 
 
 
 
0359bee
 
89d83c3
 
0359bee
89d83c3
 
1c4b684
1c62c5f
89d83c3
 
356b7eb
89d83c3
 
 
 
 
 
 
 
 
1c62c5f
89d83c3
1c4b684
 
 
89d83c3
1c4b684
 
 
89d83c3
1c62c5f
d40f883
89d83c3
1c4b684
 
89d83c3
 
 
 
 
1c4b684
 
 
89d83c3
1c4b684
1c62c5f
 
 
 
89d83c3
d40f883
1c4b684
 
 
 
 
89d83c3
d40f883
 
 
 
89d83c3
d40f883
 
89d83c3
d40f883
 
 
 
 
 
89d83c3
d40f883
 
 
89d83c3
d40f883
 
89d83c3
d40f883
 
89d83c3
d40f883
0359bee
89d83c3
d40f883
 
 
0359bee
89d83c3
d40f883
 
89d83c3
d40f883
 
 
 
89d83c3
1c4b684
 
1c62c5f
1c4b684
1c62c5f
89d83c3
1c4b684
 
89d83c3
1c4b684
 
 
89d83c3
1c4b684
 
 
89d83c3
1c4b684
 
89d83c3
1c62c5f
89d83c3
1c4b684
89d83c3
1c4b684
 
 
 
 
 
89d83c3
1c4b684
 
89d83c3
1c4b684
 
89d83c3
1c62c5f
89d83c3
1c4b684
 
 
 
 
 
89d83c3
1c4b684
1c62c5f
 
 
1c4b684
 
 
 
 
89d83c3
1c4b684
 
 
 
 
d40f883
1c4b684
1c62c5f
1c4b684
d40f883
 
 
 
1c62c5f
1c4b684
 
d40f883
1c4b684
 
d40f883
 
 
 
 
 
1c4b684
 
 
89d83c3
1c4b684
 
1c62c5f
 
 
 
 
 
 
 
89d83c3
1c4b684
1c62c5f
 
89d83c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import os
import traceback
from io import BytesIO
import sys

# Import the project-local service modules. An ImportError is reported on
# stderr together with its traceback and then re-raised, so the application
# refuses to start when a required module is missing.
try:
    from gemini_client import GeminiClient
    from document_converter import DocumentConverter
    from latex_processor import OptimizedLaTeXProcessor as LaTeXProcessor

except ImportError as e:
    print(f"CRITICAL IMPORT ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    raise

app = Flask(__name__)
CORS(app)

# Bind every service name up front so the route handlers never hit a
# NameError when the initialization below fails part-way through.
gemini_client = None
latex_processor = None
doc_converter = None

# Initialize services with error handling
try:
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        # Without a key the app still starts, but gemini_client stays None.
        print("ERROR: GEMINI_API_KEY not found in environment!", file=sys.stderr)
    else:
        # Deliberately log nothing derived from the key: even a prefix of a
        # secret does not belong in (possibly shared) log output.
        print("Initializing Gemini with API key from environment...", file=sys.stderr)
        gemini_client = GeminiClient(api_key=api_key)
        print("Gemini client initialized successfully", file=sys.stderr)

    latex_processor = LaTeXProcessor()
    doc_converter = DocumentConverter()
    print("All services initialized successfully", file=sys.stderr)
except Exception as e:
    print(f"CRITICAL INITIALIZATION ERROR: {str(e)}", file=sys.stderr)
    traceback.print_exc()
    # Any initialization failure disables the AI client, so the status
    # endpoints report 'degraded' and /enhance answers 503.
    gemini_client = None


@app.route('/health', methods=['GET'])
def health_check():
    """Return a JSON snapshot of the service's health.

    ``status`` is 'healthy' when the module-level Gemini client is truthy and
    'degraded' otherwise. The payload also reports whether the client object
    exists, whether GEMINI_API_KEY is set in the environment, and fixed
    service metadata (name, version, feature list).
    """
    status = 'degraded'
    if gemini_client:
        status = 'healthy'

    report = {
        'status': status,
        'service': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'gemini_available': gemini_client is not None,
        'api_key_set': bool(os.getenv('GEMINI_API_KEY')),
        'features': ['latex_output', 'equation_formatting', 'multi_format_support'],
    }
    return jsonify(report)


@app.route('/enhance', methods=['POST'])
def enhance_document():
    """
    Enhance document with AI and LaTeX support

    Expected form data:
    - file: Document file (.docx, .pdf, or .txt)
    - prompt: (optional) User's enhancement instructions
    - doc_type: (optional) Document type hint (auto, academic, technical, business)

    ``prompt`` and ``doc_type`` may also be sent as query-string parameters;
    the query string takes precedence over the form field.

    Returns:
    - 200 with the enhanced document as a .docx attachment named
      ``enhanced_<original base name>.docx``
    - 400 JSON error for a missing file, an empty filename, an unsupported
      extension, or a document from which no text could be extracted
    - 503 JSON error when the Gemini client is not initialized
    - 500 JSON error when the AI call or any other step fails; ``details``
      carries the exception text only when FLASK_ENV is 'development'
    """
    try:
        print("=== ENHANCE REQUEST STARTED ===", file=sys.stderr)

        # Check if Gemini is available
        if not gemini_client:
            print("ERROR: Gemini client not initialized", file=sys.stderr)
            return jsonify({
                'error': 'AI service not available. Please check API key configuration.'
            }), 503

        # Validate file upload
        if 'file' not in request.files:
            print("ERROR: No file in request", file=sys.stderr)
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # `not` also rejects a missing (None) filename, which would otherwise
        # surface further down as a TypeError and a misleading 500.
        if not file.filename:
            print("ERROR: Empty filename", file=sys.stderr)
            return jsonify({'error': 'Empty filename'}), 400

        print(f"Processing file: {file.filename}", file=sys.stderr)

        # Get optional parameters (query string first, then form data)
        user_prompt = request.args.get('prompt', request.form.get('prompt', ''))
        doc_type = request.args.get('doc_type', request.form.get('doc_type', 'auto'))

        print(f"User prompt: {user_prompt[:100] if user_prompt else 'None'}", file=sys.stderr)
        print(f"Doc type: {doc_type}", file=sys.stderr)

        # Validate file extension
        # NOTE(review): '.doc' is accepted here but is not mentioned in the
        # error message or the docstring - confirm whether that is intended.
        file_ext = os.path.splitext(file.filename)[1].lower()
        if file_ext not in ('.docx', '.pdf', '.txt', '.doc'):
            print(f"ERROR: Unsupported format: {file_ext}", file=sys.stderr)
            return jsonify({'error': f'Unsupported file format: {file_ext}. Please use .docx, .pdf, or .txt'}), 400

        # Read file content
        print("Reading file content...", file=sys.stderr)
        file_content = file.read()
        print(f"File size: {len(file_content)} bytes", file=sys.stderr)

        # Extract text from document
        print("Extracting text...", file=sys.stderr)
        extracted_text = doc_converter.extract_text(file_content, file_ext)
        print(f"Extracted text length: {len(extracted_text) if extracted_text else 0} characters", file=sys.stderr)

        # Reject None, empty and whitespace-only extraction results alike
        if not extracted_text or not extracted_text.strip():
            print(f"ERROR: Text extraction failed for {file.filename}", file=sys.stderr)
            return jsonify({'error': 'Could not extract text from document. Please ensure the file contains readable text.'}), 400

        # Detect if document contains mathematical/scientific content (for logging)
        print("Detecting mathematical content...", file=sys.stderr)
        has_math = latex_processor.detect_mathematical_content(extracted_text)
        print(f"Has math content: {has_math}", file=sys.stderr)

        # Use LaTeX-focused enhancement method (FORCE LATEX OUTPUT)
        print("Calling Gemini API with LaTeX-focused prompt...", file=sys.stderr)
        try:
            enhanced_content = gemini_client.enhance_with_latex_output(
                content=extracted_text,
                instructions=user_prompt,
                doc_type=doc_type,
                include_latex=True  # FORCE LATEX OUTPUT
            )
            print(f"Enhanced content received: {len(enhanced_content)} characters", file=sys.stderr)
        except Exception as gemini_error:
            # Failures of the AI call get their own message so the client can
            # tell them apart from document-processing failures.
            print(f"GEMINI API ERROR: {str(gemini_error)}", file=sys.stderr)
            traceback.print_exc()
            return jsonify({
                'error': 'AI enhancement failed. Please try again.',
                'details': str(gemini_error) if os.getenv('FLASK_ENV') == 'development' else None
            }), 500

        # Process and validate LaTeX in the enhanced content; fall back to the
        # raw enhanced content when the processor returns no cleaned version.
        print("Processing LaTeX content...", file=sys.stderr)
        latex_result = latex_processor.process_latex_content(enhanced_content)
        processed_content = latex_result.get('cleaned_content', enhanced_content)
        equations = latex_result.get('equations', [])

        # Log equation info if any
        if equations:
            print(f"Extracted {len(equations)} equations from enhanced content", file=sys.stderr)
            for i, eq in enumerate(equations):
                if not eq.get('valid', True):
                    print(f"WARNING: Equation {i+1} validation issue: {eq.get('error')}", file=sys.stderr)
                else:
                    print(f"Equation {i+1}: {eq.get('type')} - {eq.get('latex', '')[:50]}...", file=sys.stderr)

        # Convert back to document format
        print("Creating output document...", file=sys.stderr)

        # The output is always .docx, whatever the input format was. The PDF
        # mimetype branch below only becomes reachable if this is changed.
        output_format = '.docx'

        # ALWAYS treat processed_content as LaTeX for Pandoc
        output_file = doc_converter.create_document(
            content=processed_content,
            original_format=file_ext,
            output_format=output_format,
            include_latex=True  # FORCE LATEX MODE IN PANDOC
        )

        print(f"Output file size: {len(output_file)} bytes", file=sys.stderr)

        # Prepare response
        output_buffer = BytesIO(output_file)
        output_buffer.seek(0)

        # Determine output filename. The uploaded name is client-supplied, so
        # drop any directory components (either slash style) before reusing it.
        safe_name = os.path.basename(file.filename.replace('\\', '/'))
        base_name = os.path.splitext(safe_name)[0]
        output_filename = f"enhanced_{base_name}{output_format}"

        print(f"Sending file: {output_filename}", file=sys.stderr)
        print("=== ENHANCE REQUEST COMPLETED SUCCESSFULLY ===", file=sys.stderr)

        return send_file(
            output_buffer,
            mimetype=(
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
                if output_format == '.docx' else
                'application/pdf'
            ),
            as_attachment=True,
            download_name=output_filename
        )

    except Exception as e:
        # Log error for debugging
        print(f"=== ERROR IN ENHANCE REQUEST ===", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)

        # Return JSON error response; exception text is exposed only in development
        return jsonify({
            'error': 'Failed to process document. Please try again.',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500


@app.route('/preview', methods=['POST'])
def preview_latex():
    """
    Preview LaTeX equations extracted from document

    Expected form data:
    - file: Document file

    Returns:
    - 200 JSON with the filename, a ``has_math`` flag, the equation count,
      the extracted equations (``type`` / ``content``) and a text preview
      truncated to 500 characters plus '...'
    - 400 JSON error when no file is provided or its filename is empty
    - 500 JSON error on any other failure; ``details`` carries the exception
      text only when FLASK_ENV is 'development'
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']
        # Same guard as the other upload endpoints: an upload without a name
        # has no extension to dispatch text extraction on.
        if not file.filename:
            return jsonify({'error': 'Empty filename'}), 400

        file_ext = os.path.splitext(file.filename)[1].lower()
        file_content = file.read()

        # Extract text; a None result is treated as empty text so the len()
        # and slicing below cannot raise.
        extracted_text = doc_converter.extract_text(file_content, file_ext) or ''

        # Detect math
        has_math = latex_processor.detect_mathematical_content(extracted_text)

        # Extract existing equations if any
        equations_list = latex_processor.extract_latex_equations(extracted_text)

        # Show at most the first 500 characters, marking truncation with '...'
        if len(extracted_text) > 500:
            text_preview = extracted_text[:500] + '...'
        else:
            text_preview = extracted_text

        return jsonify({
            'filename': file.filename,
            'has_math': has_math,
            'equation_count': len(equations_list),
            'equations': [{'type': eq.get('type'), 'content': eq.get('latex')} for eq in equations_list],
            'text_preview': text_preview
        })

    except Exception as e:
        print(f"Error in preview: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        # Do not hand raw exception text to clients outside development,
        # matching the error responses of the other endpoints.
        return jsonify({
            'error': 'Failed to preview document',
            'details': str(e) if os.getenv('FLASK_ENV') == 'development' else None
        }), 500


@app.route('/add-signature', methods=['POST'])
def add_signature():
    """Add a signature to an uploaded document and return it as a download.

    Form fields read: ``file`` (required upload), ``signature`` (required),
    ``position`` (defaults to 'bottom-right') and ``signer_name`` (optional).
    The signing itself is delegated to ``doc_converter.add_signature``; its
    result is sent back as an attachment named ``Signed_<base name>.docx``.

    A missing file, an empty filename or a missing signature yields a 400
    JSON error. Any exception yields a 500 JSON error whose ``details`` is
    populated only when FLASK_ENV is 'development'.
    """
    try:
        print("=== SIGNATURE REQUEST STARTED ===", file=sys.stderr)

        uploads = request.files
        if 'file' not in uploads:
            return jsonify({'error': 'No file provided'}), 400

        file = uploads['file']
        if file.filename == '':
            return jsonify({'error': 'Empty filename'}), 400

        form = request.form
        signature_data = form.get('signature')
        if not signature_data:
            return jsonify({'error': 'No signature provided'}), 400

        placement = form.get('position', 'bottom-right')
        signer = form.get('signer_name')

        print(f"Adding signature to: {file.filename}", file=sys.stderr)

        # Hand the raw upload bytes and the signature options to the converter
        signed_bytes = doc_converter.add_signature(
            file_content=file.read(),
            signature_data=signature_data,
            position=placement,
            signer_name=signer,
        )

        # Wrap the signed bytes in an in-memory stream for send_file
        output_buffer = BytesIO(signed_bytes)
        output_buffer.seek(0)

        base_name, _ = os.path.splitext(file.filename)
        output_filename = f"Signed_{base_name}.docx"

        print(f"Signature added successfully: {output_filename}", file=sys.stderr)

        return send_file(
            output_buffer,
            mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            as_attachment=True,
            download_name=output_filename,
        )

    except Exception as e:
        print(f"=== ERROR IN SIGNATURE REQUEST ===", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)

        # Exception text is exposed to the client only in development
        details = str(e) if os.getenv('FLASK_ENV') == 'development' else None
        return jsonify({'error': 'Failed to sign document', 'details': details}), 500


@app.route('/', methods=['GET'])
def index():
    """Describe the API as JSON: name, version, status, endpoints and features.

    ``status`` is 'operational' when the module-level Gemini client is truthy
    and 'degraded' otherwise; everything else in the payload is static.
    """
    if gemini_client:
        status = 'operational'
    else:
        status = 'degraded'

    # Route -> short human-readable description
    endpoints = {
        '/': 'API information (GET)',
        '/health': 'Health check (GET)',
        '/enhance': 'Enhance document with LaTeX (POST with file)',
        '/preview': 'Preview LaTeX equations (POST with file)',
        '/add-signature': 'Add signature to document (POST with file and signature)',
    }

    features = [
        'AI-powered content enhancement',
        'Full LaTeX equation support',
        'Mathematical notation formatting',
        'Scientific document structure',
        'Professional formatting',
        'Multi-format input/output',
        'LaTeX preview and validation',
    ]

    info = {
        'name': 'LaTeX Document Enhancement API',
        'version': '2.0.0',
        'description': 'AI-powered document enhancement with LaTeX support using Google Gemini',
        'status': status,
        'endpoints': endpoints,
        'supported_formats': ['.docx', '.pdf', '.txt'],
        'document_types': ['auto', 'academic', 'technical', 'business'],
        'features': features,
    }
    return jsonify(info)


if __name__ == '__main__':
    # Script entry point: warn when the API key is missing, then start the
    # Flask development server on $PORT (default 7860) on all interfaces.
    api_key = os.getenv('GEMINI_API_KEY')
    if not api_key:
        print("=" * 60, file=sys.stderr)
        print("WARNING: GEMINI_API_KEY environment variable not set!", file=sys.stderr)
        print("Please set it in HuggingFace Spaces Settings → Repository secrets", file=sys.stderr)
        print("=" * 60, file=sys.stderr)
    else:
        # Report presence only; never echo any part of the secret into logs.
        print("API Key found (value not logged)", file=sys.stderr)

    # Run Flask app; debug mode is enabled only when FLASK_ENV is 'development'
    port = int(os.getenv('PORT', 7860))
    print(f"Starting Flask app on port {port}", file=sys.stderr)
    app.run(host='0.0.0.0', port=port, debug=os.getenv('FLASK_ENV') == 'development')