File size: 10,073 Bytes
0d04e6a
18327d7
0d04e6a
 
18327d7
cf7c52d
 
feaf77b
 
 
 
 
 
 
 
 
0d04e6a
 
 
cf7c52d
 
 
 
 
 
18327d7
9d07e1c
feaf77b
0d04e6a
18327d7
cf7c52d
130ced7
 
 
cd95c93
 
18327d7
cf7c52d
 
18327d7
cf7c52d
18327d7
 
 
9d07e1c
 
 
 
 
 
 
cf7c52d
130ced7
 
 
9d07e1c
 
 
 
 
 
 
 
 
 
cf7c52d
18327d7
9d07e1c
 
 
 
18327d7
9d07e1c
18327d7
 
9d07e1c
18327d7
 
 
 
 
 
 
 
 
 
 
 
9d07e1c
18327d7
 
 
 
 
 
 
 
 
 
 
cd95c93
18327d7
cd95c93
18327d7
cd95c93
18327d7
cd95c93
18327d7
cd95c93
18327d7
cd95c93
18327d7
 
 
 
 
 
 
 
 
0d04e6a
 
 
130ced7
9d07e1c
cd95c93
 
 
 
 
 
130ced7
feaf77b
 
190c0a6
9d07e1c
190c0a6
cf7c52d
 
 
 
190c0a6
 
 
cf7c52d
130ced7
9d07e1c
130ced7
 
 
feaf77b
130ced7
 
feaf77b
 
130ced7
 
feaf77b
 
 
 
 
 
 
9d07e1c
feaf77b
 
 
 
 
 
 
9d07e1c
feaf77b
 
 
 
 
 
 
 
 
 
 
9d07e1c
 
 
feaf77b
 
 
 
 
 
 
130ced7
9d07e1c
130ced7
cf7c52d
9d07e1c
cf7c52d
 
9d07e1c
cf7c52d
 
9d07e1c
cf7c52d
0d04e6a
cf7c52d
cd95c93
 
 
9d07e1c
190c0a6
 
 
 
0d04e6a
 
18327d7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# app.py
from flask import Flask, request, jsonify, render_template
import requests
import json
import asyncio
import os
import uuid # For generating unique session IDs if not provided
import base64 # For decoding base64 data
import io # For handling binary data in memory

# Import PyPDF2 for PDF parsing
# Optional dependency: PyPDF2 is only needed for the PDF-upload path.
# If it is missing, PdfReader is set to None and the /generate route
# returns a 500 for PDF requests instead of crashing at import time.
try:
    from PyPDF2 import PdfReader
except ImportError:
    print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
    PdfReader = None # Set to None if not available

app = Flask(__name__)

# In-memory storage for conversation histories, keyed by session_id.
# This will reset if the Flask application restarts.
# For persistent history, a database (like Firestore) is required.
conversation_histories = {}

async def generate_solution_python(chat_history):
    """
    Generate a solution from the Gemini LLM for the given conversation.

    Args:
        chat_history (list): A list of message objects representing the conversation.
                             Each object has "role" and "parts". Parts can be:
                             - {"text": "..."}
                             - {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}
    Returns:
        str: The generated solution text, or a human-readable error message.
             This function never raises; all failures are folded into the
             returned string.
    """
    if not chat_history:
        return "Error: Chat history is empty."

    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""

    try:
        # --- IMPORTANT: Placeholder for Search API Integration ---
        # The 'google_search' tool is specific to the Canvas environment.
        # On Hugging Face, you would integrate a real public search API here.
        # For this example, we build a dummy context from the latest user input.

        # Find the most recent user input (text, image, or document label).
        # BUGFIX: the previous version evaluated `part["text"]` on parts
        # without a "text" key (operator precedence: `a and b or c`), which
        # raised KeyError for non-image inline data; its document check was
        # also unreachable because the plain-text check broke out first.
        latest_user_input = ""
        for message in reversed(chat_history):
            if message["role"] == "user" and message["parts"]:
                for part in message["parts"]:
                    text = part.get("text")
                    if text:
                        # Text injected by the document-upload path gets a
                        # generic label; anything else is the user's query.
                        if text.startswith(("PDF Document Content:", "Document content:")):
                            latest_user_input = "a document"
                        else:
                            latest_user_input = text
                        break
                    inline = part.get("inlineData")
                    if inline and "image" in inline.get("mimeType", ""):
                        latest_user_input = "an image"
                        break
            if latest_user_input:
                break

        # Currently unused by the LLM call below; kept as the hook for a
        # future search-API integration.
        dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."

        # Step 2: Call Gemini API with the full chat history.
        # NOTE(review): requests.post is a blocking call inside an async
        # function and will stall the event loop for the duration of the
        # HTTP request — acceptable for a low-traffic demo, but consider an
        # async HTTP client for production.
        print("Calling Gemini API with full chat history...")
        llm_payload = {
            "contents": chat_history  # entire history, including text and image parts
        }

        # API key comes from the environment (Hugging Face Space Secrets).
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")

        gemini_api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={gemini_api_key}"

        gemini_response = requests.post(
            gemini_api_url,
            headers={'Content-Type': 'application/json'},
            data=json.dumps(llm_payload)
        )

        gemini_response.raise_for_status()  # raise for HTTP 4xx/5xx
        llm_result = gemini_response.json()
        print("Gemini API response received.")

        # Walk candidates[0].content.parts[0].text defensively.
        if llm_result.get('candidates') and len(llm_result['candidates']) > 0 and \
           llm_result['candidates'][0].get('content') and llm_result['candidates'][0]['content'].get('parts') and \
           len(llm_result['candidates'][0]['content']['parts']) > 0:
            response_text = llm_result['candidates'][0]['content']['parts'][0]['text']
        else:
            response_text = "No solution could be generated. Please try a different query."

    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except ValueError as e:
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets."
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = f"An unexpected error occurred: {error_message}. Please check the logs."

    return response_text

# --- Flask Routes ---

@app.route('/')
def index():
    """Serve the application's main HTML page (the chat UI)."""
    page = render_template('index.html')
    return page

def _pdf_parts_from_base64(pdf_data):
    """Decode a base64-encoded PDF payload and return message 'parts'.

    Never raises: extraction failures are folded into a descriptive text
    part so the LLM can still attempt a response.

    Args:
        pdf_data (dict): payload from the frontend; 'data' holds the
                         base64-encoded PDF bytes.

    Returns:
        list[dict]: a single-element list containing one {"text": ...} part.
    """
    try:
        pdf_bytes = base64.b64decode(pdf_data['data'])
        reader = PdfReader(io.BytesIO(pdf_bytes))

        pdf_extracted_text = ""
        for page in reader.pages:
            # extract_text() can return None (e.g. image-only pages)
            pdf_extracted_text += page.extract_text() or ""

        if pdf_extracted_text.strip():
            print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
            return [{"text": f"PDF Document Content:\n{pdf_extracted_text}"}]

        print("No extractable text found in PDF.")
        return [{"text": "PDF Document: (No extractable text found or PDF is image-based)"}]

    except Exception as pdf_error:
        # Do not fail the whole request for a bad PDF; report the problem
        # inline and let the LLM respond to whatever else was provided.
        print(f"Error processing PDF: {pdf_error}")
        return [{"text": f"PDF Document: (Error processing PDF: {pdf_error})"}]


@app.route('/generate', methods=['POST'])
async def generate():
    """Handle an AI generation request with multi-modal input.

    Expects a JSON body with any of: 'query' (str), 'image_data' (dict of
    base64 inlineData), 'document_text' (str), 'pdf_data' (dict with base64
    'data'), plus an optional 'session_id'. Maintains a per-session,
    in-memory conversation history and returns {"solution", "session_id"}.
    """
    session_id = None  # defined early so the outer except can reference it
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400

        user_query = data.get('query')
        image_data = data.get('image_data')        # base64 image payload
        document_text = data.get('document_text')  # text extracted from .txt on frontend
        pdf_data = data.get('pdf_data')            # base64 PDF payload

        # Ensure session_id is assigned (or minted) before any use.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")

        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400

        current_chat_history = conversation_histories.get(session_id, [])

        # Build the (possibly multi-part) user message.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})

        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")

        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")

        if pdf_data:
            if not PdfReader:
                return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
            user_message_parts.extend(_pdf_parts_from_base64(pdf_data))

        # If only a file was provided without a query, prepend a default instruction.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
            elif document_text or pdf_data:
                user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})

        # Append the new user message to the history.
        current_chat_history.append({"role": "user", "parts": user_message_parts})

        # Generate the solution using the full chat history.
        solution_text = await generate_solution_python(current_chat_history)

        # Record the model's reply and persist the updated history in memory.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history

        return jsonify({"solution": solution_text, "session_id": session_id})

    except Exception as e:
        print(f"Error in /generate endpoint: {e}")
        # session_id may still be None if the failure happened before assignment.
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500

# Entry point: bind to all interfaces on port 7860 (the port Hugging Face
# Spaces expects). Flask's built-in server is for development; use a
# production WSGI/ASGI server for real deployments.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)