# app.py
from flask import Flask, request, jsonify, render_template
import requests
import json
import asyncio
import os
import uuid  # For generating unique session IDs if not provided
import base64  # For decoding base64 data
import io  # For handling binary data in memory

# Import PyPDF2 for PDF parsing. The app degrades gracefully (PDF uploads are
# rejected with a 500) rather than failing at import time if it is missing.
try:
    from PyPDF2 import PdfReader
except ImportError:
    print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
    PdfReader = None  # Set to None if not available

app = Flask(__name__)

# In-memory storage for conversation histories, keyed by session_id.
# This will reset if the Flask application restarts.
# For persistent history, a database (like Firestore) is required.
conversation_histories = {}


async def generate_solution_python(chat_history):
    """
    Generates a solution using a dummy context and Gemini LLM, based on the
    provided chat history which can include text, images, and extracted PDF text.

    Args:
        chat_history (list): A list of message objects representing the
            conversation. Each object has "role" and "parts". Parts can be:
                - {"text": "..."}
                - {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}

    Returns:
        str: The generated solution text or an error message.
    """
    if not chat_history:
        return "Error: Chat history is empty."

    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""
    try:
        # --- IMPORTANT: Placeholder for Search API Integration ---
        # The 'google_search' tool is specific to the Canvas environment.
        # On Hugging Face, you would integrate a real public search API here.
        # For this example, we build a dummy context from the latest user input.

        # Find the latest user input (text or image/document indication).
        latest_user_input = ""
        for message in reversed(chat_history):
            if message["role"] == "user" and message["parts"]:
                for part in message["parts"]:
                    text = part.get("text")
                    # BUG FIX: the original tested the document prefixes with
                    # `a and b or c`, whose precedence could raise KeyError on
                    # parts lacking "text"; it also checked the generic text
                    # branch first, making the document branch unreachable.
                    # Check the (more specific) document prefixes first.
                    if text and text.startswith(
                        ("PDF Document Content:", "Document content:")
                    ):
                        latest_user_input = "a document"
                        break
                    if text:
                        latest_user_input = text
                        break
                    if part.get("inlineData") and "image" in part["inlineData"].get(
                        "mimeType", ""
                    ):
                        latest_user_input = "an image"
                        break
                if latest_user_input:
                    break

        # Currently unused by the LLM call; kept as the hook where real search
        # results would be injected into the prompt.
        dummy_context = (
            f"Information related to '{latest_user_input}' from various online "
            f"sources indicates that..."
        )

        # Step 2: Call Gemini API with the full chat history.
        print("Calling Gemini API with full chat history...")
        llm_payload = {
            "contents": chat_history  # Entire history, including text and image parts
        }

        # Get API key from environment variables (Hugging Face Space Secrets).
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")

        gemini_api_url = (
            f"https://generativelanguage.googleapis.com/v1beta/models/"
            f"gemini-2.0-flash:generateContent?key={gemini_api_key}"
        )

        # NOTE: requests.post is blocking; inside an async def it still blocks
        # the event loop. Acceptable for a single-user demo Space.
        # timeout added so a stalled upstream call cannot hang the worker forever.
        gemini_response = requests.post(
            gemini_api_url,
            headers={'Content-Type': 'application/json'},
            data=json.dumps(llm_payload),
            timeout=60,
        )
        gemini_response.raise_for_status()  # Raise an exception for HTTP errors
        llm_result = gemini_response.json()
        print("Gemini API response received.")

        # Defensively walk the candidates -> content -> parts structure.
        candidates = llm_result.get('candidates') or []
        parts = (candidates[0].get('content') or {}).get('parts') if candidates else None
        if parts:
            response_text = parts[0]['text']
        else:
            response_text = "No solution could be generated. Please try a different query."

    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except ValueError as e:
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = (
            f"A configuration error occurred: {error_message}. "
            f"Please check your Space secrets."
        )
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = (
            f"An unexpected error occurred: {error_message}. Please check the logs."
        )

    return response_text


# --- Flask Routes ---

@app.route('/')
def index():
    """Serves the main HTML page."""
    return render_template('index.html')


@app.route('/generate', methods=['POST'])
async def generate():
    """Handles the AI generation request, managing conversation history and
    multi-modal input (text query, base64 image, plain-text document, base64 PDF).

    Returns JSON {"solution": ..., "session_id": ...} on success, or
    {"error": ...} with a 4xx/5xx status on failure.
    """
    session_id = None  # Initialize so the outer handler can reference it safely
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400

        user_query = data.get('query')
        image_data = data.get('image_data')        # Base64 image data
        document_text = data.get('document_text')  # Text extracted from .txt on frontend
        pdf_data = data.get('pdf_data')            # Base64 PDF data

        # Ensure session_id is assigned before use.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")

        if not (user_query or image_data or document_text or pdf_data):
            return jsonify(
                {"error": "Query, image, or document is required in the request body"}
            ), 400

        current_chat_history = conversation_histories.get(session_id, [])

        # Construct the parts for the user message.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})
        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")
        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")
        if pdf_data:
            if not PdfReader:
                return jsonify(
                    {"error": "PDF parsing library (PyPDF2) not installed on backend."}
                ), 500
            try:
                # Decode base64 PDF data and extract text page by page.
                pdf_bytes = base64.b64decode(pdf_data['data'])
                reader = PdfReader(io.BytesIO(pdf_bytes))
                pdf_extracted_text = "".join(
                    page.extract_text() or ""  # extract_text can return None
                    for page in reader.pages
                )
                if pdf_extracted_text.strip():
                    user_message_parts.append(
                        {"text": f"PDF Document Content:\n{pdf_extracted_text}"}
                    )
                    print(
                        f"Successfully extracted {len(pdf_extracted_text)} characters from PDF."
                    )
                else:
                    user_message_parts.append(
                        {"text": "PDF Document: (No extractable text found or PDF is image-based)"}
                    )
                    print("No extractable text found in PDF.")
            except Exception as pdf_error:
                # Do not fail the whole request on PDF extraction errors;
                # let the LLM respond to whatever parts we do have.
                print(f"Error processing PDF: {pdf_error}")
                user_message_parts.append(
                    {"text": f"PDF Document: (Error processing PDF: {pdf_error})"}
                )

        # If only a file was provided without a query, add a default instruction.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(
                    0, {"text": "Please analyze the following image and provide insights:"}
                )
            elif document_text or pdf_data:
                user_message_parts.insert(
                    0,
                    {
                        "text": "Please analyze the following document content and provide a summary or answer questions:"
                    },
                )

        # Append the new (possibly multi-part) user message to the history.
        current_chat_history.append({"role": "user", "parts": user_message_parts})

        # Generate the solution using the full chat history.
        solution_text = await generate_solution_python(current_chat_history)

        # Append the model's response and persist the updated history.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history

        return jsonify({"solution": solution_text, "session_id": session_id})

    except Exception as e:
        print(f"Error in /generate endpoint: {e}")
        # session_id may still be None if parsing failed before assignment.
        if session_id:
            return jsonify(
                {"error": f"Internal server error for session {session_id}: {e}"}
            ), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)