# app.py
from flask import Flask, request, jsonify, render_template
import requests
import json
import asyncio
import os
import uuid # For generating unique session IDs if not provided
import base64 # For decoding base64 data
import io # For handling binary data in memory
# Import PyPDF2 for PDF parsing
try:
from PyPDF2 import PdfReader
except ImportError:
print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
PdfReader = None # Set to None if not available
app = Flask(__name__)
# In-memory storage for conversation histories, keyed by session_id
# (session_id -> list of {"role": ..., "parts": [...]} messages).
# This will reset if the Flask application restarts.
# For persistent history, a database (like Firestore) is required.
conversation_histories: dict = {}
async def generate_solution_python(chat_history):
    """
    Generate a solution with the Gemini LLM from the full conversation history.

    The history may include plain text, inline images, and text previously
    extracted from PDF/.txt documents.

    Args:
        chat_history (list): Message objects, each {"role": ..., "parts": [...]}.
            A part is either {"text": "..."} or
            {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}.

    Returns:
        str: The generated solution text, or a human-readable error message.
    """
    if not chat_history:
        return "Error: Chat history is empty."
    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""
    try:
        # --- IMPORTANT: Placeholder for Search API Integration ---
        # The 'google_search' tool is specific to the Canvas environment.
        # On Hugging Face you would integrate a real public search API here;
        # for now we only build a dummy context from the latest user input.
        latest_user_input = ""
        for message in reversed(chat_history):
            if message["role"] != "user" or not message["parts"]:
                continue
            for part in message["parts"]:
                text = part.get("text")
                if text:
                    # Fixed an operator-precedence bug here: the original
                    # "a and b or c" form raised KeyError for parts without a
                    # "text" key, and its document branch was unreachable.
                    if text.startswith(("PDF Document Content:", "Document content:")):
                        latest_user_input = "a document"
                    else:
                        latest_user_input = text
                    break
                if "image" in (part.get("inlineData") or {}).get("mimeType", ""):
                    latest_user_input = "an image"
                    break
            if latest_user_input:
                break
        # Unused placeholder until a real search backend is wired in.
        dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
        # Step 2: Call Gemini API with the full chat history (text + image parts).
        print("Calling Gemini API with full chat history...")
        llm_payload = {
            "contents": chat_history  # Entire history, including text and image parts
        }
        # Get API key from environment variables (Hugging Face Space Secrets).
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")
        gemini_api_url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            f"gemini-2.0-flash:generateContent?key={gemini_api_key}"
        )
        gemini_response = requests.post(
            gemini_api_url,
            headers={'Content-Type': 'application/json'},
            json=llm_payload,  # requests serializes the payload itself
            timeout=120,       # never hang the worker on a stalled API call
        )
        gemini_response.raise_for_status()  # Raise an exception for HTTP errors
        llm_result = gemini_response.json()
        print("Gemini API response received.")
        # Defensively unwrap candidates[0].content.parts[0].text.
        candidates = llm_result.get('candidates') or []
        content = candidates[0].get('content') if candidates else None
        parts = (content or {}).get('parts') or []
        if parts and 'text' in parts[0]:
            response_text = parts[0]['text']
        else:
            response_text = "No solution could be generated. Please try a different query."
    except ValueError as e:
        # Checked before RequestException so a missing key never needs to
        # touch the requests module.
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets."
    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = f"An unexpected error occurred: {error_message}. Please check the logs."
    return response_text
# --- Flask Routes ---
@app.route('/')
def index():
    """Render and return the application's main HTML page."""
    return render_template('index.html')
def _extract_pdf_text(pdf_data):
    """Decode base64 PDF data and return a text-part string for the LLM.

    Never raises: extraction failures are reported inside the returned text so
    the model can still respond (matches the original best-effort behavior).

    Args:
        pdf_data (dict): {"data": "<base64 PDF bytes>"} from the frontend.

    Returns:
        str: "PDF Document Content:\\n..." on success, or a placeholder/error note.
    """
    try:
        pdf_bytes = base64.b64decode(pdf_data['data'])
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # extract_text() can return None for image-only pages.
        extracted = "".join((page.extract_text() or "") for page in reader.pages)
        if extracted.strip():
            print(f"Successfully extracted {len(extracted)} characters from PDF.")
            return f"PDF Document Content:\n{extracted}"
        print("No extractable text found in PDF.")
        return "PDF Document: (No extractable text found or PDF is image-based)"
    except Exception as pdf_error:
        # Do not propagate PDF errors to the frontend; let the LLM respond anyway.
        print(f"Error processing PDF: {pdf_error}")
        return f"PDF Document: (Error processing PDF: {pdf_error})"


def _build_user_message_parts(user_query, image_data, document_text, pdf_data):
    """Assemble the multi-modal "parts" list for a single user turn.

    Any combination of query text, inline image, .txt content, and PDF content
    may be present. When files arrive without a query, a default instruction is
    prepended so the model knows what to do with them.
    """
    parts = []
    if user_query:
        parts.append({"text": user_query})
    if image_data:
        parts.append({"inlineData": image_data})
        print("Received image data for processing.")
    if document_text:
        parts.append({"text": f"Document content:\n{document_text}"})
        print("Received text document content for processing.")
    if pdf_data:
        parts.append({"text": _extract_pdf_text(pdf_data)})
    if not user_query and (image_data or document_text or pdf_data):
        if image_data:
            parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
        elif document_text or pdf_data:
            parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})
    return parts


@app.route('/generate', methods=['POST'])
async def generate():
    """Handle an AI generation request with history and multi-modal input.

    Expects a JSON body with optional keys: query, image_data (base64),
    document_text (pre-extracted .txt), pdf_data (base64), session_id.

    Returns:
        JSON {"solution": ..., "session_id": ...} on success; an error payload
        with status 400 (bad request) or 500 (server error) otherwise.
    """
    session_id = None  # Ensure it exists for the outer error handler
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400
        user_query = data.get('query')
        image_data = data.get('image_data')        # Base64 image data
        document_text = data.get('document_text')  # Text extracted from .txt on frontend
        pdf_data = data.get('pdf_data')            # Base64 PDF data
        # Reuse the caller's session, or mint one so history can accumulate.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")
        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400
        if pdf_data and not PdfReader:
            return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
        current_chat_history = conversation_histories.get(session_id, [])
        # Append the new (possibly multi-part) user message to the history.
        current_chat_history.append({
            "role": "user",
            "parts": _build_user_message_parts(user_query, image_data, document_text, pdf_data),
        })
        # Generate the solution using the full chat history.
        solution_text = await generate_solution_python(current_chat_history)
        # Record the model's reply and persist the updated history in memory.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history
        return jsonify({"solution": solution_text, "session_id": session_id})
    except Exception as e:
        print(f"Error in /generate endpoint: {e}")
        # Include the session in the error when we got far enough to know it.
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        return jsonify({"error": f"Internal server error: {e}"}), 500
if __name__ == '__main__':
    # Hugging Face Spaces routes traffic to port 7860 on all interfaces.
    app.run(host='0.0.0.0', port=7860)