|
|
|
|
|
from flask import Flask, request, jsonify, render_template |
|
|
import requests |
|
|
import json |
|
|
import asyncio |
|
|
import os |
|
|
import uuid |
|
|
import base64 |
|
|
import io |
|
|
|
|
|
|
|
|
# PyPDF2 is an optional dependency: if it is missing we set PdfReader to None
# so the app still starts, and the /generate endpoint can reject PDF uploads
# gracefully at request time instead of crashing here at import time.
try:
    from PyPDF2 import PdfReader
except ImportError:
    print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
    PdfReader = None
|
|
|
|
|
# WSGI application object; routes below are registered against this instance.
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# In-memory chat histories keyed by session_id; each value is a list of
# {"role": ..., "parts": [...]} messages in Gemini API format.
# NOTE(review): per-process and unbounded — histories are lost on restart and
# grow without limit; confirm this is acceptable for the deployment target.
conversation_histories = {}
|
|
|
|
|
async def generate_solution_python(chat_history): |
|
|
""" |
|
|
Generates a solution using a dummy context and Gemini LLM, |
|
|
based on the provided chat history which can include text, images, and extracted PDF text. |
|
|
|
|
|
Args: |
|
|
chat_history (list): A list of message objects representing the conversation. |
|
|
Each object has "role" and "parts". Parts can be: |
|
|
- {"text": "..."} |
|
|
- {"inlineData": {"mimeType": "image/png", "data": "base64_string"}} |
|
|
Returns: |
|
|
str: The generated solution text or an error message. |
|
|
""" |
|
|
if not chat_history: |
|
|
return "Error: Chat history is empty." |
|
|
|
|
|
print(f"Processing chat history length: {len(chat_history)}") |
|
|
response_text = "" |
|
|
|
|
|
try: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
latest_user_input = "" |
|
|
for message in reversed(chat_history): |
|
|
if message["role"] == "user" and message["parts"]: |
|
|
for part in message["parts"]: |
|
|
if part.get("text"): |
|
|
latest_user_input = part["text"] |
|
|
break |
|
|
if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""): |
|
|
latest_user_input = "an image" |
|
|
break |
|
|
|
|
|
if part.get("text") and part["text"].startswith("PDF Document Content:") or part["text"].startswith("Document content:"): |
|
|
latest_user_input = "a document" |
|
|
break |
|
|
if latest_user_input: |
|
|
break |
|
|
|
|
|
dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..." |
|
|
|
|
|
|
|
|
print("Calling Gemini API with full chat history...") |
|
|
llm_payload = { |
|
|
"contents": chat_history |
|
|
} |
|
|
|
|
|
|
|
|
gemini_api_key = os.environ.get("GEMINI_API_KEY") |
|
|
if not gemini_api_key: |
|
|
raise ValueError("GEMINI_API_KEY environment variable not set.") |
|
|
|
|
|
gemini_api_url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={gemini_api_key}" |
|
|
|
|
|
gemini_response = requests.post( |
|
|
gemini_api_url, |
|
|
headers={'Content-Type': 'application/json'}, |
|
|
data=json.dumps(llm_payload) |
|
|
) |
|
|
|
|
|
gemini_response.raise_for_status() |
|
|
llm_result = gemini_response.json() |
|
|
print("Gemini API response received.") |
|
|
|
|
|
if llm_result.get('candidates') and len(llm_result['candidates']) > 0 and \ |
|
|
llm_result['candidates'][0].get('content') and llm_result['candidates'][0]['content'].get('parts') and \ |
|
|
len(llm_result['candidates'][0]['content']['parts']) > 0: |
|
|
response_text = llm_result['candidates'][0]['content']['parts'][0]['text'] |
|
|
else: |
|
|
response_text = "No solution could be generated. Please try a different query." |
|
|
|
|
|
except requests.exceptions.RequestException as e: |
|
|
error_message = f"Network or API error during LLM call: {e}" |
|
|
print(f"Error: {error_message}") |
|
|
response_text = f"An API error occurred: {error_message}. Please check the logs." |
|
|
except ValueError as e: |
|
|
error_message = f"Configuration error (e.g., missing API key): {e}" |
|
|
print(f"Error: {error_message}") |
|
|
response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets." |
|
|
except Exception as e: |
|
|
error_message = f"An unexpected error occurred in generate_solution_python: {e}" |
|
|
print(f"Error: {error_message}") |
|
|
response_text = f"An unexpected error occurred: {error_message}. Please check the logs." |
|
|
|
|
|
return response_text |
|
|
|
|
|
|
|
|
|
|
|
@app.route('/')
def index():
    """Render and return the application's main page (templates/index.html)."""
    return render_template('index.html')
|
|
|
|
|
def _pdf_text_part(pdf_data):
    """Decode a base64 PDF payload and return a {"text": ...} chat part.

    Extraction is best-effort: failures are reported inside the part text
    rather than raised, so one bad PDF never aborts the whole request.

    Args:
        pdf_data (dict): expected to carry base64 bytes under "data"
                         — TODO confirm against the frontend payload shape.
    """
    try:
        pdf_bytes = base64.b64decode(pdf_data['data'])
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # extract_text() may return None for image-only pages.
        pdf_extracted_text = "".join(page.extract_text() or "" for page in reader.pages)

        if pdf_extracted_text.strip():
            print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
            return {"text": f"PDF Document Content:\n{pdf_extracted_text}"}
        print("No extractable text found in PDF.")
        return {"text": "PDF Document: (No extractable text found or PDF is image-based)"}
    except Exception as pdf_error:
        print(f"Error processing PDF: {pdf_error}")
        return {"text": f"PDF Document: (Error processing PDF: {pdf_error})"}


@app.route('/generate', methods=['POST'])
async def generate():
    """Handles the AI generation request, managing conversation history and multi-modal input.

    Expects a JSON body with any of: "query" (str), "image_data" (inlineData
    dict), "document_text" (str), "pdf_data" (base64 PDF dict), plus an
    optional "session_id". Appends the user message to the session history,
    asks the LLM, stores the model reply, and returns
    {"solution": ..., "session_id": ...}.
    """
    session_id = None
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400

        user_query = data.get('query')
        image_data = data.get('image_data')
        document_text = data.get('document_text')
        pdf_data = data.get('pdf_data')

        # Generate a session id when the client did not supply one so the
        # client can keep using it for follow-up turns.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")

        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400

        current_chat_history = conversation_histories.get(session_id, [])

        # Assemble the multi-modal parts of this user turn.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})

        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")

        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")

        if pdf_data:
            if not PdfReader:
                return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
            user_message_parts.append(_pdf_text_part(pdf_data))

        # When only attachments were sent, prepend an instruction so the
        # model knows what to do with them.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
            elif document_text or pdf_data:
                user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})

        current_chat_history.append({"role": "user", "parts": user_message_parts})

        solution_text = await generate_solution_python(current_chat_history)

        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history

        return jsonify({"solution": solution_text, "session_id": session_id})

    except Exception as e:
        # Top-level boundary: log and return a 500 with the session id when known.
        print(f"Error in /generate endpoint: {e}")
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500
|
|
|
|
|
if __name__ == '__main__':
    # Bind to all interfaces so the server is reachable from outside the
    # container. NOTE(review): port 7860 and the "Space secrets" wording above
    # suggest a Hugging Face Spaces deployment — confirm.
    app.run(host='0.0.0.0', port=7860)
|
|
|