# AI_tool / app.py — Hugging Face Space (uploaded by TarSh8654, commit 9d07e1c, verified)
# app.py
from flask import Flask, request, jsonify, render_template
import requests
import json
import asyncio
import os
import uuid # For generating unique session IDs if not provided
import base64 # For decoding base64 data
import io # For handling binary data in memory
# Import PyPDF2 for PDF parsing
try:
from PyPDF2 import PdfReader
except ImportError:
print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
PdfReader = None # Set to None if not available
app = Flask(__name__)

# In-memory storage for per-session conversation histories, keyed by
# session_id. This resets whenever the Flask application restarts; for
# persistent history a database (like Firestore) would be required.
conversation_histories = {}
async def generate_solution_python(chat_history):
    """Generate a reply from the Gemini LLM for a multi-modal chat history.

    Args:
        chat_history (list): Conversation messages, each shaped as
            {"role": "user"|"model", "parts": [...]}. Parts may be:
              - {"text": "..."}
              - {"inlineData": {"mimeType": "image/png", "data": "<base64>"}}

    Returns:
        str: The generated solution text, or a human-readable error
        message. This function never raises: all failures are folded into
        the returned string so the calling route can always respond.
    """
    if not chat_history:
        return "Error: Chat history is empty."
    print(f"Processing chat history length: {len(chat_history)}")
    response_text = ""
    try:
        # --- IMPORTANT: Placeholder for Search API Integration ---
        # The 'google_search' tool is specific to the Canvas environment.
        # On Hugging Face you would integrate a real public search API here.
        # For now, build a dummy context from the latest user input.
        #
        # Find the latest user input (text, image, or document) for the
        # dummy context. Document parts are recognized by the prefixes the
        # /generate route prepends, and must be checked BEFORE the generic
        # text check (the original checked text first, making the document
        # branch unreachable, and its `and`/`or` precedence could raise
        # KeyError on parts without a "text" key).
        latest_user_input = ""
        for message in reversed(chat_history):
            if message["role"] != "user" or not message["parts"]:
                continue
            for part in message["parts"]:
                text = part.get("text")
                if text and text.startswith(("PDF Document Content:", "Document content:")):
                    latest_user_input = "a document"
                    break
                if text:
                    latest_user_input = text
                    break
                inline = part.get("inlineData")
                if inline and "image" in inline.get("mimeType", ""):
                    latest_user_input = "an image"
                    break
            if latest_user_input:
                break
        # NOTE: dummy_context is currently unused; it marks where real
        # search context would be injected into the prompt.
        dummy_context = (
            f"Information related to '{latest_user_input}' from various "
            "online sources indicates that..."
        )
        # Step 2: Call Gemini API with the full chat history.
        print("Calling Gemini API with full chat history...")
        llm_payload = {
            # Pass the entire history, including text and image parts.
            "contents": chat_history
        }
        # Get API key from environment variables (Hugging Face Space Secrets).
        gemini_api_key = os.environ.get("GEMINI_API_KEY")
        if not gemini_api_key:
            raise ValueError("GEMINI_API_KEY environment variable not set.")
        gemini_api_url = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            "gemini-2.0-flash:generateContent"
        )
        gemini_response = requests.post(
            gemini_api_url,
            params={"key": gemini_api_key},  # same ?key=... query as before
            json=llm_payload,  # requests sets the JSON Content-Type header
            timeout=60,  # avoid hanging the worker on a stalled API call
        )
        gemini_response.raise_for_status()  # Raise an exception for HTTP errors
        llm_result = gemini_response.json()
        print("Gemini API response received.")
        # Walk the expected response shape defensively; any missing level
        # falls through to the "no solution" message.
        candidates = llm_result.get('candidates') or []
        if candidates and candidates[0].get('content', {}).get('parts'):
            response_text = candidates[0]['content']['parts'][0]['text']
        else:
            response_text = "No solution could be generated. Please try a different query."
    except requests.exceptions.RequestException as e:
        error_message = f"Network or API error during LLM call: {e}"
        print(f"Error: {error_message}")
        response_text = f"An API error occurred: {error_message}. Please check the logs."
    except ValueError as e:
        error_message = f"Configuration error (e.g., missing API key): {e}"
        print(f"Error: {error_message}")
        response_text = f"A configuration error occurred: {error_message}. Please check your Space secrets."
    except Exception as e:
        error_message = f"An unexpected error occurred in generate_solution_python: {e}"
        print(f"Error: {error_message}")
        response_text = f"An unexpected error occurred: {error_message}. Please check the logs."
    return response_text
# --- Flask Routes ---
@app.route('/')
def index():
    """Render and return the application's main page."""
    page = render_template('index.html')
    return page
def _pdf_text_part(pdf_data):
    """Decode a base64 PDF payload and return a {"text": ...} message part.

    Deliberately best-effort: extraction failures are folded into the part
    text so the LLM call can still proceed instead of failing the request.

    Args:
        pdf_data (dict): Expected to hold base64 PDF bytes under "data"
            (sent by the frontend) — TODO confirm against the client code.

    Returns:
        dict: A {"text": str} part for the Gemini chat history.
    """
    try:
        pdf_bytes = base64.b64decode(pdf_data['data'])
        reader = PdfReader(io.BytesIO(pdf_bytes))
        # extract_text() can return None (e.g. image-only pages).
        pdf_extracted_text = "".join(page.extract_text() or "" for page in reader.pages)
        if pdf_extracted_text.strip():
            print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
            return {"text": f"PDF Document Content:\n{pdf_extracted_text}"}
        print("No extractable text found in PDF.")
        return {"text": "PDF Document: (No extractable text found or PDF is image-based)"}
    except Exception as pdf_error:
        print(f"Error processing PDF: {pdf_error}")
        return {"text": f"PDF Document: (Error processing PDF: {pdf_error})"}


@app.route('/generate', methods=['POST'])
async def generate():
    """Handle an AI generation request with multi-modal input.

    Expects a JSON body with any combination of:
      - 'query' (str): the user's text prompt
      - 'image_data' (dict): inlineData payload (mimeType + base64 data)
      - 'document_text' (str): text extracted from a .txt on the frontend
      - 'pdf_data' (dict): base64 PDF data, parsed server-side via PyPDF2
      - 'session_id' (str, optional): conversation key; generated if absent

    Maintains per-session history in the in-memory `conversation_histories`
    and returns JSON {"solution": ..., "session_id": ...}.
    """
    session_id = None  # Defined up front so the error handler can report it.
    try:
        data = request.get_json()
        if not data:
            return jsonify({"error": "Request body must be JSON"}), 400
        user_query = data.get('query')
        image_data = data.get('image_data')  # Base64 image data
        document_text = data.get('document_text')  # Text extracted from .txt on frontend
        pdf_data = data.get('pdf_data')  # Base64 PDF data
        # Reuse the caller's session, or mint a new one.
        session_id = data.get('session_id')
        if not session_id:
            session_id = str(uuid.uuid4())
            print(f"Warning: session_id not provided, generated new one: {session_id}")
        if not (user_query or image_data or document_text or pdf_data):
            return jsonify({"error": "Query, image, or document is required in the request body"}), 400
        current_chat_history = conversation_histories.get(session_id, [])
        # Build the (possibly multi-part) user message.
        user_message_parts = []
        if user_query:
            user_message_parts.append({"text": user_query})
        if image_data:
            user_message_parts.append({"inlineData": image_data})
            print("Received image data for processing.")
        if document_text:
            user_message_parts.append({"text": f"Document content:\n{document_text}"})
            print("Received text document content for processing.")
        if pdf_data:
            if not PdfReader:
                return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
            user_message_parts.append(_pdf_text_part(pdf_data))
        # If only a file was provided without a query, prepend a default instruction.
        if not user_query and (image_data or document_text or pdf_data):
            if image_data:
                user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
            elif document_text or pdf_data:
                user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})
        # Append the new user message to the history and generate a reply.
        current_chat_history.append({"role": "user", "parts": user_message_parts})
        solution_text = await generate_solution_python(current_chat_history)
        # Record the model's response and persist the updated history.
        current_chat_history.append({"role": "model", "parts": [{"text": solution_text}]})
        conversation_histories[session_id] = current_chat_history
        return jsonify({"solution": solution_text, "session_id": session_id})
    except Exception as e:
        print(f"Error in /generate endpoint: {e}")
        # session_id may still be None if the failure happened before parsing.
        if session_id:
            return jsonify({"error": f"Internal server error for session {session_id}: {e}"}), 500
        else:
            return jsonify({"error": f"Internal server error: {e}"}), 500
if __name__ == '__main__':
    # Bind to all interfaces on port 7860 — the port Hugging Face Spaces
    # expects a web app to listen on.
    app.run(host='0.0.0.0', port=7860)