Update app.py
Browse files
app.py
CHANGED
|
@@ -16,11 +16,13 @@ conversation_histories = {}
|
|
| 16 |
async def generate_solution_python(chat_history):
|
| 17 |
"""
|
| 18 |
Generates a solution using a dummy context (since google_search is not available)
|
| 19 |
-
and Gemini LLM, based on the provided chat history.
|
| 20 |
|
| 21 |
Args:
|
| 22 |
chat_history (list): A list of message objects representing the conversation.
|
| 23 |
-
Each object has "role" and "parts"
|
|
|
|
|
|
|
| 24 |
Returns:
|
| 25 |
str: The generated solution text or an error message.
|
| 26 |
"""
|
|
@@ -37,14 +39,22 @@ async def generate_solution_python(chat_history):
|
|
| 37 |
# e.g., Google Custom Search API, SerpAPI, or a web scraping library.
|
| 38 |
# For this example, we'll use a dummy context based on the latest user query.
|
| 39 |
|
| 40 |
-
# Find the latest user query to generate a relevant dummy context
|
| 41 |
-
|
| 42 |
for message in reversed(chat_history):
|
| 43 |
-
if message["role"] == "user" and message["parts"]
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
break
|
| 46 |
|
| 47 |
-
dummy_context = f"Information related to '{
|
| 48 |
|
| 49 |
# You could also inject this context into the chat_history as a system message
|
| 50 |
# or prepend it to the latest user message's text if you want the LLM to explicitly
|
|
@@ -53,7 +63,7 @@ async def generate_solution_python(chat_history):
|
|
| 53 |
# Step 2: Call Gemini API with the full chat history
|
| 54 |
print("Calling Gemini API with full chat history...")
|
| 55 |
llm_payload = {
|
| 56 |
-
"contents": chat_history # Pass the entire history
|
| 57 |
}
|
| 58 |
|
| 59 |
# Get API key from environment variables (Hugging Face Space Secrets)
|
|
@@ -104,27 +114,41 @@ def index():
|
|
| 104 |
|
| 105 |
@app.route('/generate', methods=['POST'])
|
| 106 |
async def generate():
|
| 107 |
-
"""Handles the AI generation request, managing conversation history."""
|
| 108 |
try:
|
| 109 |
data = request.get_json()
|
| 110 |
if not data:
|
| 111 |
return jsonify({"error": "Request body must be JSON"}), 400
|
| 112 |
|
| 113 |
user_query = data.get('query')
|
|
|
|
|
|
|
| 114 |
session_id = data.get('session_id')
|
| 115 |
|
| 116 |
-
if not user_query:
|
| 117 |
-
return jsonify({"error": "Query is required in the request body"}), 400
|
| 118 |
if not session_id:
|
| 119 |
-
# Generate a session ID if not provided (should be provided by frontend)
|
| 120 |
session_id = str(uuid.uuid4())
|
| 121 |
print(f"Warning: session_id not provided, generated new one: {session_id}")
|
| 122 |
|
| 123 |
-
# Retrieve or initialize chat history for this session
|
| 124 |
current_chat_history = conversation_histories.get(session_id, [])
|
| 125 |
-
|
| 126 |
-
#
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
# Generate the solution using the full chat history
|
| 130 |
solution_text = await generate_solution_python(current_chat_history)
|
|
|
|
| 16 |
async def generate_solution_python(chat_history):
|
| 17 |
"""
|
| 18 |
Generates a solution using a dummy context (since google_search is not available)
|
| 19 |
+
and Gemini LLM, based on the provided chat history which can include text and images.
|
| 20 |
|
| 21 |
Args:
|
| 22 |
chat_history (list): A list of message objects representing the conversation.
|
| 23 |
+
Each object has "role" and "parts". Parts can be:
|
| 24 |
+
- {"text": "..."}
|
| 25 |
+
- {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}
|
| 26 |
Returns:
|
| 27 |
str: The generated solution text or an error message.
|
| 28 |
"""
|
|
|
|
| 39 |
# e.g., Google Custom Search API, SerpAPI, or a web scraping library.
|
| 40 |
# For this example, we'll use a dummy context based on the latest user query.
|
| 41 |
|
| 42 |
+
# Find the latest user query or image prompt to generate a relevant dummy context
|
| 43 |
+
latest_user_input = ""
|
| 44 |
for message in reversed(chat_history):
|
| 45 |
+
if message["role"] == "user" and message["parts"]:
|
| 46 |
+
for part in message["parts"]:
|
| 47 |
+
if part.get("text"):
|
| 48 |
+
latest_user_input = part["text"]
|
| 49 |
+
break
|
| 50 |
+
# If it's an image, we can indicate that an image was provided
|
| 51 |
+
if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
|
| 52 |
+
latest_user_input = "an image" # Indicate image input for context
|
| 53 |
+
break
|
| 54 |
+
if latest_user_input:
|
| 55 |
break
|
| 56 |
|
| 57 |
+
dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
|
| 58 |
|
| 59 |
# You could also inject this context into the chat_history as a system message
|
| 60 |
# or prepend it to the latest user message's text if you want the LLM to explicitly
|
|
|
|
| 63 |
# Step 2: Call Gemini API with the full chat history
|
| 64 |
print("Calling Gemini API with full chat history...")
|
| 65 |
llm_payload = {
|
| 66 |
+
"contents": chat_history # Pass the entire history, including text and image parts
|
| 67 |
}
|
| 68 |
|
| 69 |
# Get API key from environment variables (Hugging Face Space Secrets)
|
|
|
|
| 114 |
|
| 115 |
@app.route('/generate', methods=['POST'])
|
| 116 |
async def generate():
|
| 117 |
+
"""Handles the AI generation request, managing conversation history and multi-modal input."""
|
| 118 |
try:
|
| 119 |
data = request.get_json()
|
| 120 |
if not data:
|
| 121 |
return jsonify({"error": "Request body must be JSON"}), 400
|
| 122 |
|
| 123 |
user_query = data.get('query')
|
| 124 |
+
image_data = data.get('image_data') # Base64 image data
|
| 125 |
+
document_text = data.get('document_text') # Text extracted from document
|
| 126 |
session_id = data.get('session_id')
|
| 127 |
|
| 128 |
+
if not (user_query or image_data or document_text):
|
| 129 |
+
return jsonify({"error": "Query, image, or document text is required in the request body"}), 400
|
| 130 |
if not session_id:
|
|
|
|
| 131 |
session_id = str(uuid.uuid4())
|
| 132 |
print(f"Warning: session_id not provided, generated new one: {session_id}")
|
| 133 |
|
|
|
|
| 134 |
current_chat_history = conversation_histories.get(session_id, [])
|
| 135 |
+
|
| 136 |
+
# Construct the parts for the user message
|
| 137 |
+
user_message_parts = []
|
| 138 |
+
if user_query:
|
| 139 |
+
user_message_parts.append({"text": user_query})
|
| 140 |
+
if image_data:
|
| 141 |
+
# Expecting image_data to be a dict with mimeType and data
|
| 142 |
+
user_message_parts.append({"inlineData": image_data})
|
| 143 |
+
if document_text:
|
| 144 |
+
user_message_parts.append({"text": f"Document content:\n{document_text}"})
|
| 145 |
+
# Optionally, you might want to add a specific instruction for document analysis
|
| 146 |
+
if not user_query: # If only document was provided, add a default query
|
| 147 |
+
user_message_parts.insert(0, {"text": "Please analyze the following document content:"})
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# Append the new user message (which can be multi-part) to the history
|
| 151 |
+
current_chat_history.append({"role": "user", "parts": user_message_parts})
|
| 152 |
|
| 153 |
# Generate the solution using the full chat history
|
| 154 |
solution_text = await generate_solution_python(current_chat_history)
|