Spaces:

TarSh8654
/

AI_tool

Sleeping

App Files Community

TarSh8654 committed on Jul 15, 2025

Commit

130ced7

verified ·

1 Parent(s): 439f544

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -16

app.py CHANGED Viewed

@@ -16,11 +16,13 @@ conversation_histories = {}
 async def generate_solution_python(chat_history):
     """
     Generates a solution using a dummy context (since google_search is not available)
-    and Gemini LLM, based on the provided chat history.
     Args:
         chat_history (list): A list of message objects representing the conversation.
-                             Each object has "role" and "parts" (e.g., [{"text": "..."}]).
     Returns:
         str: The generated solution text or an error message.
     """
@@ -37,14 +39,22 @@ async def generate_solution_python(chat_history):
         # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
         # For this example, we'll use a dummy context based on the latest user query.
-        # Find the latest user query to generate a relevant dummy context
-        latest_user_query = ""
         for message in reversed(chat_history):
-            if message["role"] == "user" and message["parts"] and message["parts"][0].get("text"):
-                latest_user_query = message["parts"][0]["text"]
                 break
-        dummy_context = f"Information related to '{latest_user_query}' from various online sources indicates that..."
         # You could also inject this context into the chat_history as a system message
         # or prepend it to the latest user message's text if you want the LLM to explicitly
@@ -53,7 +63,7 @@ async def generate_solution_python(chat_history):
         # Step 2: Call Gemini API with the full chat history
         print("Calling Gemini API with full chat history...")
         llm_payload = {
-            "contents": chat_history # Pass the entire history
         }
         # Get API key from environment variables (Hugging Face Space Secrets)
@@ -104,27 +114,41 @@ def index():
 @app.route('/generate', methods=['POST'])
 async def generate():
-    """Handles the AI generation request, managing conversation history."""
     try:
         data = request.get_json()
         if not data:
             return jsonify({"error": "Request body must be JSON"}), 400
         user_query = data.get('query')
         session_id = data.get('session_id')
-        if not user_query:
-            return jsonify({"error": "Query is required in the request body"}), 400
         if not session_id:
-            # Generate a session ID if not provided (should be provided by frontend)
             session_id = str(uuid.uuid4())
             print(f"Warning: session_id not provided, generated new one: {session_id}")
-        # Retrieve or initialize chat history for this session
         current_chat_history = conversation_histories.get(session_id, [])
-        # Append the new user message to the history
-        current_chat_history.append({"role": "user", "parts": [{"text": user_query}]})
         # Generate the solution using the full chat history
         solution_text = await generate_solution_python(current_chat_history)

 async def generate_solution_python(chat_history):
     """
     Generates a solution using a dummy context (since google_search is not available)
+    and Gemini LLM, based on the provided chat history which can include text and images.
     Args:
         chat_history (list): A list of message objects representing the conversation.
+                             Each object has "role" and "parts". Parts can be:
+                             - {"text": "..."}
+                             - {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}
     Returns:
         str: The generated solution text or an error message.
     """
         # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
         # For this example, we'll use a dummy context based on the latest user query.
+        # Find the latest user query or image prompt to generate a relevant dummy context
+        latest_user_input = ""
         for message in reversed(chat_history):
+            if message["role"] == "user" and message["parts"]:
+                for part in message["parts"]:
+                    if part.get("text"):
+                        latest_user_input = part["text"]
+                        break
+                    # If it's an image, we can indicate that an image was provided
+                    if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
+                        latest_user_input = "an image" # Indicate image input for context
+                        break
+            if latest_user_input:
                 break
+        dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
         # You could also inject this context into the chat_history as a system message
         # or prepend it to the latest user message's text if you want the LLM to explicitly
         # Step 2: Call Gemini API with the full chat history
         print("Calling Gemini API with full chat history...")
         llm_payload = {
+            "contents": chat_history # Pass the entire history, including text and image parts
         }
         # Get API key from environment variables (Hugging Face Space Secrets)
 @app.route('/generate', methods=['POST'])
 async def generate():
+    """Handles the AI generation request, managing conversation history and multi-modal input."""
     try:
         data = request.get_json()
         if not data:
             return jsonify({"error": "Request body must be JSON"}), 400
         user_query = data.get('query')
+        image_data = data.get('image_data') # Base64 image data
+        document_text = data.get('document_text') # Text extracted from document
         session_id = data.get('session_id')
+        if not (user_query or image_data or document_text):
+            return jsonify({"error": "Query, image, or document text is required in the request body"}), 400
         if not session_id:
             session_id = str(uuid.uuid4())
             print(f"Warning: session_id not provided, generated new one: {session_id}")
         current_chat_history = conversation_histories.get(session_id, [])
+        # Construct the parts for the user message
+        user_message_parts = []
+        if user_query:
+            user_message_parts.append({"text": user_query})
+        if image_data:
+            # Expecting image_data to be a dict with mimeType and data
+            user_message_parts.append({"inlineData": image_data})
+        if document_text:
+            user_message_parts.append({"text": f"Document content:\n{document_text}"})
+            # Optionally, you might want to add a specific instruction for document analysis
+            if not user_query: # If only document was provided, add a default query
+                 user_message_parts.insert(0, {"text": "Please analyze the following document content:"})
+        # Append the new user message (which can be multi-part) to the history
+        current_chat_history.append({"role": "user", "parts": user_message_parts})
         # Generate the solution using the full chat history
         solution_text = await generate_solution_python(current_chat_history)