TarSh8654 committed on
Commit
feaf77b
·
verified ·
1 Parent(s): 5ac16d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -20
app.py CHANGED
@@ -5,6 +5,15 @@ import json
5
  import asyncio
6
  import os
7
  import uuid # For generating unique session IDs if not provided
 
 
 
 
 
 
 
 
 
8
 
9
  app = Flask(__name__)
10
 
@@ -15,8 +24,8 @@ conversation_histories = {}
15
 
16
  async def generate_solution_python(chat_history):
17
  """
18
- Generates a solution using a dummy context (since google_search is not available)
19
- and Gemini LLM, based on the provided chat history which can include text and images.
20
 
21
  Args:
22
  chat_history (list): A list of message objects representing the conversation.
@@ -35,11 +44,10 @@ async def generate_solution_python(chat_history):
35
  try:
36
  # --- IMPORTANT: Placeholder for Search API Integration ---
37
  # The 'google_search' tool is specific to the Canvas environment.
38
- # On Hugging Face, you would integrate a real public search API here,
39
- # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
40
  # For this example, we'll use a dummy context based on the latest user query.
41
 
42
- # Find the latest user query or image prompt to generate a relevant dummy context
43
  latest_user_input = ""
44
  for message in reversed(chat_history):
45
  if message["role"] == "user" and message["parts"]:
@@ -47,19 +55,18 @@ async def generate_solution_python(chat_history):
47
  if part.get("text"):
48
  latest_user_input = part["text"]
49
  break
50
- # If it's an image, we can indicate that an image was provided
51
  if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
52
- latest_user_input = "an image" # Indicate image input for context
 
 
 
 
53
  break
54
  if latest_user_input:
55
  break
56
 
57
  dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
58
 
59
- # You could also inject this context into the chat_history as a system message
60
- # or prepend it to the latest user message's text if you want the LLM to explicitly
61
- # see it as part of the conversation flow. For now, it's implicitly part of the prompt.
62
-
63
  # Step 2: Call Gemini API with the full chat history
64
  print("Calling Gemini API with full chat history...")
65
  llm_payload = {
@@ -122,11 +129,11 @@ async def generate():
122
 
123
  user_query = data.get('query')
124
  image_data = data.get('image_data') # Base64 image data
125
- document_text = data.get('document_text') # Text extracted from document
126
- session_id = data.get('session_id')
127
 
128
- if not (user_query or image_data or document_text):
129
- return jsonify({"error": "Query, image, or document text is required in the request body"}), 400
130
  if not session_id:
131
  session_id = str(uuid.uuid4())
132
  print(f"Warning: session_id not provided, generated new one: {session_id}")
@@ -137,15 +144,48 @@ async def generate():
137
  user_message_parts = []
138
  if user_query:
139
  user_message_parts.append({"text": user_query})
 
140
  if image_data:
141
- # Expecting image_data to be a dict with mimeType and data
142
  user_message_parts.append({"inlineData": image_data})
 
 
143
  if document_text:
144
  user_message_parts.append({"text": f"Document content:\n{document_text}"})
145
- # Optionally, you might want to add a specific instruction for document analysis
146
- if not user_query: # If only document was provided, add a default query
147
- user_message_parts.insert(0, {"text": "Please analyze the following document content:"})
148
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  # Append the new user message (which can be multi-part) to the history
151
  current_chat_history.append({"role": "user", "parts": user_message_parts})
 
5
  import asyncio
6
  import os
7
  import uuid # For generating unique session IDs if not provided
8
+ import base64 # For decoding base64 data
9
+ import io # For handling binary data in memory
10
+
11
+ # Import PyPDF2 for PDF parsing
12
+ try:
13
+ from PyPDF2 import PdfReader
14
+ except ImportError:
15
+ print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
16
+ PdfReader = None # Set to None if not available
17
 
18
  app = Flask(__name__)
19
 
 
24
 
25
  async def generate_solution_python(chat_history):
26
  """
27
+ Generates a solution using a dummy context and Gemini LLM,
28
+ based on the provided chat history which can include text, images, and extracted PDF text.
29
 
30
  Args:
31
  chat_history (list): A list of message objects representing the conversation.
 
44
  try:
45
  # --- IMPORTANT: Placeholder for Search API Integration ---
46
  # The 'google_search' tool is specific to the Canvas environment.
47
+ # On Hugging Face, you would integrate a real public search API here.
 
48
  # For this example, we'll use a dummy context based on the latest user query.
49
 
50
+ # Find the latest user input (text or image/document indication) for dummy context
51
  latest_user_input = ""
52
  for message in reversed(chat_history):
53
  if message["role"] == "user" and message["parts"]:
 
55
  if part.get("text"):
56
  latest_user_input = part["text"]
57
  break
 
58
  if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
59
+ latest_user_input = "an image"
60
+ break
61
+ # If a document was processed and its text added, use that
62
+ if part.get("text") and part["text"].startswith("Document content:"):
63
+ latest_user_input = "a document"
64
  break
65
  if latest_user_input:
66
  break
67
 
68
  dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
69
 
 
 
 
 
70
  # Step 2: Call Gemini API with the full chat history
71
  print("Calling Gemini API with full chat history...")
72
  llm_payload = {
 
129
 
130
  user_query = data.get('query')
131
  image_data = data.get('image_data') # Base64 image data
132
+ document_text = data.get('document_text') # Text extracted from .txt on frontend
133
+ pdf_data = data.get('pdf_data') # Base64 PDF data
134
 
135
+ if not (user_query or image_data or document_text or pdf_data):
136
+ return jsonify({"error": "Query, image, or document is required in the request body"}), 400
137
  if not session_id:
138
  session_id = str(uuid.uuid4())
139
  print(f"Warning: session_id not provided, generated new one: {session_id}")
 
144
  user_message_parts = []
145
  if user_query:
146
  user_message_parts.append({"text": user_query})
147
+
148
  if image_data:
 
149
  user_message_parts.append({"inlineData": image_data})
150
+ print("Received image data for processing.")
151
+
152
  if document_text:
153
  user_message_parts.append({"text": f"Document content:\n{document_text}"})
154
+ print("Received text document content for processing.")
155
+
156
+ if pdf_data:
157
+ if not PdfReader:
158
+ return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
159
+
160
+ try:
161
+ # Decode base64 PDF data
162
+ pdf_bytes = base64.b64decode(pdf_data['data'])
163
+ pdf_file = io.BytesIO(pdf_bytes)
164
+ reader = PdfReader(pdf_file)
165
+
166
+ pdf_extracted_text = ""
167
+ for page_num in range(len(reader.pages)):
168
+ page = reader.pages[page_num]
169
+ pdf_extracted_text += page.extract_text() or "" # extract_text can return None
170
+
171
+ if pdf_extracted_text.strip():
172
+ user_message_parts.append({"text": f"PDF Document Content:\n{pdf_extracted_text}"})
173
+ print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
174
+ else:
175
+ user_message_parts.append({"text": "PDF Document: (No extractable text found or PDF is image-based)"})
176
+ print("No extractable text found in PDF.")
177
+
178
+ except Exception as pdf_error:
179
+ print(f"Error processing PDF: {pdf_error}")
180
+ user_message_parts.append({"text": f"PDF Document: (Error processing PDF: {pdf_error})"})
181
+ return jsonify({"error": f"Failed to process PDF: {pdf_error}"}), 400 # Return error to frontend
182
+
183
+ # If only a file was provided without a query, add a default instruction
184
+ if not user_query and (image_data or document_text or pdf_data):
185
+ if image_data:
186
+ user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
187
+ elif document_text or pdf_data:
188
+ user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})
189
 
190
  # Append the new user message (which can be multi-part) to the history
191
  current_chat_history.append({"role": "user", "parts": user_message_parts})