TarSh8654 committed on
Commit
feaf77b
·
verified ·
1 Parent(s): 5ac16d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -20
app.py CHANGED
@@ -5,6 +5,15 @@ import json
5
  import asyncio
6
  import os
7
  import uuid # For generating unique session IDs if not provided
 
 
 
 
 
 
 
 
 
8
 
9
  app = Flask(__name__)
10
 
@@ -15,8 +24,8 @@ conversation_histories = {}
15
 
16
  async def generate_solution_python(chat_history):
17
  """
18
- Generates a solution using a dummy context (since google_search is not available)
19
- and Gemini LLM, based on the provided chat history which can include text and images.
20
 
21
  Args:
22
  chat_history (list): A list of message objects representing the conversation.
@@ -35,11 +44,10 @@ async def generate_solution_python(chat_history):
35
  try:
36
  # --- IMPORTANT: Placeholder for Search API Integration ---
37
  # The 'google_search' tool is specific to the Canvas environment.
38
- # On Hugging Face, you would integrate a real public search API here,
39
- # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
40
  # For this example, we'll use a dummy context based on the latest user query.
41
 
42
- # Find the latest user query or image prompt to generate a relevant dummy context
43
  latest_user_input = ""
44
  for message in reversed(chat_history):
45
  if message["role"] == "user" and message["parts"]:
@@ -47,19 +55,18 @@ async def generate_solution_python(chat_history):
47
  if part.get("text"):
48
  latest_user_input = part["text"]
49
  break
50
- # If it's an image, we can indicate that an image was provided
51
  if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
52
- latest_user_input = "an image" # Indicate image input for context
 
 
 
 
53
  break
54
  if latest_user_input:
55
  break
56
 
57
  dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
58
 
59
- # You could also inject this context into the chat_history as a system message
60
- # or prepend it to the latest user message's text if you want the LLM to explicitly
61
- # see it as part of the conversation flow. For now, it's implicitly part of the prompt.
62
-
63
  # Step 2: Call Gemini API with the full chat history
64
  print("Calling Gemini API with full chat history...")
65
  llm_payload = {
@@ -122,11 +129,11 @@ async def generate():
122
 
123
  user_query = data.get('query')
124
  image_data = data.get('image_data') # Base64 image data
125
- document_text = data.get('document_text') # Text extracted from document
126
- session_id = data.get('session_id')
127
 
128
- if not (user_query or image_data or document_text):
129
- return jsonify({"error": "Query, image, or document text is required in the request body"}), 400
130
  if not session_id:
131
  session_id = str(uuid.uuid4())
132
  print(f"Warning: session_id not provided, generated new one: {session_id}")
@@ -137,15 +144,48 @@ async def generate():
137
  user_message_parts = []
138
  if user_query:
139
  user_message_parts.append({"text": user_query})
 
140
  if image_data:
141
- # Expecting image_data to be a dict with mimeType and data
142
  user_message_parts.append({"inlineData": image_data})
 
 
143
  if document_text:
144
  user_message_parts.append({"text": f"Document content:\n{document_text}"})
145
- # Optionally, you might want to add a specific instruction for document analysis
146
- if not user_query: # If only document was provided, add a default query
147
- user_message_parts.insert(0, {"text": "Please analyze the following document content:"})
148
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  # Append the new user message (which can be multi-part) to the history
151
  current_chat_history.append({"role": "user", "parts": user_message_parts})
 
5
  import asyncio
6
  import os
7
  import uuid # For generating unique session IDs if not provided
8
+ import base64 # For decoding base64 data
9
+ import io # For handling binary data in memory
10
+
11
+ # Import PyPDF2 for PDF parsing
12
+ try:
13
+ from PyPDF2 import PdfReader
14
+ except ImportError:
15
+ print("PyPDF2 not found. Please install it using 'pip install PyPDF2'")
16
+ PdfReader = None # Set to None if not available
17
 
18
  app = Flask(__name__)
19
 
 
24
 
25
  async def generate_solution_python(chat_history):
26
  """
27
+ Generates a solution using a dummy context and Gemini LLM,
28
+ based on the provided chat history which can include text, images, and extracted PDF text.
29
 
30
  Args:
31
  chat_history (list): A list of message objects representing the conversation.
 
44
  try:
45
  # --- IMPORTANT: Placeholder for Search API Integration ---
46
  # The 'google_search' tool is specific to the Canvas environment.
47
+ # On Hugging Face, you would integrate a real public search API here.
 
48
  # For this example, we'll use a dummy context based on the latest user query.
49
 
50
+ # Find the latest user input (text or image/document indication) for dummy context
51
  latest_user_input = ""
52
  for message in reversed(chat_history):
53
  if message["role"] == "user" and message["parts"]:
 
55
  if part.get("text"):
56
  latest_user_input = part["text"]
57
  break
 
58
  if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
59
+ latest_user_input = "an image"
60
+ break
61
+ # If a document was processed and its text added, use that
62
+ if part.get("text") and part["text"].startswith("Document content:"):
63
+ latest_user_input = "a document"
64
  break
65
  if latest_user_input:
66
  break
67
 
68
  dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
69
 
 
 
 
 
70
  # Step 2: Call Gemini API with the full chat history
71
  print("Calling Gemini API with full chat history...")
72
  llm_payload = {
 
129
 
130
  user_query = data.get('query')
131
  image_data = data.get('image_data') # Base64 image data
132
+ document_text = data.get('document_text') # Text extracted from .txt on frontend
133
+ pdf_data = data.get('pdf_data') # Base64 PDF data
134
 
135
+ if not (user_query or image_data or document_text or pdf_data):
136
+ return jsonify({"error": "Query, image, or document is required in the request body"}), 400
137
  if not session_id:
138
  session_id = str(uuid.uuid4())
139
  print(f"Warning: session_id not provided, generated new one: {session_id}")
 
144
  user_message_parts = []
145
  if user_query:
146
  user_message_parts.append({"text": user_query})
147
+
148
  if image_data:
 
149
  user_message_parts.append({"inlineData": image_data})
150
+ print("Received image data for processing.")
151
+
152
  if document_text:
153
  user_message_parts.append({"text": f"Document content:\n{document_text}"})
154
+ print("Received text document content for processing.")
155
+
156
+ if pdf_data:
157
+ if not PdfReader:
158
+ return jsonify({"error": "PDF parsing library (PyPDF2) not installed on backend."}), 500
159
+
160
+ try:
161
+ # Decode base64 PDF data
162
+ pdf_bytes = base64.b64decode(pdf_data['data'])
163
+ pdf_file = io.BytesIO(pdf_bytes)
164
+ reader = PdfReader(pdf_file)
165
+
166
+ pdf_extracted_text = ""
167
+ for page_num in range(len(reader.pages)):
168
+ page = reader.pages[page_num]
169
+ pdf_extracted_text += page.extract_text() or "" # extract_text can return None
170
+
171
+ if pdf_extracted_text.strip():
172
+ user_message_parts.append({"text": f"PDF Document Content:\n{pdf_extracted_text}"})
173
+ print(f"Successfully extracted {len(pdf_extracted_text)} characters from PDF.")
174
+ else:
175
+ user_message_parts.append({"text": "PDF Document: (No extractable text found or PDF is image-based)"})
176
+ print("No extractable text found in PDF.")
177
+
178
+ except Exception as pdf_error:
179
+ print(f"Error processing PDF: {pdf_error}")
180
+ user_message_parts.append({"text": f"PDF Document: (Error processing PDF: {pdf_error})"})
181
+ return jsonify({"error": f"Failed to process PDF: {pdf_error}"}), 400 # Return error to frontend
182
+
183
+ # If only a file was provided without a query, add a default instruction
184
+ if not user_query and (image_data or document_text or pdf_data):
185
+ if image_data:
186
+ user_message_parts.insert(0, {"text": "Please analyze the following image and provide insights:"})
187
+ elif document_text or pdf_data:
188
+ user_message_parts.insert(0, {"text": "Please analyze the following document content and provide a summary or answer questions:"})
189
 
190
  # Append the new user message (which can be multi-part) to the history
191
  current_chat_history.append({"role": "user", "parts": user_message_parts})