TarSh8654 committed on
Commit
130ced7
·
verified ·
1 Parent(s): 439f544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -16
app.py CHANGED
@@ -16,11 +16,13 @@ conversation_histories = {}
16
  async def generate_solution_python(chat_history):
17
  """
18
  Generates a solution using a dummy context (since google_search is not available)
19
- and Gemini LLM, based on the provided chat history.
20
 
21
  Args:
22
  chat_history (list): A list of message objects representing the conversation.
23
- Each object has "role" and "parts" (e.g., [{"text": "..."}]).
 
 
24
  Returns:
25
  str: The generated solution text or an error message.
26
  """
@@ -37,14 +39,22 @@ async def generate_solution_python(chat_history):
37
  # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
38
  # For this example, we'll use a dummy context based on the latest user query.
39
 
40
- # Find the latest user query to generate a relevant dummy context
41
- latest_user_query = ""
42
  for message in reversed(chat_history):
43
- if message["role"] == "user" and message["parts"] and message["parts"][0].get("text"):
44
- latest_user_query = message["parts"][0]["text"]
 
 
 
 
 
 
 
 
45
  break
46
 
47
- dummy_context = f"Information related to '{latest_user_query}' from various online sources indicates that..."
48
 
49
  # You could also inject this context into the chat_history as a system message
50
  # or prepend it to the latest user message's text if you want the LLM to explicitly
@@ -53,7 +63,7 @@ async def generate_solution_python(chat_history):
53
  # Step 2: Call Gemini API with the full chat history
54
  print("Calling Gemini API with full chat history...")
55
  llm_payload = {
56
- "contents": chat_history # Pass the entire history
57
  }
58
 
59
  # Get API key from environment variables (Hugging Face Space Secrets)
@@ -104,27 +114,41 @@ def index():
104
 
105
  @app.route('/generate', methods=['POST'])
106
  async def generate():
107
- """Handles the AI generation request, managing conversation history."""
108
  try:
109
  data = request.get_json()
110
  if not data:
111
  return jsonify({"error": "Request body must be JSON"}), 400
112
 
113
  user_query = data.get('query')
 
 
114
  session_id = data.get('session_id')
115
 
116
- if not user_query:
117
- return jsonify({"error": "Query is required in the request body"}), 400
118
  if not session_id:
119
- # Generate a session ID if not provided (should be provided by frontend)
120
  session_id = str(uuid.uuid4())
121
  print(f"Warning: session_id not provided, generated new one: {session_id}")
122
 
123
- # Retrieve or initialize chat history for this session
124
  current_chat_history = conversation_histories.get(session_id, [])
125
-
126
- # Append the new user message to the history
127
- current_chat_history.append({"role": "user", "parts": [{"text": user_query}]})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  # Generate the solution using the full chat history
130
  solution_text = await generate_solution_python(current_chat_history)
 
16
  async def generate_solution_python(chat_history):
17
  """
18
  Generates a solution using a dummy context (since google_search is not available)
19
+ and Gemini LLM, based on the provided chat history which can include text and images.
20
 
21
  Args:
22
  chat_history (list): A list of message objects representing the conversation.
23
+ Each object has "role" and "parts". Parts can be:
24
+ - {"text": "..."}
25
+ - {"inlineData": {"mimeType": "image/png", "data": "base64_string"}}
26
  Returns:
27
  str: The generated solution text or an error message.
28
  """
 
39
  # e.g., Google Custom Search API, SerpAPI, or a web scraping library.
40
  # For this example, we'll use a dummy context based on the latest user query.
41
 
42
+ # Find the latest user query or image prompt to generate a relevant dummy context
43
+ latest_user_input = ""
44
  for message in reversed(chat_history):
45
+ if message["role"] == "user" and message["parts"]:
46
+ for part in message["parts"]:
47
+ if part.get("text"):
48
+ latest_user_input = part["text"]
49
+ break
50
+ # If it's an image, we can indicate that an image was provided
51
+ if part.get("inlineData") and "image" in part["inlineData"].get("mimeType", ""):
52
+ latest_user_input = "an image" # Indicate image input for context
53
+ break
54
+ if latest_user_input:
55
  break
56
 
57
+ dummy_context = f"Information related to '{latest_user_input}' from various online sources indicates that..."
58
 
59
  # You could also inject this context into the chat_history as a system message
60
  # or prepend it to the latest user message's text if you want the LLM to explicitly
 
63
  # Step 2: Call Gemini API with the full chat history
64
  print("Calling Gemini API with full chat history...")
65
  llm_payload = {
66
+ "contents": chat_history # Pass the entire history, including text and image parts
67
  }
68
 
69
  # Get API key from environment variables (Hugging Face Space Secrets)
 
114
 
115
  @app.route('/generate', methods=['POST'])
116
  async def generate():
117
+ """Handles the AI generation request, managing conversation history and multi-modal input."""
118
  try:
119
  data = request.get_json()
120
  if not data:
121
  return jsonify({"error": "Request body must be JSON"}), 400
122
 
123
  user_query = data.get('query')
124
+ image_data = data.get('image_data') # Base64 image data
125
+ document_text = data.get('document_text') # Text extracted from document
126
  session_id = data.get('session_id')
127
 
128
+ if not (user_query or image_data or document_text):
129
+ return jsonify({"error": "Query, image, or document text is required in the request body"}), 400
130
  if not session_id:
 
131
  session_id = str(uuid.uuid4())
132
  print(f"Warning: session_id not provided, generated new one: {session_id}")
133
 
 
134
  current_chat_history = conversation_histories.get(session_id, [])
135
+
136
+ # Construct the parts for the user message
137
+ user_message_parts = []
138
+ if user_query:
139
+ user_message_parts.append({"text": user_query})
140
+ if image_data:
141
+ # Expecting image_data to be a dict with mimeType and data
142
+ user_message_parts.append({"inlineData": image_data})
143
+ if document_text:
144
+ user_message_parts.append({"text": f"Document content:\n{document_text}"})
145
+ # Optionally, you might want to add a specific instruction for document analysis
146
+ if not user_query: # If only document was provided, add a default query
147
+ user_message_parts.insert(0, {"text": "Please analyze the following document content:"})
148
+
149
+
150
+ # Append the new user message (which can be multi-part) to the history
151
+ current_chat_history.append({"role": "user", "parts": user_message_parts})
152
 
153
  # Generate the solution using the full chat history
154
  solution_text = await generate_solution_python(current_chat_history)