tecuts committed
Commit fdb085f · verified · 1 parent: 8cde5e9

Update app.py

Files changed (1)
  1. app.py +238 -96
app.py CHANGED
@@ -48,80 +48,98 @@ GOOGLE_CX = os.getenv("GOOGLE_CX")
 LLM_API_KEY = os.getenv("LLM_API_KEY")
 LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
 
-# --- Simplified System Prompts ---
-SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to current web search results.
-Use the provided search results to give accurate, up-to-date responses.
-Always reference and cite the search results when relevant.
+# --- Improved System Prompts ---
+SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.
+
+When you need current information, recent events, specific facts, or when the user's question would benefit from up-to-date information, use the google_search function.
+
+**Use search for:**
+- Recent news or events
+- Current statistics or data
+- Specific factual information you're unsure about
+- Questions about things that may have changed recently
+- When the user explicitly asks for current/recent information
+
+**Response Guidelines:**
+1. Always use the search tool when it would provide more accurate or current information
+2. Synthesize information from multiple sources when available
+3. Clearly indicate when information comes from search results
+4. Provide comprehensive, well-structured answers
+5. Cite sources appropriately
+
 Current date: {current_date}"""
 
 SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
+
 Current date: {current_date}"""
 
-# --- Fast Web Search Tool ---
-async def fast_google_search(query: str, num_results: int = 4) -> List[Dict]:
-    """Fast Google Custom Search with minimal processing"""
+# --- Optimized Web Search Tool ---
+async def google_search_tool_async(query: str, num_results: int = 3) -> List[Dict]:
+    """
+    Async Google Custom Search - reduced results for faster response
+    """
     if not GOOGLE_API_KEY or not GOOGLE_CX or not query.strip():
        return []
 
-    logger.info(f"Searching: '{query}'")
+    logger.info(f"Executing search for: '{query}'")
 
+    search_url = "https://www.googleapis.com/customsearch/v1"
     params = {
         "key": GOOGLE_API_KEY,
         "cx": GOOGLE_CX,
         "q": query.strip(),
-        "num": num_results,
-        "dateRestrict": "m6" # Last 6 months
+        "num": min(num_results, 5),
+        "dateRestrict": "m3"
     }
 
     try:
         loop = asyncio.get_event_loop()
         response = await loop.run_in_executor(
             None,
-            lambda: requests.get(
-                "https://www.googleapis.com/customsearch/v1",
-                params=params,
-                timeout=12 # Faster timeout
-            )
+            lambda: requests.get(search_url, params=params, timeout=10)
         )
         response.raise_for_status()
-        data = response.json()
+        search_results = response.json()
+
+        if "items" not in search_results:
+            return []
 
-        results = []
-        for item in data.get("items", [])[:num_results]:
+        parsed_results = []
+        for item in search_results.get("items", [])[:num_results]:
             title = item.get("title", "").strip()
             url = item.get("link", "").strip()
             snippet = item.get("snippet", "").strip()
 
             if title and url and snippet:
-                results.append({
-                    "title": title,
+                parsed_results.append({
+                    "source_title": title,
                     "url": url,
                     "snippet": snippet,
                     "domain": url.split('/')[2] if '/' in url else url
                 })
 
-        logger.info(f"Found {len(results)} results")
-        return results
+        logger.info(f"Retrieved {len(parsed_results)} search results")
+        return parsed_results
 
     except Exception as e:
-        logger.error(f"Search failed: {e}")
+        logger.error(f"Search error: {e}")
         return []
 
-def format_search_context(results: List[Dict]) -> str:
-    """Fast search result formatting"""
-    if not results:
-        return "No search results available."
+def format_search_results_compact(search_results: List[Dict]) -> str:
+    """Compact formatting for faster processing"""
+    if not search_results:
+        return "No search results found."
 
-    context = ["=== SEARCH RESULTS ==="]
-    for i, result in enumerate(results, 1):
-        context.append(f"\n[{i}] {result['title']}")
-        context.append(f"Source: {result['domain']}")
-        context.append(f"Content: {result['snippet']}")
+    formatted = ["Search Results:"]
+    for i, result in enumerate(search_results, 1):
+        formatted.append(f"\n{i}. {result['source_title']}")
+        formatted.append(f" Source: {result['domain']}")
+        formatted.append(f" Content: {result['snippet']}")
 
-    return "\n".join(context)
+    return "\n".join(formatted)
 
 # --- FastAPI Application Setup ---
-app = FastAPI(title="Streaming AI Chatbot", version="2.2.0")
+app = FastAPI(title="Streaming AI Chatbot", version="2.1.0")
 
 app.add_middleware(
     CORSMiddleware,
@@ -142,71 +160,202 @@ if not LLM_API_KEY or not LLM_BASE_URL:
     client = None
 else:
     client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
-    logger.info("OpenAI client initialized")
+    logger.info("OpenAI client initialized successfully")
+
+# --- Tool Definition ---
+available_tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "google_search",
+            "description": "Search Google for current information, recent events, or specific facts. Use this when you need up-to-date information or when the user's question would benefit from current data.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Search query with relevant keywords"
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    }
+]
 
-# --- Optimized Streaming Response Generator ---
-async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float, original_query: str):
-    """Fast streaming response with optional search"""
+# --- Fixed Streaming Response Generator ---
+async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float):
+    """Generate streaming response with optional search"""
 
     try:
-        source_links = []
-
-        # ALWAYS search when use_search is True
-        if use_search:
-            yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
-
-            # Fast search execution
-            search_results = await fast_google_search(original_query, 4)
-
-            if search_results:
-                # Format search context
-                search_context = format_search_context(search_results)
-
-                # Prepare source links for frontend
-                source_links = [{
-                    "title": result["title"],
-                    "url": result["url"],
-                    "domain": result["domain"]
-                } for result in search_results]
-
-                # Add search context to messages
-                messages = messages + [{
-                    "role": "system",
-                    "content": f"{search_context}\n\nSince now is 2025, but your knowlage is limited to 2023. Based on the search results above, provide a comprehensive and update to date response."
-                }]
-
-                logger.info(f"Added {len(search_results)} search results to context")
-
-        # Generate response
-        yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
-
-        # Optimized LLM parameters for speed
+        # Initial LLM call with streaming
         llm_kwargs = {
             "model": "unsloth/Qwen3-30B-A3B-GGUF",
             "temperature": temperature,
             "messages": messages,
-            "max_tokens": 2500, # Reduced for faster response
-            "stream": True,
-            "top_p": 0.9, # Optimize sampling
+            "max_tokens": 2000,
+            "stream": True
         }
 
-        # Stream the response
+        if use_search:
+            llm_kwargs["tools"] = available_tools
+            llm_kwargs["tool_choice"] = "auto"
+
+        source_links = []
+        response_content = ""
+        tool_calls_data = []
+        current_tool_call = None
+
+        # First streaming call
         stream = client.chat.completions.create(**llm_kwargs)
 
+        # Track if we're in the middle of collecting a tool call
+        collecting_tool_call = False
+
         for chunk in stream:
-            if chunk.choices[0].delta.content:
-                content = chunk.choices[0].delta.content
-                yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+            delta = chunk.choices[0].delta
+            finish_reason = chunk.choices[0].finish_reason
+
+            # Handle content streaming
+            if delta.content:
+                content_chunk = delta.content
+                response_content += content_chunk
+                yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
+
+            # Handle tool calls - FIXED LOGIC
+            if delta.tool_calls:
+                collecting_tool_call = True
+                for tool_call in delta.tool_calls:
+                    # Ensure we have enough slots in tool_calls_data
+                    while len(tool_calls_data) <= tool_call.index:
+                        tool_calls_data.append({
+                            "id": None,
+                            "function": {"name": None, "arguments": ""}
+                        })
+
+                    # Update the tool call data
+                    if tool_call.id:
+                        tool_calls_data[tool_call.index]["id"] = tool_call.id
+                    if tool_call.function and tool_call.function.name:
+                        tool_calls_data[tool_call.index]["function"]["name"] = tool_call.function.name
+                    if tool_call.function and tool_call.function.arguments:
+                        tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
+
+            # Check if we've finished collecting tool calls
+            if finish_reason in ["tool_calls", "stop"] and collecting_tool_call:
+                break
 
-        # Send sources if available
+        # Process tool calls if any were collected
+        processed_any_tools = False
+        if tool_calls_data and any(tc.get("id") and tc.get("function", {}).get("name") for tc in tool_calls_data):
+            yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
+
+            tool_responses = []
+
+            # Process each tool call
+            for tool_call in tool_calls_data:
+                if not tool_call.get("id") or not tool_call.get("function", {}).get("name"):
+                    continue
+
+                function_name = tool_call["function"]["name"]
+
+                if function_name == "google_search":
+                    try:
+                        args = json.loads(tool_call["function"]["arguments"])
+                        query = args.get("query", "").strip()
+                        if query:
+                            logger.info(f"Executing search with query: {query}")
+                            search_results = await google_search_tool_async(query)
+
+                            if search_results:
+                                processed_any_tools = True
+
+                                # Collect source links
+                                for result in search_results:
+                                    source_links.append({
+                                        "title": result["source_title"],
+                                        "url": result["url"],
+                                        "domain": result["domain"]
+                                    })
+
+                                # Format results for the model
+                                search_context = format_search_results_compact(search_results)
+                                tool_responses.append({
+                                    "tool_call_id": tool_call["id"],
+                                    "role": "tool",
+                                    "content": search_context
+                                })
+                            else:
+                                tool_responses.append({
+                                    "tool_call_id": tool_call["id"],
+                                    "role": "tool",
+                                    "content": "No search results found."
+                                })
+                    except json.JSONDecodeError as e:
+                        logger.error(f"Failed to parse tool arguments: {e}")
+                        tool_responses.append({
+                            "tool_call_id": tool_call["id"],
+                            "role": "tool",
+                            "content": "Error: Invalid search query format."
+                        })
+                    except Exception as e:
+                        logger.error(f"Search tool error: {e}")
+                        tool_responses.append({
+                            "tool_call_id": tool_call["id"],
+                            "role": "tool",
+                            "content": f"Search error: {str(e)}"
+                        })
+
+            # If we have tool responses, make a second call to get the final response
+            if tool_responses:
+                yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
+
+                # Add tool call and tool response messages
+                final_messages = messages.copy()
+
+                # Add the assistant's tool call message
+                assistant_message = {
+                    "role": "assistant",
+                    "content": response_content if response_content else None,
+                    "tool_calls": [
+                        {
+                            "id": tc["id"],
+                            "type": "function",
+                            "function": {
+                                "name": tc["function"]["name"],
+                                "arguments": tc["function"]["arguments"]
+                            }
+                        }
+                        for tc in tool_calls_data if tc.get("id") and tc.get("function", {}).get("name")
+                    ]
+                }
+                final_messages.append(assistant_message)
+
+                # Add tool response messages
+                final_messages.extend(tool_responses)
+
+                # Generate final response
+                final_stream = client.chat.completions.create(
+                    model="unsloth/Qwen3-30B-A3B-GGUF",
+                    temperature=temperature,
+                    messages=final_messages,
+                    max_tokens=2000,
+                    stream=True
+                )
+
+                for chunk in final_stream:
+                    if chunk.choices[0].delta.content:
+                        content = chunk.choices[0].delta.content
+                        yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+
+        # Send sources and completion
         if source_links:
             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
 
-        # Send completion
-        yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': use_search and bool(source_links)}})}\n\n"
+        yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': processed_any_tools}})}\n\n"
 
     except Exception as e:
-        logger.error(f"Response generation failed: {e}")
+        logger.error(f"Streaming error: {e}")
         yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"
 
 # --- Streaming Chat Endpoint ---
@@ -219,7 +368,7 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
         data = await request.json()
         user_message = data.get("message", "").strip()
         use_search = data.get("use_search", False)
-        temperature = max(0.1, min(1.5, data.get("temperature", 0.7))) # Optimized range
+        temperature = max(0, min(2, data.get("temperature", 0.7)))
         conversation_history = data.get("history", [])
 
         if not user_message:
@@ -230,27 +379,20 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
         system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
         messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
 
-        logger.info(f"Request: search={use_search}, temp={temperature}")
+        logger.info(f"Stream request - search: {use_search}, temp: {temperature}, message: {user_message[:100]}...")
 
         return StreamingResponse(
-            generate_streaming_response(messages, use_search, temperature, user_message),
+            generate_streaming_response(messages, use_search, temperature),
             media_type="text/plain",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",
-                "Access-Control-Allow-Origin": "*" # For faster preflight
+                "X-Accel-Buffering": "no"
             }
         )
 
     except json.JSONDecodeError:
         raise HTTPException(status_code=400, detail="Invalid JSON")
     except Exception as e:
-        logger.error(f"Endpoint error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-
-# --- Health Check Endpoint ---
-@app.get("/health")
-async def health_check():
-    """Fast health check"""
-    return {"status": "ok", "timestamp": datetime.now().isoformat()}
+        logger.error(f"Stream endpoint error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 
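Because google_search_tool_async only wraps a blocking requests call in run_in_executor, it can be exercised without starting the server. A quick sketch, assuming app.py imports cleanly and GOOGLE_API_KEY / GOOGLE_CX are set in the environment:

import asyncio

# Assumes the module imports without error (it builds the FastAPI app and the
# OpenAI client at import time, so LLM_API_KEY should also be set).
from app import google_search_tool_async

async def main():
    results = await google_search_tool_async("fastapi streaming responses", num_results=3)
    for r in results:
        print(f"{r['source_title']} ({r['domain']})")
        print(f"  {r['url']}")

asyncio.run(main())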