tecuts committed
Commit 5cb21e4 · verified · 1 Parent(s): f73a254

Update app.py

Files changed (1)
  1. app.py +21 -114
app.py CHANGED
@@ -7,6 +7,8 @@ from fastapi import FastAPI, Request, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from openai import OpenAI
 import logging
+from fastapi.responses import StreamingResponse
+
 
 # --- Configure Logging ---
 logging.basicConfig(level=logging.INFO)
@@ -222,144 +224,48 @@ def should_use_search(message: str) -> bool:
 async def chat_endpoint(request: Request):
     if not client:
         raise HTTPException(status_code=500, detail="LLM client not configured")
-
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
-
-        # Support both 'use_search' and 'user_search' parameter names for flexibility
-        use_search = data.get("use_search")
-        if use_search is None:
-            use_search = data.get("user_search")  # Alternative parameter name
-
+        use_search = data.get("use_search", data.get("user_search"))
         conversation_history = data.get("history", [])
-
-        # Debug logging for request parameters
-        logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, history length: {len(conversation_history)}")
-
+
         if not user_message:
             raise HTTPException(status_code=400, detail="No message provided")
 
-        # Auto-decide search usage if not specified
         if use_search is None:
             use_search = should_use_search(user_message)
-            logger.info(f"Auto-decided search usage: {use_search}")
-        else:
-            logger.info(f"Manual search setting: {use_search}")
 
-        # Prepare messages with appropriate system prompt based on search availability
        current_date = datetime.now().strftime("%Y-%m-%d")
-
         if use_search:
             system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
         else:
             system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
-
         system_message = {"role": "system", "content": system_content}
         messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
-
         llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
-            "temperature": 0.7,  # Slightly higher for more creative responses
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "temperature": 0.7,
             "messages": messages,
-            "max_tokens": 2000  # Ensure comprehensive responses
+            "max_tokens": 2000,
+            "stream": True,  # <--- Enable streaming
         }
-
         if use_search:
-            logger.info("Search is ENABLED - tools will be available to the model")
             llm_kwargs["tools"] = available_tools
-            llm_kwargs["tool_choice"] = "auto"  # Consider using "required" for testing
-        else:
-            logger.info("Search is DISABLED - no tools available")
-
-        # First LLM call
-        logger.info(f"Making LLM request with tools: {bool(use_search)}")
-        llm_response = client.chat.completions.create(**llm_kwargs)
-        tool_calls = llm_response.choices[0].message.tool_calls
-        source_links = []
-
-        # Debug: Log tool call information
-        if tool_calls:
-            logger.info(f"LLM made {len(tool_calls)} tool calls")
-            for i, call in enumerate(tool_calls):
-                logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
-        else:
-            logger.info("LLM did not make any tool calls")
-            if use_search:
-                logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
-
-        if tool_calls:
-            logger.info(f"Processing {len(tool_calls)} tool calls")
-            tool_outputs = []
-
-            for tool_call in tool_calls:
-                if tool_call.function.name == "google_search":
-                    try:
-                        function_args = json.loads(tool_call.function.arguments)
-                        search_query = function_args.get("query", "").strip()
-
-                        if search_query:
-                            logger.info(f"Executing search for: {search_query}")
-                            search_results = google_search_tool([search_query], num_results=5)
-
-                            # Collect source links for response
-                            for result in search_results:
-                                source_links.append({
-                                    "title": result["source_title"],
-                                    "url": result["url"],
-                                    "domain": result["domain"]
-                                })
-
-                            # Format results for LLM
-                            formatted_results = format_search_results_for_llm(search_results)
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": formatted_results
-                            })
-                        else:
-                            logger.warning("Empty search query in tool call")
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": "Error: Empty search query provided."
-                            })
-
-                    except json.JSONDecodeError as e:
-                        logger.error(f"Failed to parse tool call arguments: {e}")
-                        tool_outputs.append({
-                            "tool_call_id": tool_call.id,
-                            "output": "Error: Failed to parse search parameters."
-                        })
-
-            # Continue conversation with search results
-            messages.append(llm_response.choices[0].message)
-            for output_item in tool_outputs:
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": output_item["tool_call_id"],
-                    "content": output_item["output"]
-                })
+            llm_kwargs["tool_choice"] = "auto"
 
-            # Final response generation with search context
-            final_response = client.chat.completions.create(
-                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=0.7,
-                messages=messages,
-                max_tokens=2000
-            )
-            final_chatbot_response = final_response.choices[0].message.content
-        else:
-            final_chatbot_response = llm_response.choices[0].message.content
+        # Streaming generator
+        def stream_llm_response():
+            response = client.chat.completions.create(**llm_kwargs)
+            for chunk in response:
+                # Each chunk is an object, get the content delta
+                if hasattr(chunk.choices[0].delta, "content"):
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
 
-        # Enhanced response structure
-        response_data = {
-            "response": final_chatbot_response,
-            "sources": source_links,
-            "search_used": bool(tool_calls),
-            "timestamp": datetime.now().isoformat()
-        }
-
-        logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}")
-        return response_data
+        # Return as streaming response
+        return StreamingResponse(stream_llm_response(), media_type="text/plain")
 
     except HTTPException:
         raise
@@ -370,6 +276,7 @@ async def chat_endpoint(request: Request):
         logger.error(f"Unexpected error in /chat endpoint: {e}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
+
 # --- Health Check Endpoint ---
 @app.get("/")
 async def root():
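
With this change, the chat endpoint returns plain-text chunks via StreamingResponse rather than a single JSON body, so callers now have to read the response incrementally. Below is a minimal client sketch, assuming the app is served at http://localhost:8000; the /chat path is taken from the endpoint's error log, and the host, port, and payload values are illustrative:

import requests

# Open the request with stream=True so the body is read as the server yields it.
resp = requests.post(
    "http://localhost:8000/chat",  # assumed local dev address
    json={"message": "Hello!", "history": []},
    stream=True,
)
resp.raise_for_status()

# Print each plain-text chunk as it arrives from the streaming endpoint.
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="", flush=True)
print()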