tecuts committed on
Commit 0ec0144 · verified · 1 Parent(s): 174368c

Update app.py

Files changed (1)
  1. app.py +62 -95
app.py CHANGED
@@ -5,6 +5,8 @@ from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 from fastapi import FastAPI, Request, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse  # <-- Import StreamingResponse
+import asyncio  # <-- Import asyncio
 from openai import OpenAI
 import logging
 import time
@@ -81,7 +83,7 @@ When you don't have current information about recent events or changing data, ac
 **Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
 
 # --- Enhanced Web Search Tool Implementation ---
-def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
+def Google_Search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
     """
     Enhanced Google Custom Search with better error handling and result formatting
     """
@@ -151,7 +153,7 @@ def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
         logger.error(f"Error during Google search request: {e}")
         return []
     except Exception as e:
-        logger.error(f"Unexpected error in google_search_tool: {e}")
+        logger.error(f"Unexpected error in Google Search_tool: {e}")
         return []
 
 def format_search_results_for_llm(search_results: List[Dict]) -> str:
@@ -210,7 +212,7 @@ available_tools = [
     {
         "type": "function",
         "function": {
-            "name": "google_search",
+            "name": "Google Search",
            "description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
            "parameters": {
                "type": "object",
@@ -288,7 +290,7 @@ class RateLimiter:
         return len(self.requests[user_ip])
 
 
-# Initialize rate limiter with 100 requests per day
+# Initialize rate limiter with 50 requests per day
 rate_limiter = RateLimiter(
     max_requests=50,
     time_window=timedelta(days=1)
@@ -322,7 +324,7 @@ class ApiRotator:
         self.last_successful_index = index
 
 
-# --- Enhanced Chatbot Endpoint ---
+# --- Enhanced Chatbot Endpoint (with Streaming) ---
 @app.post("/chat")
 async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
     user_ip = get_user_ip(request)
@@ -342,89 +344,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
-
-        # Support both 'use_search' and 'user_search' parameter names for flexibility
         use_search = data.get("use_search")
         if use_search is None:
-            use_search = data.get("user_search")  # Alternative parameter name
-
-        # Allow client to specify temperature (with validation)
-        temperature = data.get("temperature", 0.7)  # Default to 0.7
-        if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
-            logger.warning(f"Invalid temperature value: {temperature}, defaulting to 0.7")
+            use_search = data.get("user_search")
+
+        temperature = data.get("temperature", 0.7)
+        if not isinstance(temperature, (int, float)) or not 0 <= temperature <= 2:
            temperature = 0.7
-
+
        conversation_history = data.get("history", [])
 
-        # Debug logging for request parameters
-        logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, temperature: {temperature}, history length: {len(conversation_history)}")
-
        if not user_message:
            raise HTTPException(status_code=400, detail="No message provided")
 
-        # Auto-decide search usage if not specified
        if use_search is None:
            use_search = should_use_search(user_message)
-            logger.info(f"Auto-decided search usage: {use_search}")
-        else:
-            logger.info(f"Manual search setting: {use_search}")
-
-        # Prepare messages with appropriate system prompt based on search availability
-        current_date = datetime.now().strftime("%Y-%m-%d")
-
-        if use_search:
-            system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
-        else:
-            system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
 
+        # --- Message and Tool Call Preparation (Same as before) ---
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date) if use_search else SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
        system_message = {"role": "system", "content": system_content}
        messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
 
        llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
-            "temperature": temperature,  # Use client-specified temperature
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "temperature": temperature,
            "messages": messages,
-            "max_tokens": 2000  # Ensure comprehensive responses
+            "max_tokens": 2000
        }
 
        if use_search:
-            logger.info("Search is ENABLED - tools will be available to the model")
            llm_kwargs["tools"] = available_tools
-            llm_kwargs["tool_choice"] = "auto"  # Consider using "required" for testing
-        else:
-            logger.info("Search is DISABLED - no tools available")
+            llm_kwargs["tool_choice"] = "auto"
 
-        # First LLM call
-        logger.info(f"Making LLM request with tools: {bool(use_search)}, temperature: {temperature}")
+        # First LLM call (for tool decision) - This part remains blocking
        llm_response = client.chat.completions.create(**llm_kwargs)
        tool_calls = llm_response.choices[0].message.tool_calls
        source_links = []
 
-        # Debug: Log tool call information
-        if tool_calls:
-            logger.info(f"LLM made {len(tool_calls)} tool calls")
-            for i, call in enumerate(tool_calls):
-                logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
-        else:
-            logger.info("LLM did not make any tool calls")
-            if use_search:
-                logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
-
        if tool_calls:
            logger.info(f"Processing {len(tool_calls)} tool calls")
            tool_outputs = []
 
            for tool_call in tool_calls:
-                if tool_call.function.name == "google_search":
+                if tool_call.function.name == "Google Search":
                    try:
                        function_args = json.loads(tool_call.function.arguments)
                        search_query = function_args.get("query", "").strip()
 
                        if search_query:
-                            logger.info(f"Executing search for: {search_query}")
-                            search_results = google_search_tool([search_query], num_results=5)
-
-                            # Collect source links for response
+                            search_results = Google_Search_tool([search_query], num_results=5)
                            for result in search_results:
                                source_links.append({
                                    "title": result["source_title"],
@@ -432,58 +401,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
                                    "domain": result["domain"]
                                })
 
-                            # Format results for LLM
                            formatted_results = format_search_results_for_llm(search_results)
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": formatted_results
-                            })
-                        else:
-                            logger.warning("Empty search query in tool call")
                            tool_outputs.append({
                                "tool_call_id": tool_call.id,
-                                "output": "Error: Empty search query provided."
+                                "output": formatted_results
                            })
-
-                    except json.JSONDecodeError as e:
-                        logger.error(f"Failed to parse tool call arguments: {e}")
-                        tool_outputs.append({
-                            "tool_call_id": tool_call.id,
-                            "output": "Error: Failed to parse search parameters."
-                        })
-
-            # Continue conversation with search results
+                    except Exception as e:
+                        logger.error(f"Error processing tool call: {e}")
+
            messages.append(llm_response.choices[0].message)
            for output_item in tool_outputs:
                messages.append({
-                    "role": "tool",
-                    "tool_call_id": output_item["tool_call_id"],
+                    "role": "tool",
+                    "tool_call_id": output_item["tool_call_id"],
                    "content": output_item["output"]
                })
-
-            # Final response generation with search context
-            final_response = client.chat.completions.create(
+
+        # --- MODIFICATION FOR STREAMING ---
+
+        async def response_generator():
+            """This async generator streams the final response."""
+            # First, yield metadata (like sources) as a single event
+            initial_data = {
+                "sources": source_links,
+                "search_used": bool(tool_calls),
+            }
+            yield f"data: {json.dumps(initial_data)}\n\n"
+
+            # This is the final API call that will actually be streamed
+            stream = client.chat.completions.create(
                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=temperature,  # Use same temperature for consistency
                messages=messages,
-                max_tokens=2000
-                stream=True  # <<< KEY CHANGE: Enable streaming
+                temperature=temperature,
+                max_tokens=2000,
+                stream=True  # <-- Enable streaming from the AI
            )
-            final_chatbot_response = final_response.choices[0].message.content
-        else:
-            final_chatbot_response = llm_response.choices[0].message.content
-
-        # Enhanced response structure
-        response_data = {
-            "response": final_chatbot_response,
-            "sources": source_links,
-            "search_used": bool(tool_calls),
-            "temperature": temperature,  # Include temperature in response for debugging
-            "timestamp": datetime.now().isoformat()
-        }
-
-        logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}, Temperature: {temperature}")
-        return response_data
+
+            try:
+                for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        # Yield each piece of content in SSE format
+                        chunk_data = {"response_chunk": content}
+                        yield f"data: {json.dumps(chunk_data)}\n\n"
+                        await asyncio.sleep(0)  # Give up control to the event loop
+            finally:
+                # Signal the end of the stream to the client
+                yield "data: [DONE]\n\n"
+
+        # Return the StreamingResponse, which FastAPI will handle.
+        return StreamingResponse(response_generator(), media_type="text/event-stream")
 
    except HTTPException:
        raise
@@ -500,7 +467,7 @@ async def root():
    return {
        "message": "Enhanced AI Chatbot API is running",
        "version": "2.0.0",
-        "features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting"],
+        "features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting", "Streaming Response"],
        "timestamp": datetime.now().isoformat()
    }
 
@@ -512,7 +479,7 @@ async def health_check():
        "timestamp": datetime.now().isoformat(),
        "services": {
            "llm_client": client is not None,
-            "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX)
+            "Google Search": bool(GOOGLE_API_KEY and GOOGLE_CX)
        }
    }
    return health_status
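Since /chat now returns Server-Sent Events instead of a single JSON payload, below is a minimal client-side sketch of how the new stream could be consumed. The base URL and the requests dependency are illustration-only assumptions; the event format itself (an initial metadata event carrying "sources" and "search_used", then "response_chunk" events, then a "data: [DONE]" sentinel) is the one emitted by response_generator() in the diff above.

import json
import requests  # assumed client-side dependency; not part of app.py

# Hypothetical base URL; point this at wherever the FastAPI app is served.
url = "http://localhost:8000/chat"
payload = {"message": "What happened in tech news today?", "use_search": True}

with requests.post(url, json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separator lines between SSE events
        data = line[len("data: "):]
        if data == "[DONE]":
            break  # end-of-stream sentinel emitted by response_generator()
        event = json.loads(data)
        if "sources" in event:
            print("Sources:", event["sources"])  # initial metadata event
        elif "response_chunk" in event:
            print(event["response_chunk"], end="", flush=True)  # incremental model output

Because the metadata arrives as the first event, a client can render the source links before any model tokens start streaming.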