Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,8 @@ from datetime import datetime, timedelta
|
|
| 5 |
from typing import List, Dict, Optional
|
| 6 |
from fastapi import FastAPI, Request, HTTPException, Depends
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
|
|
|
| 8 |
from openai import OpenAI
|
| 9 |
import logging
|
| 10 |
import time
|
|
@@ -81,7 +83,7 @@ When you don't have current information about recent events or changing data, ac
|
|
| 81 |
**Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
|
| 82 |
|
| 83 |
# --- Enhanced Web Search Tool Implementation ---
|
| 84 |
-
def
|
| 85 |
"""
|
| 86 |
Enhanced Google Custom Search with better error handling and result formatting
|
| 87 |
"""
|
|
@@ -151,7 +153,7 @@ def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
|
|
| 151 |
logger.error(f"Error during Google search request: {e}")
|
| 152 |
return []
|
| 153 |
except Exception as e:
|
| 154 |
-
logger.error(f"Unexpected error in
|
| 155 |
return []
|
| 156 |
|
| 157 |
def format_search_results_for_llm(search_results: List[Dict]) -> str:
|
|
@@ -210,7 +212,7 @@ available_tools = [
|
|
| 210 |
{
|
| 211 |
"type": "function",
|
| 212 |
"function": {
|
| 213 |
-
"name": "
|
| 214 |
"description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
|
| 215 |
"parameters": {
|
| 216 |
"type": "object",
|
|
@@ -288,7 +290,7 @@ class RateLimiter:
|
|
| 288 |
return len(self.requests[user_ip])
|
| 289 |
|
| 290 |
|
| 291 |
-
# Initialize rate limiter with
|
| 292 |
rate_limiter = RateLimiter(
|
| 293 |
max_requests=50,
|
| 294 |
time_window=timedelta(days=1)
|
|
@@ -322,7 +324,7 @@ class ApiRotator:
|
|
| 322 |
self.last_successful_index = index
|
| 323 |
|
| 324 |
|
| 325 |
-
# --- Enhanced Chatbot Endpoint ---
|
| 326 |
@app.post("/chat")
|
| 327 |
async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
|
| 328 |
user_ip = get_user_ip(request)
|
|
@@ -342,89 +344,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
|
|
| 342 |
try:
|
| 343 |
data = await request.json()
|
| 344 |
user_message = data.get("message", "").strip()
|
| 345 |
-
|
| 346 |
-
# Support both 'use_search' and 'user_search' parameter names for flexibility
|
| 347 |
use_search = data.get("use_search")
|
| 348 |
if use_search is None:
|
| 349 |
-
use_search = data.get("user_search")
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
|
| 354 |
-
logger.warning(f"Invalid temperature value: {temperature}, defaulting to 0.7")
|
| 355 |
temperature = 0.7
|
| 356 |
-
|
| 357 |
conversation_history = data.get("history", [])
|
| 358 |
|
| 359 |
-
# Debug logging for request parameters
|
| 360 |
-
logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, temperature: {temperature}, history length: {len(conversation_history)}")
|
| 361 |
-
|
| 362 |
if not user_message:
|
| 363 |
raise HTTPException(status_code=400, detail="No message provided")
|
| 364 |
|
| 365 |
-
# Auto-decide search usage if not specified
|
| 366 |
if use_search is None:
|
| 367 |
use_search = should_use_search(user_message)
|
| 368 |
-
logger.info(f"Auto-decided search usage: {use_search}")
|
| 369 |
-
else:
|
| 370 |
-
logger.info(f"Manual search setting: {use_search}")
|
| 371 |
-
|
| 372 |
-
# Prepare messages with appropriate system prompt based on search availability
|
| 373 |
-
current_date = datetime.now().strftime("%Y-%m-%d")
|
| 374 |
-
|
| 375 |
-
if use_search:
|
| 376 |
-
system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
|
| 377 |
-
else:
|
| 378 |
-
system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
|
| 379 |
|
|
|
|
|
|
|
|
|
|
| 380 |
system_message = {"role": "system", "content": system_content}
|
| 381 |
messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
|
| 382 |
|
| 383 |
llm_kwargs = {
|
| 384 |
-
"model": "unsloth/Qwen3-30B-A3B-GGUF",
|
| 385 |
-
"temperature": temperature,
|
| 386 |
"messages": messages,
|
| 387 |
-
"max_tokens": 2000
|
| 388 |
}
|
| 389 |
|
| 390 |
if use_search:
|
| 391 |
-
logger.info("Search is ENABLED - tools will be available to the model")
|
| 392 |
llm_kwargs["tools"] = available_tools
|
| 393 |
-
llm_kwargs["tool_choice"] = "auto"
|
| 394 |
-
else:
|
| 395 |
-
logger.info("Search is DISABLED - no tools available")
|
| 396 |
|
| 397 |
-
# First LLM call
|
| 398 |
-
logger.info(f"Making LLM request with tools: {bool(use_search)}, temperature: {temperature}")
|
| 399 |
llm_response = client.chat.completions.create(**llm_kwargs)
|
| 400 |
tool_calls = llm_response.choices[0].message.tool_calls
|
| 401 |
source_links = []
|
| 402 |
|
| 403 |
-
# Debug: Log tool call information
|
| 404 |
-
if tool_calls:
|
| 405 |
-
logger.info(f"LLM made {len(tool_calls)} tool calls")
|
| 406 |
-
for i, call in enumerate(tool_calls):
|
| 407 |
-
logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
|
| 408 |
-
else:
|
| 409 |
-
logger.info("LLM did not make any tool calls")
|
| 410 |
-
if use_search:
|
| 411 |
-
logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
|
| 412 |
-
|
| 413 |
if tool_calls:
|
| 414 |
logger.info(f"Processing {len(tool_calls)} tool calls")
|
| 415 |
tool_outputs = []
|
| 416 |
|
| 417 |
for tool_call in tool_calls:
|
| 418 |
-
if tool_call.function.name == "
|
| 419 |
try:
|
| 420 |
function_args = json.loads(tool_call.function.arguments)
|
| 421 |
search_query = function_args.get("query", "").strip()
|
| 422 |
|
| 423 |
if search_query:
|
| 424 |
-
|
| 425 |
-
search_results = google_search_tool([search_query], num_results=5)
|
| 426 |
-
|
| 427 |
-
# Collect source links for response
|
| 428 |
for result in search_results:
|
| 429 |
source_links.append({
|
| 430 |
"title": result["source_title"],
|
|
@@ -432,58 +401,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
|
|
| 432 |
"domain": result["domain"]
|
| 433 |
})
|
| 434 |
|
| 435 |
-
# Format results for LLM
|
| 436 |
formatted_results = format_search_results_for_llm(search_results)
|
| 437 |
-
tool_outputs.append({
|
| 438 |
-
"tool_call_id": tool_call.id,
|
| 439 |
-
"output": formatted_results
|
| 440 |
-
})
|
| 441 |
-
else:
|
| 442 |
-
logger.warning("Empty search query in tool call")
|
| 443 |
tool_outputs.append({
|
| 444 |
"tool_call_id": tool_call.id,
|
| 445 |
-
"output":
|
| 446 |
})
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
tool_outputs.append({
|
| 451 |
-
"tool_call_id": tool_call.id,
|
| 452 |
-
"output": "Error: Failed to parse search parameters."
|
| 453 |
-
})
|
| 454 |
-
|
| 455 |
-
# Continue conversation with search results
|
| 456 |
messages.append(llm_response.choices[0].message)
|
| 457 |
for output_item in tool_outputs:
|
| 458 |
messages.append({
|
| 459 |
-
"role": "tool",
|
| 460 |
-
"tool_call_id": output_item["tool_call_id"],
|
| 461 |
"content": output_item["output"]
|
| 462 |
})
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 466 |
model="unsloth/Qwen3-30B-A3B-GGUF",
|
| 467 |
-
temperature=temperature,
|
| 468 |
messages=messages,
|
| 469 |
-
max_tokens=2000
|
| 470 |
-
stream=True #
|
| 471 |
)
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
return
|
| 487 |
|
| 488 |
except HTTPException:
|
| 489 |
raise
|
|
@@ -500,7 +467,7 @@ async def root():
|
|
| 500 |
return {
|
| 501 |
"message": "Enhanced AI Chatbot API is running",
|
| 502 |
"version": "2.0.0",
|
| 503 |
-
"features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting"],
|
| 504 |
"timestamp": datetime.now().isoformat()
|
| 505 |
}
|
| 506 |
|
|
@@ -512,7 +479,7 @@ async def health_check():
|
|
| 512 |
"timestamp": datetime.now().isoformat(),
|
| 513 |
"services": {
|
| 514 |
"llm_client": client is not None,
|
| 515 |
-
"
|
| 516 |
}
|
| 517 |
}
|
| 518 |
return health_status
|
|
|
|
| 5 |
from typing import List, Dict, Optional
|
| 6 |
from fastapi import FastAPI, Request, HTTPException, Depends
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from fastapi.responses import StreamingResponse # <-- Import StreamingResponse
|
| 9 |
+
import asyncio # <-- Import asyncio
|
| 10 |
from openai import OpenAI
|
| 11 |
import logging
|
| 12 |
import time
|
|
|
|
| 83 |
**Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
|
| 84 |
|
| 85 |
# --- Enhanced Web Search Tool Implementation ---
|
| 86 |
+
def Google_Search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
|
| 87 |
"""
|
| 88 |
Enhanced Google Custom Search with better error handling and result formatting
|
| 89 |
"""
|
|
|
|
| 153 |
logger.error(f"Error during Google search request: {e}")
|
| 154 |
return []
|
| 155 |
except Exception as e:
|
| 156 |
+
logger.error(f"Unexpected error in Google_Search_tool: {e}")
|
| 157 |
return []
|
| 158 |
|
| 159 |
def format_search_results_for_llm(search_results: List[Dict]) -> str:
|
|
|
|
| 212 |
{
|
| 213 |
"type": "function",
|
| 214 |
"function": {
|
| 215 |
+
"name": "Google Search",
|
| 216 |
"description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
|
| 217 |
"parameters": {
|
| 218 |
"type": "object",
|
|
|
|
| 290 |
return len(self.requests[user_ip])
|
| 291 |
|
| 292 |
|
| 293 |
+
# Initialize rate limiter with 50 requests per day
|
| 294 |
rate_limiter = RateLimiter(
|
| 295 |
max_requests=50,
|
| 296 |
time_window=timedelta(days=1)
|
|
|
|
| 324 |
self.last_successful_index = index
|
| 325 |
|
| 326 |
|
| 327 |
+
# --- Enhanced Chatbot Endpoint (with Streaming) ---
|
| 328 |
@app.post("/chat")
|
| 329 |
async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
|
| 330 |
user_ip = get_user_ip(request)
|
|
|
|
| 344 |
try:
|
| 345 |
data = await request.json()
|
| 346 |
user_message = data.get("message", "").strip()
|
|
|
|
|
|
|
| 347 |
use_search = data.get("use_search")
|
| 348 |
if use_search is None:
|
| 349 |
+
use_search = data.get("user_search")
|
| 350 |
+
|
| 351 |
+
temperature = data.get("temperature", 0.7)
|
| 352 |
+
if not isinstance(temperature, (int, float)) or not 0 <= temperature <= 2:
|
|
|
|
|
|
|
| 353 |
temperature = 0.7
|
| 354 |
+
|
| 355 |
conversation_history = data.get("history", [])
|
| 356 |
|
|
|
|
|
|
|
|
|
|
| 357 |
if not user_message:
|
| 358 |
raise HTTPException(status_code=400, detail="No message provided")
|
| 359 |
|
|
|
|
| 360 |
if use_search is None:
|
| 361 |
use_search = should_use_search(user_message)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
+
# --- Message and Tool Call Preparation (Same as before) ---
|
| 364 |
+
current_date = datetime.now().strftime("%Y-%m-%d")
|
| 365 |
+
system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date) if use_search else SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
|
| 366 |
system_message = {"role": "system", "content": system_content}
|
| 367 |
messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
|
| 368 |
|
| 369 |
llm_kwargs = {
|
| 370 |
+
"model": "unsloth/Qwen3-30B-A3B-GGUF",
|
| 371 |
+
"temperature": temperature,
|
| 372 |
"messages": messages,
|
| 373 |
+
"max_tokens": 2000
|
| 374 |
}
|
| 375 |
|
| 376 |
if use_search:
|
|
|
|
| 377 |
llm_kwargs["tools"] = available_tools
|
| 378 |
+
llm_kwargs["tool_choice"] = "auto"
|
|
|
|
|
|
|
| 379 |
|
| 380 |
+
# First LLM call (for tool decision) - This part remains blocking
|
|
|
|
| 381 |
llm_response = client.chat.completions.create(**llm_kwargs)
|
| 382 |
tool_calls = llm_response.choices[0].message.tool_calls
|
| 383 |
source_links = []
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
if tool_calls:
|
| 386 |
logger.info(f"Processing {len(tool_calls)} tool calls")
|
| 387 |
tool_outputs = []
|
| 388 |
|
| 389 |
for tool_call in tool_calls:
|
| 390 |
+
if tool_call.function.name == "Google Search":
|
| 391 |
try:
|
| 392 |
function_args = json.loads(tool_call.function.arguments)
|
| 393 |
search_query = function_args.get("query", "").strip()
|
| 394 |
|
| 395 |
if search_query:
|
| 396 |
+
search_results = Google_Search_tool([search_query], num_results=5)
|
|
|
|
|
|
|
|
|
|
| 397 |
for result in search_results:
|
| 398 |
source_links.append({
|
| 399 |
"title": result["source_title"],
|
|
|
|
| 401 |
"domain": result["domain"]
|
| 402 |
})
|
| 403 |
|
|
|
|
| 404 |
formatted_results = format_search_results_for_llm(search_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
tool_outputs.append({
|
| 406 |
"tool_call_id": tool_call.id,
|
| 407 |
+
"output": formatted_results
|
| 408 |
})
|
| 409 |
+
except Exception as e:
|
| 410 |
+
logger.error(f"Error processing tool call: {e}")
|
| 411 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
messages.append(llm_response.choices[0].message)
|
| 413 |
for output_item in tool_outputs:
|
| 414 |
messages.append({
|
| 415 |
+
"role": "tool",
|
| 416 |
+
"tool_call_id": output_item["tool_call_id"],
|
| 417 |
"content": output_item["output"]
|
| 418 |
})
|
| 419 |
+
|
| 420 |
+
# --- MODIFICATION FOR STREAMING ---
|
| 421 |
+
|
| 422 |
+
async def response_generator():
|
| 423 |
+
"""This async generator streams the final response."""
|
| 424 |
+
# First, yield metadata (like sources) as a single event
|
| 425 |
+
initial_data = {
|
| 426 |
+
"sources": source_links,
|
| 427 |
+
"search_used": bool(tool_calls),
|
| 428 |
+
}
|
| 429 |
+
yield f"data: {json.dumps(initial_data)}\n\n"
|
| 430 |
+
|
| 431 |
+
# This is the final API call that will actually be streamed
|
| 432 |
+
stream = client.chat.completions.create(
|
| 433 |
model="unsloth/Qwen3-30B-A3B-GGUF",
|
| 434 |
+
temperature=temperature,
|
| 435 |
messages=messages,
|
| 436 |
+
max_tokens=2000,
|
| 437 |
+
stream=True # <-- Enable streaming from the AI
|
| 438 |
)
|
| 439 |
+
|
| 440 |
+
try:
|
| 441 |
+
for chunk in stream:
|
| 442 |
+
content = chunk.choices[0].delta.content
|
| 443 |
+
if content:
|
| 444 |
+
# Yield each piece of content in SSE format
|
| 445 |
+
chunk_data = {"response_chunk": content}
|
| 446 |
+
yield f"data: {json.dumps(chunk_data)}\n\n"
|
| 447 |
+
await asyncio.sleep(0) # Give up control to the event loop
|
| 448 |
+
finally:
|
| 449 |
+
# Signal the end of the stream to the client
|
| 450 |
+
yield "data: [DONE]\n\n"
|
| 451 |
+
|
| 452 |
+
# Return the StreamingResponse, which FastAPI will handle.
|
| 453 |
+
return StreamingResponse(response_generator(), media_type="text/event-stream")
|
| 454 |
|
| 455 |
except HTTPException:
|
| 456 |
raise
|
|
|
|
| 467 |
return {
|
| 468 |
"message": "Enhanced AI Chatbot API is running",
|
| 469 |
"version": "2.0.0",
|
| 470 |
+
"features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting", "Streaming Response"],
|
| 471 |
"timestamp": datetime.now().isoformat()
|
| 472 |
}
|
| 473 |
|
|
|
|
| 479 |
"timestamp": datetime.now().isoformat(),
|
| 480 |
"services": {
|
| 481 |
"llm_client": client is not None,
|
| 482 |
+
"Google Search": bool(GOOGLE_API_KEY and GOOGLE_CX)
|
| 483 |
}
|
| 484 |
}
|
| 485 |
return health_status
|