tecuts committed on
Commit 0ec0144 · verified · 1 Parent(s): 174368c

Update app.py

Files changed (1)
  1. app.py +62 -95
app.py CHANGED
@@ -5,6 +5,8 @@ from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 from fastapi import FastAPI, Request, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse  # <-- Import StreamingResponse
+import asyncio  # <-- Import asyncio
 from openai import OpenAI
 import logging
 import time
@@ -81,7 +83,7 @@ When you don't have current information about recent events or changing data, ac
 **Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
 
 # --- Enhanced Web Search Tool Implementation ---
-def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
+def Google_Search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
     """
     Enhanced Google Custom Search with better error handling and result formatting
     """
@@ -151,7 +153,7 @@ def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
         logger.error(f"Error during Google search request: {e}")
         return []
     except Exception as e:
-        logger.error(f"Unexpected error in google_search_tool: {e}")
+        logger.error(f"Unexpected error in Google Search_tool: {e}")
         return []
 
 def format_search_results_for_llm(search_results: List[Dict]) -> str:
@@ -210,7 +212,7 @@ available_tools = [
     {
         "type": "function",
         "function": {
-            "name": "google_search",
+            "name": "Google Search",
            "description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
            "parameters": {
                "type": "object",
@@ -288,7 +290,7 @@ class RateLimiter:
         return len(self.requests[user_ip])
 
 
-# Initialize rate limiter with 100 requests per day
+# Initialize rate limiter with 50 requests per day
 rate_limiter = RateLimiter(
     max_requests=50,
     time_window=timedelta(days=1)
@@ -322,7 +324,7 @@ class ApiRotator:
         self.last_successful_index = index
 
 
-# --- Enhanced Chatbot Endpoint ---
+# --- Enhanced Chatbot Endpoint (with Streaming) ---
 @app.post("/chat")
 async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
     user_ip = get_user_ip(request)
@@ -342,89 +344,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
-
-        # Support both 'use_search' and 'user_search' parameter names for flexibility
         use_search = data.get("use_search")
         if use_search is None:
-            use_search = data.get("user_search")  # Alternative parameter name
-
-        # Allow client to specify temperature (with validation)
-        temperature = data.get("temperature", 0.7)  # Default to 0.7
-        if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
-            logger.warning(f"Invalid temperature value: {temperature}, defaulting to 0.7")
+            use_search = data.get("user_search")
+
+        temperature = data.get("temperature", 0.7)
+        if not isinstance(temperature, (int, float)) or not 0 <= temperature <= 2:
            temperature = 0.7
-
+
        conversation_history = data.get("history", [])
 
-        # Debug logging for request parameters
-        logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, temperature: {temperature}, history length: {len(conversation_history)}")
-
        if not user_message:
            raise HTTPException(status_code=400, detail="No message provided")
 
-        # Auto-decide search usage if not specified
        if use_search is None:
            use_search = should_use_search(user_message)
-            logger.info(f"Auto-decided search usage: {use_search}")
-        else:
-            logger.info(f"Manual search setting: {use_search}")
-
-        # Prepare messages with appropriate system prompt based on search availability
-        current_date = datetime.now().strftime("%Y-%m-%d")
-
-        if use_search:
-            system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
-        else:
-            system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
 
+        # --- Message and Tool Call Preparation (Same as before) ---
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date) if use_search else SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
        system_message = {"role": "system", "content": system_content}
        messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
 
        llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
-            "temperature": temperature,  # Use client-specified temperature
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "temperature": temperature,
            "messages": messages,
-            "max_tokens": 2000  # Ensure comprehensive responses
+            "max_tokens": 2000
        }
 
        if use_search:
-            logger.info("Search is ENABLED - tools will be available to the model")
            llm_kwargs["tools"] = available_tools
-            llm_kwargs["tool_choice"] = "auto"  # Consider using "required" for testing
-        else:
-            logger.info("Search is DISABLED - no tools available")
+            llm_kwargs["tool_choice"] = "auto"
 
-        # First LLM call
-        logger.info(f"Making LLM request with tools: {bool(use_search)}, temperature: {temperature}")
+        # First LLM call (for tool decision) - This part remains blocking
        llm_response = client.chat.completions.create(**llm_kwargs)
        tool_calls = llm_response.choices[0].message.tool_calls
        source_links = []
 
-        # Debug: Log tool call information
-        if tool_calls:
-            logger.info(f"LLM made {len(tool_calls)} tool calls")
-            for i, call in enumerate(tool_calls):
-                logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
-        else:
-            logger.info("LLM did not make any tool calls")
-            if use_search:
-                logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
-
        if tool_calls:
            logger.info(f"Processing {len(tool_calls)} tool calls")
            tool_outputs = []
 
            for tool_call in tool_calls:
-                if tool_call.function.name == "google_search":
+                if tool_call.function.name == "Google Search":
                    try:
                        function_args = json.loads(tool_call.function.arguments)
                        search_query = function_args.get("query", "").strip()
 
                        if search_query:
-                            logger.info(f"Executing search for: {search_query}")
-                            search_results = google_search_tool([search_query], num_results=5)
-
-                            # Collect source links for response
+                            search_results = Google_Search_tool([search_query], num_results=5)
                            for result in search_results:
                                source_links.append({
                                    "title": result["source_title"],
@@ -432,58 +401,56 @@ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
                                    "domain": result["domain"]
                                })
 
-                            # Format results for LLM
                            formatted_results = format_search_results_for_llm(search_results)
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": formatted_results
-                            })
-                        else:
-                            logger.warning("Empty search query in tool call")
                            tool_outputs.append({
                                "tool_call_id": tool_call.id,
-                                "output": "Error: Empty search query provided."
+                                "output": formatted_results
                            })
-
-                    except json.JSONDecodeError as e:
-                        logger.error(f"Failed to parse tool call arguments: {e}")
-                        tool_outputs.append({
-                            "tool_call_id": tool_call.id,
-                            "output": "Error: Failed to parse search parameters."
-                        })
-
-            # Continue conversation with search results
+                    except Exception as e:
+                        logger.error(f"Error processing tool call: {e}")
+
            messages.append(llm_response.choices[0].message)
            for output_item in tool_outputs:
                messages.append({
-                    "role": "tool",
-                    "tool_call_id": output_item["tool_call_id"],
+                    "role": "tool",
+                    "tool_call_id": output_item["tool_call_id"],
                    "content": output_item["output"]
                })
-
-            # Final response generation with search context
-            final_response = client.chat.completions.create(
+
+        # --- MODIFICATION FOR STREAMING ---
+
+        async def response_generator():
+            """This async generator streams the final response."""
+            # First, yield metadata (like sources) as a single event
+            initial_data = {
+                "sources": source_links,
+                "search_used": bool(tool_calls),
+            }
+            yield f"data: {json.dumps(initial_data)}\n\n"
+
+            # This is the final API call that will actually be streamed
+            stream = client.chat.completions.create(
                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=temperature,  # Use same temperature for consistency
                messages=messages,
-                max_tokens=2000
-                stream=True  # <<< KEY CHANGE: Enable streaming
+                temperature=temperature,
+                max_tokens=2000,
+                stream=True  # <-- Enable streaming from the AI
            )
-            final_chatbot_response = final_response.choices[0].message.content
-        else:
-            final_chatbot_response = llm_response.choices[0].message.content
-
-        # Enhanced response structure
-        response_data = {
-            "response": final_chatbot_response,
-            "sources": source_links,
-            "search_used": bool(tool_calls),
-            "temperature": temperature,  # Include temperature in response for debugging
-            "timestamp": datetime.now().isoformat()
-        }
-
-        logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}, Temperature: {temperature}")
-        return response_data
+
+            try:
+                for chunk in stream:
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        # Yield each piece of content in SSE format
+                        chunk_data = {"response_chunk": content}
+                        yield f"data: {json.dumps(chunk_data)}\n\n"
+                        await asyncio.sleep(0)  # Give up control to the event loop
+            finally:
+                # Signal the end of the stream to the client
+                yield "data: [DONE]\n\n"
+
+        # Return the StreamingResponse, which FastAPI will handle.
+        return StreamingResponse(response_generator(), media_type="text/event-stream")
 
    except HTTPException:
        raise
@@ -500,7 +467,7 @@ async def root():
    return {
        "message": "Enhanced AI Chatbot API is running",
        "version": "2.0.0",
-        "features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting"],
+        "features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting", "Streaming Response"],
        "timestamp": datetime.now().isoformat()
    }
 
@@ -512,7 +479,7 @@ async def health_check():
        "timestamp": datetime.now().isoformat(),
        "services": {
            "llm_client": client is not None,
-            "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX)
+            "Google Search": bool(GOOGLE_API_KEY and GOOGLE_CX)
        }
    }
    return health_status
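Since /chat now returns Server-Sent Events instead of a single JSON payload, below is a minimal client-side sketch of how the new stream could be consumed. The base URL and the requests dependency are illustration-only assumptions; the event format itself (an initial metadata event carrying "sources" and "search_used", then "response_chunk" events, then a "data: [DONE]" sentinel) is the one emitted by response_generator() in the diff above.

import json
import requests  # assumed client-side dependency; not part of app.py

# Hypothetical base URL; point this at wherever the FastAPI app is served.
url = "http://localhost:8000/chat"
payload = {"message": "What happened in tech news today?", "use_search": True}

with requests.post(url, json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip blank separator lines between SSE events
        data = line[len("data: "):]
        if data == "[DONE]":
            break  # end-of-stream sentinel emitted by response_generator()
        event = json.loads(data)
        if "sources" in event:
            print("Sources:", event["sources"])  # initial metadata event
        elif "response_chunk" in event:
            print(event["response_chunk"], end="", flush=True)  # incremental model output

Because the metadata arrives as the first event, a client can render the source links before any model tokens start streaming.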