tecuts committed
Commit 5cb21e4 · verified · 1 Parent(s): f73a254

Update app.py

Files changed (1)
  1. app.py +21 -114
app.py CHANGED
@@ -7,6 +7,8 @@ from fastapi import FastAPI, Request, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from openai import OpenAI
 import logging
+from fastapi.responses import StreamingResponse
+
 
 # --- Configure Logging ---
 logging.basicConfig(level=logging.INFO)
@@ -222,144 +224,48 @@ def should_use_search(message: str) -> bool:
 async def chat_endpoint(request: Request):
     if not client:
         raise HTTPException(status_code=500, detail="LLM client not configured")
-
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
-
-        # Support both 'use_search' and 'user_search' parameter names for flexibility
-        use_search = data.get("use_search")
-        if use_search is None:
-            use_search = data.get("user_search")  # Alternative parameter name
-
+        use_search = data.get("use_search", data.get("user_search"))
         conversation_history = data.get("history", [])
-
-        # Debug logging for request parameters
-        logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, history length: {len(conversation_history)}")
-
+
         if not user_message:
             raise HTTPException(status_code=400, detail="No message provided")
 
-        # Auto-decide search usage if not specified
         if use_search is None:
             use_search = should_use_search(user_message)
-            logger.info(f"Auto-decided search usage: {use_search}")
-        else:
-            logger.info(f"Manual search setting: {use_search}")
 
-        # Prepare messages with appropriate system prompt based on search availability
        current_date = datetime.now().strftime("%Y-%m-%d")
-
         if use_search:
             system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
         else:
             system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
-
         system_message = {"role": "system", "content": system_content}
         messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
-
         llm_kwargs = {
-            "model": "unsloth/Qwen3-30B-A3B-GGUF",
-            "temperature": 0.7,  # Slightly higher for more creative responses
+            "model": "unsloth/Qwen3-30B-A3B-GGUF",
+            "temperature": 0.7,
             "messages": messages,
-            "max_tokens": 2000  # Ensure comprehensive responses
+            "max_tokens": 2000,
+            "stream": True,  # <--- Enable streaming
         }
-
         if use_search:
-            logger.info("Search is ENABLED - tools will be available to the model")
             llm_kwargs["tools"] = available_tools
-            llm_kwargs["tool_choice"] = "auto"  # Consider using "required" for testing
-        else:
-            logger.info("Search is DISABLED - no tools available")
-
-        # First LLM call
-        logger.info(f"Making LLM request with tools: {bool(use_search)}")
-        llm_response = client.chat.completions.create(**llm_kwargs)
-        tool_calls = llm_response.choices[0].message.tool_calls
-        source_links = []
-
-        # Debug: Log tool call information
-        if tool_calls:
-            logger.info(f"LLM made {len(tool_calls)} tool calls")
-            for i, call in enumerate(tool_calls):
-                logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
-        else:
-            logger.info("LLM did not make any tool calls")
-            if use_search:
-                logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
-
-        if tool_calls:
-            logger.info(f"Processing {len(tool_calls)} tool calls")
-            tool_outputs = []
-
-            for tool_call in tool_calls:
-                if tool_call.function.name == "google_search":
-                    try:
-                        function_args = json.loads(tool_call.function.arguments)
-                        search_query = function_args.get("query", "").strip()
-
-                        if search_query:
-                            logger.info(f"Executing search for: {search_query}")
-                            search_results = google_search_tool([search_query], num_results=5)
-
-                            # Collect source links for response
-                            for result in search_results:
-                                source_links.append({
-                                    "title": result["source_title"],
-                                    "url": result["url"],
-                                    "domain": result["domain"]
-                                })
-
-                            # Format results for LLM
-                            formatted_results = format_search_results_for_llm(search_results)
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": formatted_results
-                            })
-                        else:
-                            logger.warning("Empty search query in tool call")
-                            tool_outputs.append({
-                                "tool_call_id": tool_call.id,
-                                "output": "Error: Empty search query provided."
-                            })
-
-                    except json.JSONDecodeError as e:
-                        logger.error(f"Failed to parse tool call arguments: {e}")
-                        tool_outputs.append({
-                            "tool_call_id": tool_call.id,
-                            "output": "Error: Failed to parse search parameters."
-                        })
-
-            # Continue conversation with search results
-            messages.append(llm_response.choices[0].message)
-            for output_item in tool_outputs:
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": output_item["tool_call_id"],
-                    "content": output_item["output"]
-                })
+            llm_kwargs["tool_choice"] = "auto"
 
-            # Final response generation with search context
-            final_response = client.chat.completions.create(
-                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=0.7,
-                messages=messages,
-                max_tokens=2000
-            )
-            final_chatbot_response = final_response.choices[0].message.content
-        else:
-            final_chatbot_response = llm_response.choices[0].message.content
+        # Streaming generator
+        def stream_llm_response():
+            response = client.chat.completions.create(**llm_kwargs)
+            for chunk in response:
+                # Each chunk is an object, get the content delta
+                if hasattr(chunk.choices[0].delta, "content"):
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        yield content
 
-        # Enhanced response structure
-        response_data = {
-            "response": final_chatbot_response,
-            "sources": source_links,
-            "search_used": bool(tool_calls),
-            "timestamp": datetime.now().isoformat()
-        }
-
-        logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}")
-        return response_data
+        # Return as streaming response
+        return StreamingResponse(stream_llm_response(), media_type="text/plain")
 
     except HTTPException:
         raise
@@ -370,6 +276,7 @@ async def chat_endpoint(request: Request):
         logger.error(f"Unexpected error in /chat endpoint: {e}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
+
 # --- Health Check Endpoint ---
 @app.get("/")
 async def root():
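
With this change, the chat endpoint returns plain-text chunks via StreamingResponse rather than a single JSON body, so callers now have to read the response incrementally. Below is a minimal client sketch, assuming the app is served at http://localhost:8000; the /chat path is taken from the endpoint's error log, and the host, port, and payload values are illustrative:

import requests

# Open the request with stream=True so the body is read as the server yields it.
resp = requests.post(
    "http://localhost:8000/chat",  # assumed local dev address
    json={"message": "Hello!", "history": []},
    stream=True,
)
resp.raise_for_status()

# Print each plain-text chunk as it arrives from the streaming endpoint.
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="", flush=True)
print()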