tecuts committed on
Commit
9601d52
·
verified ·
1 Parent(s): 13046df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -284
app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import json
3
  import asyncio
4
  import requests
5
- import re
6
  from datetime import datetime
7
  from typing import List, Dict, Optional
8
  from fastapi import FastAPI, Request, HTTPException, Depends
@@ -49,131 +48,80 @@ GOOGLE_CX = os.getenv("GOOGLE_CX")
49
  LLM_API_KEY = os.getenv("LLM_API_KEY")
50
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
51
 
52
- # --- Enhanced System Prompts ---
53
- SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.
54
-
55
- IMPORTANT: When you need current information, recent events, or specific facts that might be outdated, you should explicitly request a search by including the phrase "SEARCH_NEEDED:" followed by your search query in your response.
56
-
57
- For example:
58
- - If asked about recent news: "SEARCH_NEEDED: latest news about [topic]"
59
- - If asked about current events: "SEARCH_NEEDED: current status of [event]"
60
- - If asked about recent developments: "SEARCH_NEEDED: recent developments in [field]"
61
-
62
- **Response Guidelines:**
63
- 1. Use search for queries that need current, recent, or specific factual information
64
- 2. Be proactive in identifying when search is needed
65
- 3. Synthesize information from multiple sources when search results are provided
66
- 4. Clearly indicate when information comes from search results
67
- 5. Provide comprehensive, well-structured answers
68
- 6. Cite sources appropriately
69
-
70
  Current date: {current_date}"""
71
 
72
  SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
73
  Current date: {current_date}"""
74
 
75
- # --- Optimized Web Search Tool ---
76
- async def google_search_tool_async(query: str, num_results: int = 3) -> List[Dict]:
77
- """
78
- Async Google Custom Search - reduced results for faster response
79
- """
80
  if not GOOGLE_API_KEY or not GOOGLE_CX or not query.strip():
81
  return []
82
 
83
- logger.info(f"Executing search for: '{query}'")
84
 
85
- search_url = "https://www.googleapis.com/customsearch/v1"
86
  params = {
87
  "key": GOOGLE_API_KEY,
88
  "cx": GOOGLE_CX,
89
  "q": query.strip(),
90
- "num": min(num_results, 5), # Reduced for speed
91
- "dateRestrict": "m3" # Last 3 months for freshness
92
  }
93
 
94
  try:
95
- # Run in thread pool to avoid blocking
96
  loop = asyncio.get_event_loop()
97
  response = await loop.run_in_executor(
98
  None,
99
- lambda: requests.get(search_url, params=params, timeout=10)
 
 
 
 
100
  )
101
  response.raise_for_status()
102
- search_results = response.json()
103
-
104
- if "items" not in search_results:
105
- return []
106
 
107
- parsed_results = []
108
- for item in search_results.get("items", [])[:num_results]: # Limit results
109
  title = item.get("title", "").strip()
110
  url = item.get("link", "").strip()
111
  snippet = item.get("snippet", "").strip()
112
 
113
  if title and url and snippet:
114
- parsed_results.append({
115
- "source_title": title,
116
  "url": url,
117
  "snippet": snippet,
118
  "domain": url.split('/')[2] if '/' in url else url
119
  })
120
-
121
- logger.info(f"Retrieved {len(parsed_results)} search results")
122
- return parsed_results
123
 
124
  except Exception as e:
125
- logger.error(f"Search error: {e}")
126
  return []
127
 
128
- def format_search_results_compact(search_results: List[Dict]) -> str:
129
- """Compact formatting for faster processing"""
130
- if not search_results:
131
- return "No search results found."
132
-
133
- formatted = ["Search Results:"]
134
- for i, result in enumerate(search_results, 1):
135
- formatted.append(f"\n{i}. {result['source_title']}")
136
- formatted.append(f" Source: {result['domain']}")
137
- formatted.append(f" Content: {result['snippet']}")
138
-
139
- return "\n".join(formatted)
140
-
141
- # --- Check if query needs search ---
142
- def should_search(query: str, use_search: bool) -> Optional[str]:
143
- """Determine if a query needs search and extract search terms"""
144
- if not use_search:
145
- return None
146
-
147
- # Keywords that typically require current information
148
- current_keywords = [
149
- 'today', 'recent', 'latest', 'current', 'now', 'this year', '2024', '2025',
150
- 'news', 'happening', 'update', 'development', 'status', 'price', 'stock',
151
- 'weather', 'score', 'result', 'election', 'covid', 'pandemic'
152
- ]
153
-
154
- query_lower = query.lower()
155
-
156
- # Check for current-info keywords
157
- if any(keyword in query_lower for keyword in current_keywords):
158
- return query
159
 
160
- # Check for questions about specific companies, products, or events
161
- question_patterns = [
162
- r'what.*happened.*',
163
- r'when.*did.*',
164
- r'how.*is.*doing',
165
- r'what.*the.*status',
166
- r'is.*still.*',
167
- r'has.*been.*',
168
- ]
169
-
170
- if any(re.search(pattern, query_lower) for pattern in question_patterns):
171
- return query
172
 
173
- return None
174
 
175
  # --- FastAPI Application Setup ---
176
- app = FastAPI(title="Streaming AI Chatbot", version="2.1.0")
177
 
178
  app.add_middleware(
179
  CORSMiddleware,
@@ -194,227 +142,71 @@ if not LLM_API_KEY or not LLM_BASE_URL:
194
  client = None
195
  else:
196
  client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
197
- logger.info("OpenAI client initialized successfully")
198
 
199
- # --- Tool Definition (keeping for potential future use) ---
200
- available_tools = [
201
- {
202
- "type": "function",
203
- "function": {
204
- "name": "google_search",
205
- "description": "Search Google for current information, recent events, or specific facts.",
206
- "parameters": {
207
- "type": "object",
208
- "properties": {
209
- "query": {
210
- "type": "string",
211
- "description": "Search query with relevant keywords"
212
- }
213
- },
214
- "required": ["query"]
215
- }
216
- }
217
- }
218
- ]
219
-
220
- # --- Enhanced Streaming Response Generator ---
221
  async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float, original_query: str):
222
- """Generate streaming response with intelligent search triggering"""
223
 
224
  try:
225
  source_links = []
226
- search_performed = False
227
 
228
- # Check if we should proactively search
229
- proactive_search_query = should_search(original_query, use_search)
230
- if proactive_search_query:
231
- logger.info(f"Proactive search triggered for: {proactive_search_query}")
232
- yield f"data: {json.dumps({'type': 'status', 'data': 'Searching for current information...'})}\n\n"
 
233
 
234
- search_results = await google_search_tool_async(proactive_search_query, 4)
235
  if search_results:
236
- search_context = format_search_results_compact(search_results)
 
 
 
 
 
 
 
 
237
 
238
  # Add search context to messages
239
- enhanced_messages = messages + [{
240
  "role": "system",
241
- "content": f"Recent search results for your reference:\n\n{search_context}\n\nPlease use this information to provide a comprehensive and up-to-date response."
242
  }]
243
 
244
- for result in search_results:
245
- source_links.append({
246
- "title": result["source_title"],
247
- "url": result["url"],
248
- "domain": result["domain"]
249
- })
250
-
251
- search_performed = True
252
- messages = enhanced_messages
253
 
254
- # Initial LLM call with streaming
255
  llm_kwargs = {
256
- "model": "unsloth/Qwen3-30B-A3B-GGUF",
257
  "temperature": temperature,
258
  "messages": messages,
259
- "max_tokens": 2000,
260
- "stream": True
 
261
  }
262
 
263
- # Try function calling as backup (in case model supports it)
264
- if use_search and not search_performed:
265
- llm_kwargs["tools"] = available_tools
266
- llm_kwargs["tool_choice"] = "auto"
267
-
268
- response_content = ""
269
- tool_calls_data = []
270
-
271
- yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
272
-
273
  # Stream the response
274
  stream = client.chat.completions.create(**llm_kwargs)
275
 
276
  for chunk in stream:
277
- delta = chunk.choices[0].delta
278
-
279
- # Handle content streaming
280
- if delta.content:
281
- content_chunk = delta.content
282
- response_content += content_chunk
283
-
284
- # Check for search requests in the content
285
- if use_search and not search_performed and "SEARCH_NEEDED:" in content_chunk:
286
- # Extract search query from the content
287
- search_match = re.search(r'SEARCH_NEEDED:\s*(.+?)(?:\n|$)', content_chunk)
288
- if search_match:
289
- search_query = search_match.group(1).strip()
290
- logger.info(f"Search requested by model: {search_query}")
291
-
292
- # Don't yield this chunk yet, we'll search first
293
- continue
294
-
295
- yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
296
-
297
- # Handle tool calls (backup method)
298
- if delta.tool_calls:
299
- for tool_call in delta.tool_calls:
300
- if len(tool_calls_data) <= tool_call.index:
301
- tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}}
302
- for _ in range(tool_call.index + 1 - len(tool_calls_data))])
303
-
304
- if tool_call.id:
305
- tool_calls_data[tool_call.index]["id"] = tool_call.id
306
- if tool_call.function.name:
307
- tool_calls_data[tool_call.index]["function"]["name"] = tool_call.function.name
308
- if tool_call.function.arguments:
309
- tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
310
 
311
- # Handle model-requested search
312
- if use_search and not search_performed and "SEARCH_NEEDED:" in response_content:
313
- search_matches = re.findall(r'SEARCH_NEEDED:\s*(.+?)(?:\n|$)', response_content)
314
- if search_matches:
315
- yield f"data: {json.dumps({'type': 'status', 'data': 'Performing requested search...'})}\n\n"
316
-
317
- # Execute all requested searches
318
- search_tasks = [google_search_tool_async(query.strip()) for query in search_matches]
319
- search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
320
-
321
- all_results = []
322
- for results in search_results_list:
323
- if isinstance(results, list):
324
- all_results.extend(results)
325
-
326
- if all_results:
327
- search_context = format_search_results_compact(all_results)
328
-
329
- for result in all_results:
330
- source_links.append({
331
- "title": result["source_title"],
332
- "url": result["url"],
333
- "domain": result["domain"]
334
- })
335
-
336
- # Generate new response with search results
337
- search_messages = messages + [{
338
- "role": "system",
339
- "content": f"Search Results:\n\n{search_context}\n\nPlease provide a comprehensive response based on these search results."
340
- }]
341
-
342
- final_stream = client.chat.completions.create(
343
- model="unsloth/Qwen3-30B-A3B-GGUF",
344
- temperature=temperature,
345
- messages=search_messages,
346
- max_tokens=2000,
347
- stream=True
348
- )
349
-
350
- for chunk in final_stream:
351
- if chunk.choices[0].delta.content:
352
- content = chunk.choices[0].delta.content
353
- yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
354
-
355
- search_performed = True
356
-
357
- # Process function-based tool calls (backup method)
358
- elif tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
359
- yield f"data: {json.dumps({'type': 'status', 'data': 'Executing search tools...'})}\n\n"
360
-
361
- search_tasks = []
362
- for tool_call in tool_calls_data:
363
- if tool_call["function"]["name"] == "google_search":
364
- try:
365
- args = json.loads(tool_call["function"]["arguments"])
366
- query = args.get("query", "").strip()
367
- if query:
368
- search_tasks.append(google_search_tool_async(query))
369
- logger.info(f"Function call search: {query}")
370
- except json.JSONDecodeError:
371
- continue
372
-
373
- if search_tasks:
374
- search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
375
-
376
- all_results = []
377
- for results in search_results_list:
378
- if isinstance(results, list):
379
- all_results.extend(results)
380
- for result in results:
381
- source_links.append({
382
- "title": result["source_title"],
383
- "url": result["url"],
384
- "domain": result["domain"]
385
- })
386
-
387
- if all_results:
388
- search_context = format_search_results_compact(all_results)
389
-
390
- search_messages = messages + [{
391
- "role": "system",
392
- "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
393
- }]
394
-
395
- final_stream = client.chat.completions.create(
396
- model="unsloth/Qwen3-30B-A3B-GGUF",
397
- temperature=temperature,
398
- messages=search_messages,
399
- max_tokens=2000,
400
- stream=True
401
- )
402
-
403
- for chunk in final_stream:
404
- if chunk.choices[0].delta.content:
405
- content = chunk.choices[0].delta.content
406
- yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
407
-
408
- search_performed = True
409
-
410
- # Send sources and completion
411
  if source_links:
412
  yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
413
 
414
- yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': search_performed}})}\n\n"
 
415
 
416
  except Exception as e:
417
- logger.error(f"Streaming error: {e}")
418
  yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"
419
 
420
  # --- Streaming Chat Endpoint ---
@@ -427,7 +219,7 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
427
  data = await request.json()
428
  user_message = data.get("message", "").strip()
429
  use_search = data.get("use_search", False)
430
- temperature = max(0, min(2, data.get("temperature", 0.7)))
431
  conversation_history = data.get("history", [])
432
 
433
  if not user_message:
@@ -438,7 +230,7 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
438
  system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
439
  messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
440
 
441
- logger.info(f"Stream request - search: {use_search}, temp: {temperature}, query: {user_message[:50]}...")
442
 
443
  return StreamingResponse(
444
  generate_streaming_response(messages, use_search, temperature, user_message),
@@ -446,12 +238,19 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
446
  headers={
447
  "Cache-Control": "no-cache",
448
  "Connection": "keep-alive",
449
- "X-Accel-Buffering": "no"
 
450
  }
451
  )
452
 
453
  except json.JSONDecodeError:
454
  raise HTTPException(status_code=400, detail="Invalid JSON")
455
  except Exception as e:
456
- logger.error(f"Stream endpoint error: {e}")
457
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
2
  import json
3
  import asyncio
4
  import requests
 
5
  from datetime import datetime
6
  from typing import List, Dict, Optional
7
  from fastapi import FastAPI, Request, HTTPException, Depends
 
48
  LLM_API_KEY = os.getenv("LLM_API_KEY")
49
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
50
 
51
+ # --- Simplified System Prompts ---
52
+ SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to current web search results.
53
+ Use the provided search results to give accurate, up-to-date responses.
54
+ Always reference and cite the search results when relevant.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  Current date: {current_date}"""
56
 
57
  SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
58
  Current date: {current_date}"""
59
 
60
# --- Fast Web Search Tool ---
async def fast_google_search(query: str, num_results: int = 4) -> List[Dict]:
    """Run a Google Custom Search without blocking the event loop.

    Args:
        query: Search terms; blank/whitespace-only queries are skipped.
        num_results: Desired result count; clamped to Google's 1-10 API range.

    Returns:
        A list of dicts with "title", "url", "snippet", and "domain" keys,
        or an empty list when credentials are missing or the request fails.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CX or not query.strip():
        return []

    logger.info(f"Searching: '{query}'")

    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX,
        "q": query.strip(),
        # Google's Custom Search JSON API accepts 1-10 results per request;
        # an unclamped value is rejected with a 400 error.
        "num": max(1, min(num_results, 10)),
        "dateRestrict": "m6"  # Last 6 months
    }

    try:
        # requests is synchronous, so run it in the default thread pool.
        # get_running_loop() is the correct call inside a coroutine
        # (get_event_loop() is deprecated here since Python 3.10).
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.get(
                "https://www.googleapis.com/customsearch/v1",
                params=params,
                timeout=12
            )
        )
        response.raise_for_status()
        data = response.json()

        results = []
        for item in data.get("items", [])[:num_results]:
            title = item.get("title", "").strip()
            url = item.get("link", "").strip()
            snippet = item.get("snippet", "").strip()

            # Skip entries missing any field the frontend relies on.
            if title and url and snippet:
                results.append({
                    "title": title,
                    "url": url,
                    "snippet": snippet,
                    "domain": url.split('/')[2] if '/' in url else url
                })

        logger.info(f"Found {len(results)} results")
        return results

    except Exception as e:
        # Best-effort: a failed search degrades to "no results", never a crash.
        logger.error(f"Search failed: {e}")
        return []
109
 
110
def format_search_context(results: List[Dict]) -> str:
    """Render search results as a compact plain-text context block.

    Each result contributes a numbered title line plus its source domain
    and snippet. Returns a fixed placeholder string when *results* is empty.
    """
    if not results:
        return "No search results available."

    lines = ["=== SEARCH RESULTS ==="]
    for index, entry in enumerate(results, start=1):
        lines.extend([
            f"\n[{index}] {entry['title']}",
            f"Source: {entry['domain']}",
            f"Content: {entry['snippet']}",
        ])

    return "\n".join(lines)
122
 
123
  # --- FastAPI Application Setup ---
124
+ app = FastAPI(title="Streaming AI Chatbot", version="2.2.0")
125
 
126
  app.add_middleware(
127
  CORSMiddleware,
 
142
  client = None
143
  else:
144
  client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
145
+ logger.info("OpenAI client initialized")
146
 
147
# --- Optimized Streaming Response Generator ---
async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float, original_query: str):
    """Stream an SSE chat response, optionally augmented with live search.

    Yields "data: <json>\n\n" events with type: status, content, sources,
    done, or error.

    Args:
        messages: Chat history including the system prompt.
        use_search: When True, a search is always performed first.
        temperature: Sampling temperature forwarded to the LLM.
        original_query: Raw user message, used verbatim as the search query.
    """
    try:
        source_links = []

        # ALWAYS search when use_search is True
        if use_search:
            yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"

            # Fast search execution
            search_results = await fast_google_search(original_query, 4)

            if search_results:
                search_context = format_search_context(search_results)

                # Prepare source links for the frontend
                source_links = [{
                    "title": result["title"],
                    "url": result["url"],
                    "domain": result["domain"]
                } for result in search_results]

                # Inject the search context as an extra system message so the
                # model grounds its answer in the retrieved snippets.
                messages = messages + [{
                    "role": "system",
                    "content": f"{search_context}\n\nBased on the search results above, provide a comprehensive and accurate response."
                }]

                logger.info(f"Added {len(search_results)} search results to context")

        yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"

        llm_kwargs = {
            "model": "unsloth/Qwen3-30B-A3B-GGUF",
            "temperature": temperature,
            "messages": messages,
            "max_tokens": 2500,  # Response length cap
            "stream": True,
            "top_p": 0.9,
        }

        # NOTE(review): the OpenAI stream is iterated synchronously, which
        # blocks the event loop between chunks — consider run_in_executor
        # or the async client if this becomes a throughput problem.
        stream = client.chat.completions.create(**llm_kwargs)

        for chunk in stream:
            # Guard against keep-alive chunks that carry no choices/content;
            # an unguarded chunk.choices[0] would abort the whole stream.
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"

        # Send sources if available
        if source_links:
            yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"

        # Completion marker reports whether search actually contributed.
        yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': use_search and bool(source_links)}})}\n\n"

    except Exception as e:
        # logger.exception preserves the traceback for debugging.
        logger.exception(f"Response generation failed: {e}")
        yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"
211
 
212
  # --- Streaming Chat Endpoint ---
 
219
  data = await request.json()
220
  user_message = data.get("message", "").strip()
221
  use_search = data.get("use_search", False)
222
+ temperature = max(0.1, min(1.5, data.get("temperature", 0.7))) # Optimized range
223
  conversation_history = data.get("history", [])
224
 
225
  if not user_message:
 
230
  system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
231
  messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
232
 
233
+ logger.info(f"Request: search={use_search}, temp={temperature}")
234
 
235
  return StreamingResponse(
236
  generate_streaming_response(messages, use_search, temperature, user_message),
 
238
  headers={
239
  "Cache-Control": "no-cache",
240
  "Connection": "keep-alive",
241
+ "X-Accel-Buffering": "no",
242
+ "Access-Control-Allow-Origin": "*" # For faster preflight
243
  }
244
  )
245
 
246
  except json.JSONDecodeError:
247
  raise HTTPException(status_code=400, detail="Invalid JSON")
248
  except Exception as e:
249
+ logger.error(f"Endpoint error: {e}")
250
+ raise HTTPException(status_code=500, detail=str(e))
251
+
252
# --- Health Check Endpoint ---
@app.get("/health")
async def health_check():
    """Lightweight liveness probe returning service status and a timestamp."""
    from datetime import timezone  # local import: file imports only `datetime`
    # Timezone-aware UTC avoids the ambiguity of a naive server-local time.
    return {"status": "ok", "timestamp": datetime.now(timezone.utc).isoformat()}