Spaces:

Chrunos
/

chat

Running

App Files Files Community

Chrunos committed on Jul 1, 2025

Commit

3d6ca7e

verified ·

1 Parent(s): 17dccdc

Update app.py

Browse files

Files changed (1) hide show

app.py +390 -286

app.py CHANGED Viewed

@@ -1,64 +1,259 @@
 import os
-import re
 import logging
-import uuid
 import time
-from datetime import datetime, timezone, timedelta
 from collections import defaultdict
-from typing import Optional, Dict, Any
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-from fastapi import FastAPI, HTTPException, Body, BackgroundTasks, Path, Request
-from fastapi.responses import StreamingResponse
-from pydantic import BaseModel, Field
-import openai # For your custom API
-import google.generativeai as genai # For Gemini API
-from google.generativeai.types import GenerationConfig
-# --- Logging Configuration ---
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
-)
 logger = logging.getLogger(__name__)
-# --- Configuration ---
-CUSTOM_API_BASE_URL_DEFAULT = "https://api-q3ieh5raqfuad9o8.aistudio-app.com/v1"
-CUSTOM_API_MODEL_DEFAULT = "gemma3:27b"
-DEFAULT_GEMINI_MODEL = "gemini-2.0-flash"
-GEMINI_REQUEST_TIMEOUT_SECONDS = 300
-# --- In-Memory Task Storage ---
-tasks_db: Dict[str, Dict[str, Any]] = {}
-# --- Pydantic Models ---
-class ChatPayload(BaseModel):
-    message: str
-    temperature: float = Field(0.6, ge=0.0, le=1.0)
-class GeminiTaskRequest(BaseModel):
-    message: str
-    url: Optional[str] = None
-    gemini_model: Optional[str] = None
-    api_key: Optional[str] = Field(None, description="Gemini API Key (optional; uses Space secret if not provided)")
-class TaskSubmissionResponse(BaseModel):
-    task_id: str
-    status: str
-    task_detail_url: str
-class TaskStatusResponse(BaseModel):
-    task_id: str
-    status: str
-    submitted_at: datetime
-    last_updated_at: datetime
-    result: Optional[str] = None
-    error: Optional[str] = None
-    # request_params: Optional[Dict[str, Any]] = None # Optionally return original params
 # Rate limiting dictionary
 class RateLimiter:
@@ -95,7 +290,7 @@ class RateLimiter:
         return len(self.requests[user_ip])
-# Initialize rate limiter with 100 requests per day
 rate_limiter = RateLimiter(
     max_requests=50,
     time_window=timedelta(days=1)
@@ -129,110 +324,9 @@ class ApiRotator:
         self.last_successful_index = index
-# --- FastAPI App Initialization ---
-app = FastAPI(
-    title="Dual Chat & Async Gemini API",
-    description="Made by Cody from chrunos.com.",
-    version="2.0.0"
-)
-# --- Helper Functions ---
-def is_video_url_for_gemini(url: Optional[str]) -> bool:
-    if not url:
-        return False
-    # Use raw strings (r"...") for regular expressions to avoid SyntaxWarnings
-    youtube_regex = (
-        r'(https_?://)?(www\.)?'
-        r'(youtube|youtu|youtube-nocookie)\.(com|be)/'  # Changed to raw string
-        r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'  # Changed to raw string
-    )
-    # This regex was likely fine as it didn't have ambiguous escapes, but good practice to make it raw too
-    googleusercontent_youtube_regex = r'https_?://googleusercontent\.com/youtube\.com/\w+'
-    return re.match(youtube_regex, url) is not None or \
-           re.match(googleusercontent_youtube_regex, url) is not None
-async def process_gemini_request_background(
-    task_id: str,
-    user_message: str,
-    input_url: Optional[str],
-    requested_gemini_model: str,
-    gemini_key_to_use: str
-):
-    logger.info(f"[Task {task_id}] Starting background Gemini processing. Model: {requested_gemini_model}, URL: {input_url}")
-    tasks_db[task_id]["status"] = "PROCESSING"
-    tasks_db[task_id]["last_updated_at"] = datetime.now(timezone.utc)
-    try:
-        genai.configure(api_key=gemini_key_to_use)
-        model_instance = genai.GenerativeModel(model_name=requested_gemini_model)
-        content_parts = [{"text": user_message}]
-        if input_url and is_video_url_for_gemini(input_url):
-            logger.info(f"[Task {task_id}] Adding video URL to Gemini content: {input_url}")
-            content_parts.append({
-                "file_data": {
-                    "mime_type": "video/youtube", # Or let Gemini infer
-                    "file_uri": input_url
-                }
-            })
-        gemini_contents = [{"parts": content_parts}]
-        generation_config = GenerationConfig(candidate_count=1)
-        request_options = {"timeout": GEMINI_REQUEST_TIMEOUT_SECONDS}
-        logger.info(f"[Task {task_id}] Sending request to Gemini API...")
-        response = await model_instance.generate_content_async(
-            gemini_contents,
-            stream=False, # Collect full response for async task
-            generation_config=generation_config,
-            request_options=request_options
-        )
-        # Assuming response.text contains the full aggregated text
-        # If using a model version that streams even for non-stream call, aggregate it:
-        full_response_text = ""
-        if hasattr(response, 'text') and response.text:
-            full_response_text = response.text
-        elif hasattr(response, 'parts'): # Check for newer API structures if .text is not primary
-            for part in response.parts:
-                if hasattr(part, 'text'):
-                    full_response_text += part.text
-        else: # Fallback for safety if structure is unexpected or if it's an iterable of chunks
-            # This part might need adjustment based on actual non-streaming response object
-            # For now, assuming generate_content_async with stream=False gives a response with .text
-            # or we need to iterate if it's still a stream internally for some models
-            logger.warning(f"[Task {task_id}] Gemini response structure not as expected or empty. Response: {response}")
-        if not full_response_text and response.prompt_feedback and response.prompt_feedback.block_reason:
-            block_reason_name = response.prompt_feedback.block_reason.name if hasattr(response.prompt_feedback.block_reason, 'name') else str(response.prompt_feedback.block_reason)
-            logger.warning(f"[Task {task_id}] Gemini content blocked: {block_reason_name}")
-            tasks_db[task_id]["status"] = "FAILED"
-            tasks_db[task_id]["error"] = f"Content blocked by Gemini due to: {block_reason_name}"
-        elif full_response_text:
-            logger.info(f"[Task {task_id}] Gemini processing successful. Result length: {len(full_response_text)}")
-            tasks_db[task_id]["status"] = "COMPLETED"
-            tasks_db[task_id]["result"] = full_response_text
-        else:
-            logger.warning(f"[Task {task_id}] Gemini processing completed but no text content found and no block reason.")
-            tasks_db[task_id]["status"] = "FAILED"
-            tasks_db[task_id]["error"] = "Gemini returned no content and no specific block reason."
-    except Exception as e:
-        logger.error(f"[Task {task_id}] Error during Gemini background processing: {e}", exc_info=True)
-        tasks_db[task_id]["status"] = "FAILED"
-        tasks_db[task_id]["error"] = str(e)
-    finally:
-        tasks_db[task_id]["last_updated_at"] = datetime.now(timezone.utc)
-# --- API Endpoints ---
-@app.post("/chat", response_class=StreamingResponse)
-async def direct_chat(payload: ChatPayload, request: Request):
-    logger.info(f"Direct chat request received. Temperature: {payload.temperature}, Message: '{payload.message[:50]}...'")
     user_ip = get_user_ip(request)
     if rate_limiter.is_rate_limited(user_ip):
@@ -244,138 +338,148 @@ async def direct_chat(payload: ChatPayload, request: Request):
                 "url": "https://t.me/chrunoss"
             }
         )
-    custom_api_key_secret = os.getenv("CUSTOM_API_SECRET_KEY")
-    custom_api_base_url = os.getenv("CUSTOM_API_BASE_URL", CUSTOM_API_BASE_URL_DEFAULT)
-    custom_api_model = os.getenv("CUSTOM_API_MODEL", CUSTOM_API_MODEL_DEFAULT)
-    if not custom_api_key_secret:
-        logger.error("Custom API key ('CUSTOM_API_SECRET_KEY') is not configured for /chat.")
-        raise HTTPException(status_code=500, detail="Custom API key not configured.")
-    async def custom_api_streamer():
-        client = None
-        try:
-            logger.info("Sending request to Custom API for /chat.")
-            # Use AsyncOpenAI with proper configuration
-            from openai import AsyncOpenAI
-            client = AsyncOpenAI(
-                api_key=custom_api_key_secret,
-                base_url=custom_api_base_url,
-                timeout=60.0  # Longer timeout for gemma3:27b model
-            )
-            stream = await client.chat.completions.create(
-                model=custom_api_model,
-                temperature=payload.temperature,
-                messages=[{"role": "user", "content": payload.message}],
-                stream=True
-            )
-            async for chunk in stream:
-                try:
-                    # Exact same logic as your working code
-                    if hasattr(chunk.choices[0].delta, "reasoning_content") and chunk.choices[0].delta.reasoning_content:
-                        yield chunk.choices[0].delta.reasoning_content
-                    elif chunk.choices[0].delta.content is not None:  # Handle None explicitly
-                        yield chunk.choices[0].delta.content
-                except (IndexError, AttributeError) as e:
-                    # Skip malformed chunks silently (some APIs send empty chunks)
-                    continue
-                except Exception as e:
-                    logger.warning(f"Skipping chunk due to error: {e}")
-                    continue
-        except Exception as e:
-            logger.error(f"Error during Custom API call for /chat: {e}", exc_info=True)
-            # Handle specific connection errors with retry suggestion
-            if "peer closed connection" in str(e) or "incomplete chunked read" in str(e):
-                yield "Connection interrupted. Please try again."
-            else:
-                yield f"Error processing with Custom API: {str(e)}"
-        finally:
-            if client:
-                try:
-                    await client.close()
-                except Exception as cleanup_error:
-                    logger.warning(f"Error closing OpenAI client: {cleanup_error}")
-    return StreamingResponse(
-        custom_api_streamer(),
-        media_type="text/plain",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-        }
-    )
-@app.post("/gemini/submit_task", response_model=TaskSubmissionResponse)
-async def submit_gemini_task(request: GeminiTaskRequest, background_tasks: BackgroundTasks):
-    task_id = str(uuid.uuid4())
-    logger.info(f"Received Gemini task submission. Assigning Task ID: {task_id}. Message: '{request.message[:50]}...'")
-    gemini_api_key_from_request = request.api_key
-    gemini_api_key_secret = os.getenv("GEMINI_API_KEY")
-    key_to_use = gemini_api_key_from_request
-    if not key_to_use:
-        logger.error(f"[Task {task_id}] Gemini API Key missing for task submission.")
-        raise HTTPException(status_code=400, detail="Gemini API Key required.")
-    requested_model = request.gemini_model or DEFAULT_GEMINI_MODEL
-    logger.info(key_to_use)
-    current_time = datetime.now(timezone.utc)
-    tasks_db[task_id] = {
-        "status": "PENDING",
-        "result": None,
-        "error": None,
-        "submitted_at": current_time,
-        "last_updated_at": current_time,
-        "request_params": request.model_dump() # Store original request
     }
-    background_tasks.add_task(
-        process_gemini_request_background,
-        task_id,
-        request.message,
-        request.url,
-        requested_model,
-        key_to_use
-    )
-    logger.info(f"[Task {task_id}] Task submitted to background processing.")
-    return TaskSubmissionResponse(
-        task_id=task_id,
-        status="PENDING",
-        task_detail_url=f"/gemini/task/{task_id}" # Provide the URL to poll
-    )
-@app.get("/gemini/task/{task_id}", response_model=TaskStatusResponse)
-async def get_gemini_task_status(task_id: str = Path(..., description="The ID of the task to retrieve")):
-    logger.info(f"Status query for Task ID: {task_id}")
-    task = tasks_db.get(task_id)
-    if not task:
-        logger.warning(f"Task ID not found: {task_id}")
-        raise HTTPException(status_code=404, detail="Task ID not found.")
-    logger.info(f"[Task {task_id}] Current status: {task['status']}")
-    return TaskStatusResponse(
-        task_id=task_id,
-        status=task["status"],
-        submitted_at=task["submitted_at"],
-        last_updated_at=task["last_updated_at"],
-        result=task.get("result"),
-        error=task.get("error"),
-        # request_params=task.get("request_params") # Optionally include original params
-    )
-@app.get("/")
-async def read_root():
-    logger.info("Root endpoint '/' accessed (health check).")
-    return {"message": "API for Direct Chat and Async Gemini Tasks is running."}

 import os
+import json
+import requests
+from datetime import datetime, timedelta
+from typing import List, Dict, Optional
+from fastapi import FastAPI, Request, HTTPException, Depends
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse # <-- Import StreamingResponse
+import asyncio # <-- Import asyncio
+from openai import OpenAI
 import logging
 import time
 from collections import defaultdict
+# --- Security Helper Functions ---
+def verify_origin(request: Request):
+    """Verify that the request comes from an allowed origin for /chat endpoint.
+    The ``Origin`` header is checked first (localhost origins are accepted for
+    development), then the ``Referer`` header is used as a fallback.
+    Args:
+        request: Incoming request whose ``origin`` / ``referer`` headers are inspected.
+    Returns:
+        True when the request is accepted.
+    Raises:
+        HTTPException: 403 when neither header identifies an allowed site.
+    """
+    # Imported locally so this block does not need a new top-of-file import.
+    from urllib.parse import urlsplit
+    origin = request.headers.get("origin")
+    referer = request.headers.get("referer")
+    allowed_origins = {
+        "https://chrunos.com",
+        "https://www.chrunos.com"
+    }
+    # Allow localhost for development (you can remove this in production)
+    if origin and origin.startswith(("http://localhost:", "http://127.0.0.1:")):
+        return True
+    # Check origin header
+    if origin in allowed_origins:
+        return True
+    # Check referer header as fallback. Compare the parsed scheme://host exactly:
+    # a plain prefix test would also accept look-alike hosts such as
+    # "https://chrunos.com.evil.example/".
+    if referer:
+        try:
+            parts = urlsplit(referer)
+            referer_origin = f"{parts.scheme}://{parts.netloc.lower()}"
+        except ValueError:
+            # Malformed Referer (e.g. broken IPv6 literal): treat as not allowed.
+            referer_origin = None
+        if referer_origin in allowed_origins:
+            return True
+    raise HTTPException(
+        status_code=403,
+        detail="Access denied: This endpoint is only accessible from chrunos.com"
+    )
+# --- Configure Logging ---
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# --- Load API Keys from Environment Variables ---
+# Credentials for the Google Custom Search request; when either one is unset the
+# search tool logs an error and returns no results.
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+GOOGLE_CX = os.getenv("GOOGLE_CX")
+# API key and base URL handed to the OpenAI client. LLM_API_KEY has no default;
+# LLM_BASE_URL falls back to the hard-coded URL below when the variable is unset.
+LLM_API_KEY = os.getenv("LLM_API_KEY")
+LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
+# --- Enhanced System Prompt ---
+# Both prompts are templates: the "{current_date}" placeholder is filled in with
+# str.format() by the /chat endpoint before the text is sent as the system message.
+# Variant used when search is enabled for the request.
+SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.
+IMPORTANT: When search tools are available to you, you should USE them for any query that could benefit from current, recent, or specific factual information, even if you have some knowledge about the topic from your training data.
+**When to use search (be proactive about using search when available):**
+- Any mention of recent events, current affairs, or "latest" information
+- Specific facts that could have changed since your training
+- Statistics, prices, scores, or numerical data
+- News, announcements, or current status of anything
+- When the user explicitly asks for current information
+- Any factual query where fresh information would be valuable
+**Response Guidelines:**
+1. Use search tools when they're available and relevant to the query
+2. Synthesize information from multiple sources when possible
+3. Clearly indicate when information comes from search results
+4. Provide comprehensive, well-structured answers
+5. Cite sources appropriately
+6. If search results are contradictory, mention the discrepancy
+**Current Context**: Today's date is {current_date}. Prioritize recent information when available.
+Remember: When in doubt about whether to search, lean towards using the search tool for more accurate and current information."""
+# Variant used when search is disabled for the request.
+SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
+When you don't have current information about recent events or changing data, acknowledge this limitation and suggest that the user might want to search for the most up-to-date information.
+**Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
+# --- Enhanced Web Search Tool Implementation ---
+def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
+    """
+    Query the Google Custom Search JSON API and return cleaned-up result records.
+    Only the first entry of ``queries`` is searched, and results are restricted to
+    the last six months (``dateRestrict=m6``).
+    Args:
+        queries: Search strings; only ``queries[0]`` is used.
+        num_results: Desired number of results, clamped to the API's 1-10 range.
+    Returns:
+        A list of dicts with the keys ``source_title``, ``url``, ``snippet``,
+        ``published_date`` (None when the page exposes no publication time) and
+        ``domain``. An empty list is returned when credentials are missing, the
+        query is blank, nothing is found, or the request fails; failures are
+        logged instead of raised.
+    """
+    # Imported locally so this block does not need a new top-of-file import.
+    from urllib.parse import urlsplit
+    if not GOOGLE_API_KEY or not GOOGLE_CX:
+        logger.error("GOOGLE_API_KEY or GOOGLE_CX environment variables not set.")
+        return []
+    if not queries or not queries[0].strip():
+        logger.warning("Empty search query provided")
+        return []
+    query = queries[0].strip()
+    logger.info(f"Executing Google Custom Search for: '{query}'")
+    search_url = "https://www.googleapis.com/customsearch/v1"
+    params = {
+        "key": GOOGLE_API_KEY,
+        "cx": GOOGLE_CX,
+        "q": query,
+        "num": max(1, min(num_results, 10)),  # Google API accepts 1..10
+        "dateRestrict": "m6"  # Restrict results to the last 6 months for freshness
+    }
+    try:
+        response = requests.get(search_url, params=params, timeout=15)
+        response.raise_for_status()
+        search_results = response.json()
+        if "items" not in search_results:
+            logger.warning(f"No search results found for query: '{query}'")
+            return []
+        parsed_results = []
+        for item in search_results["items"]:
+            # "or ''" guards against fields that are present but null.
+            title = (item.get("title") or "").strip()
+            url = (item.get("link") or "").strip()
+            snippet = (item.get("snippet") or "").strip()
+            # Skip results with missing essential information
+            if not (title and url and snippet):
+                continue
+            # Extract publication date if available
+            pub_date = None
+            for meta in item.get("pagemap", {}).get("metatags", []):
+                if "article:published_time" in meta:
+                    pub_date = meta["article:published_time"]
+                    break
+            # Parse the host instead of indexing url.split('/'): a URL without a
+            # scheme used to raise IndexError and discard every result.
+            try:
+                domain = urlsplit(url).netloc or url
+            except ValueError:
+                domain = url
+            parsed_results.append({
+                "source_title": title,
+                "url": url,
+                "snippet": snippet,
+                "published_date": pub_date,
+                "domain": domain
+            })
+        logger.info(f"Successfully parsed {len(parsed_results)} search results")
+        return parsed_results
+    except requests.exceptions.Timeout:
+        logger.error("Google search request timed out")
+        return []
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Error during Google search request: {e}")
+        return []
+    except Exception as e:
+        logger.error(f"Unexpected error in google_search_tool: {e}")
+        return []
+def format_search_results_for_llm(search_results: List[Dict]) -> str:
+    """
+    Render search result records as one text block for the LLM.
+    The block opens with a header carrying today's date, lists each result
+    (title, domain, URL, optional publication date, snippet) and closes with
+    instructions to synthesize and cite. A fixed fallback sentence is returned
+    when ``search_results`` is empty.
+    """
+    if not search_results:
+        return "No relevant search results were found for this query."
+    today = datetime.now().strftime("%Y-%m-%d")
+    sections = [f"Search Results (Retrieved on {today}):\n"]
+    for index, entry in enumerate(search_results, start=1):
+        lines = [
+            f"\n--- Result {index} ---",
+            f"Title: {entry['source_title']}",
+            f"Source: {entry['domain']}",
+            f"URL: {entry['url']}",
+        ]
+        published = entry.get('published_date')
+        if published:
+            lines.append(f"Published: {published}")
+        lines.append(f"Content: {entry['snippet']}")
+        sections.append("\n".join(lines))
+    sections.append("\n--- End of Search Results ---\n")
+    sections.append("Please synthesize this information to provide a comprehensive answer to the user's question. If the search results contain conflicting information, please note the discrepancy. Always cite your sources when using information from the search results.")
+    return "\n".join(sections)
+# --- FastAPI Application Setup ---
+app = FastAPI(title="AI Chatbot with Enhanced Search", version="2.0.0")
+# CORS: browsers may call this API from the chrunos.com origins and from two
+# localhost ports used for development; credentials and all request headers are
+# allowed for the listed methods.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "https://chrunos.com",
+        "https://www.chrunos.com",
+        "http://localhost:3000",  # For local development
+        "http://localhost:8000",  # For local development
+    ],
+    allow_credentials=True,
+    allow_methods=["GET", "POST", "OPTIONS"],
+    allow_headers=["*"],
+)
+# --- OpenAI Client Initialization ---
+# `client` stays None when configuration is missing; the /chat endpoint checks for
+# that and answers with HTTP 500. LLM_BASE_URL has a built-in default, so it is only
+# falsy here when the environment variable is explicitly set to an empty string.
+if not LLM_API_KEY or not LLM_BASE_URL:
+    logger.error("LLM_API_KEY or LLM_BASE_URL not configured")
+    client = None
+else:
+    client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
+    logger.info("OpenAI client initialized successfully")
+# --- Enhanced Tool Definition ---
+# Tool schema passed to the chat-completions call when search is enabled. It declares
+# a single function that takes one required string argument, "query".
+# NOTE(review): the function name contains a space; OpenAI-style APIs normally only
+# accept letters, digits, "_" and "-" in function names, so this looks like a garbled
+# "google_search" - confirm. The /chat handler compares tool_call.function.name
+# against this exact string, so both places must be changed together.
+available_tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "Google Search",
+            "description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query. Be specific and include relevant keywords. For recent events, include time-related terms like 'latest', '2024', 'recent', etc."
+                    }
+                },
+                "required": ["query"]
+            }
+        }
+    }
+]
+def should_use_search(message: str) -> bool:
+    """
+    Decide heuristically whether a message should have web search enabled.
+    Returns True when the message contains one of the indicator words or phrases
+    below as a whole word (case-insensitive). Simple inflections are accepted
+    ("prices", "released", "currently"), but an indicator buried inside another
+    word is not ("now" in "know", "score" in "underscore").
+    """
+    # Imported locally so this block does not need a new top-of-file import.
+    import re
+    # Strong indicators: time-sensitive or frequently changing information
+    search_indicators = [
+        "latest", "recent", "current", "now", "today", "this year", "2024", "2025",
+        "news", "update", "what's happening", "status", "price", "stock",
+        "weather", "score", "results", "announcement", "release"
+    ]
+    # Moderate indicators: factual look-up phrasing
+    factual_indicators = [
+        "who is", "what is", "where is", "when did", "how many", "statistics",
+        "data", "information about", "tell me about", "facts about"
+    ]
+    alternatives = "|".join(
+        re.escape(indicator) for indicator in search_indicators + factual_indicators
+    )
+    # \b on both sides prevents matches inside longer words; the optional suffix
+    # keeps common plural / past-tense / adverb forms matching.
+    pattern = r"\b(?:" + alternatives + r")(?:s|es|d|ed|ly)?\b"
+    return re.search(pattern, message.lower()) is not None
 # Rate limiting dictionary
 class RateLimiter:
         return len(self.requests[user_ip])
+# Initialize rate limiter with 50 requests per day
 rate_limiter = RateLimiter(
     max_requests=50,
     time_window=timedelta(days=1)
         self.last_successful_index = index
+# --- Enhanced Chatbot Endpoint (with Streaming) ---
+@app.post("/chat")
+async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
+    """Answer one chat message, optionally using web search, as an SSE stream.
+    Request JSON fields: ``message`` (required), ``use_search`` (or its alias
+    ``user_search``; when absent the decision is made by ``should_use_search``),
+    ``temperature`` (0-2, default 0.7) and ``history`` (earlier user/assistant turns).
+    The stream carries one metadata event (``sources``, ``search_used``), then
+    ``response_chunk`` events, an ``error`` event if streaming fails, and finally
+    ``data: [DONE]``.
+    Raises:
+        HTTPException: 400 for a missing message or malformed JSON body, 500 when
+            the LLM client is not configured or an unexpected error occurs.
+    """
     user_ip = get_user_ip(request)
     if rate_limiter.is_rate_limited(user_ip):
                 "url": "https://t.me/chrunoss"
             }
         )
+    if not client:
+        raise HTTPException(status_code=500, detail="LLM client not configured")
+    model_name = "unsloth/Qwen3-30B-A3B-GGUF"
+    try:
+        data = await request.json()
+        if not isinstance(data, dict):
+            raise HTTPException(status_code=400, detail="Request body must be a JSON object")
+        raw_message = data.get("message", "")
+        user_message = raw_message.strip() if isinstance(raw_message, str) else ""
+        use_search = data.get("use_search")
+        if use_search is None:
+            # "user_search" is accepted as an alternative spelling of the flag
+            use_search = data.get("user_search")
+        temperature = data.get("temperature", 0.7)
+        if isinstance(temperature, bool) or not isinstance(temperature, (int, float)) or not 0 <= temperature <= 2:
+            temperature = 0.7
+        # History is client-supplied: keep only well-formed user/assistant turns so a
+        # caller cannot inject extra system prompts or break the list concatenation.
+        raw_history = data.get("history", [])
+        if not isinstance(raw_history, list):
+            raw_history = []
+        conversation_history = [
+            {"role": turn["role"], "content": turn["content"]}
+            for turn in raw_history
+            if isinstance(turn, dict)
+            and turn.get("role") in ("user", "assistant")
+            and isinstance(turn.get("content"), str)
+        ]
+        if not user_message:
+            raise HTTPException(status_code=400, detail="No message provided")
+        if use_search is None:
+            use_search = should_use_search(user_message)
+        # --- Message and Tool Call Preparation ---
+        current_date = datetime.now().strftime("%Y-%m-%d")
+        prompt_template = SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH
+        system_message = {"role": "system", "content": prompt_template.format(current_date=current_date)}
+        messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
+        llm_kwargs = {
+            "model": model_name,
+            "temperature": temperature,
+            "messages": messages,
+            "max_tokens": 2000
+        }
+        if use_search:
+            llm_kwargs["tools"] = available_tools
+            llm_kwargs["tool_choice"] = "auto"
+        # First LLM call (tool decision). The OpenAI client is synchronous, so run it
+        # in the default executor instead of blocking the event loop.
+        loop = asyncio.get_running_loop()
+        llm_response = await loop.run_in_executor(
+            None, lambda: client.chat.completions.create(**llm_kwargs)
+        )
+        assistant_message = llm_response.choices[0].message
+        tool_calls = assistant_message.tool_calls
+        source_links = []
+        if tool_calls:
+            logger.info(f"Processing {len(tool_calls)} tool calls")
+            messages.append(assistant_message)
+            for tool_call in tool_calls:
+                # Every tool call must be answered by a "tool" message, otherwise the
+                # follow-up completion request is rejected; start from a fallback text.
+                tool_output = "Search could not be completed for this tool call."
+                if tool_call.function.name == "Google Search":
+                    try:
+                        function_args = json.loads(tool_call.function.arguments)
+                        search_query = function_args.get("query", "").strip()
+                        if search_query:
+                            # requests-based search is blocking as well
+                            search_results = await loop.run_in_executor(
+                                None, lambda q=search_query: google_search_tool([q], num_results=5)
+                            )
+                            for result in search_results:
+                                source_links.append({
+                                    "title": result["source_title"],
+                                    "url": result["url"],
+                                    "domain": result["domain"]
+                                })
+                            tool_output = format_search_results_for_llm(search_results)
+                    except Exception as e:
+                        logger.error(f"Error processing tool call: {e}")
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tool_call.id,
+                    "content": tool_output
+                })
+        # --- Streaming ---
+        def response_generator():
+            """Yield the SSE events of the final answer.
+            A plain (non-async) generator on purpose: StreamingResponse iterates
+            synchronous iterators in a worker thread, so the blocking OpenAI stream
+            does not stall the event loop.
+            """
+            # First, yield metadata (like sources) as a single event
+            initial_data = {
+                "sources": source_links,
+                "search_used": bool(tool_calls),
+            }
+            yield f"data: {json.dumps(initial_data)}\n\n"
+            try:
+                # This is the final API call that will actually be streamed
+                stream = client.chat.completions.create(
+                    model=model_name,
+                    temperature=temperature,
+                    messages=messages,
+                    max_tokens=2000,
+                    stream=True
+                )
+                for chunk in stream:
+                    if not chunk.choices:
+                        # Some chunks carry no choices; nothing to forward
+                        continue
+                    content = chunk.choices[0].delta.content
+                    if content:
+                        # Yield each piece of content in SSE format
+                        yield f"data: {json.dumps({'response_chunk': content})}\n\n"
+            except Exception:
+                logger.exception("Error while streaming LLM response")
+                yield f"data: {json.dumps({'error': 'The response stream was interrupted.'})}\n\n"
+            # Signal the end of the stream. Emitted after the try block rather than in
+            # a `finally`, because yielding while the generator is being closed (client
+            # disconnect) raises RuntimeError.
+            yield "data: [DONE]\n\n"
+        return StreamingResponse(response_generator(), media_type="text/event-stream")
+    except HTTPException:
+        raise
+    except json.JSONDecodeError:
+        logger.error("Invalid JSON in request body")
+        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
+    except Exception:
+        # Log the details server-side only; do not echo internal errors to the client.
+        logger.exception("Unexpected error in /chat endpoint")
+        raise HTTPException(status_code=500, detail="Internal server error")
+# --- Root Endpoint (service banner) ---
+@app.get("/")
+async def root():
+    """Return a static description of the service plus the current server time."""
+    feature_names = ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting", "Streaming Response"]
+    return {
+        "message": "Enhanced AI Chatbot API is running",
+        "version": "2.0.0",
+        "features": feature_names,
+        "timestamp": datetime.now().isoformat()
     }
+# --- Health Check Endpoint ---
+@app.get("/health")
+async def health_check():
+    """Report whether the LLM client and the Google search credentials are configured.
+    The "status" field is always "healthy"; per-service availability is given by the
+    booleans under "services".
+    """
+    service_flags = {
+        "llm_client": client is not None,
+        "Google Search": bool(GOOGLE_API_KEY and GOOGLE_CX)
+    }
+    return {
+        "status": "healthy",
+        "timestamp": datetime.now().isoformat(),
+        "services": service_flags
+    }