pavan10504 committed
Commit c7968ae · verified · 1 Parent(s): 1bc3301

Upload 24 files

.env ADDED
@@ -0,0 +1,2 @@
+ TAVILY_API_KEY=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
+ GEMINI_API_KEY=AIzaSyBCbtPfycqvBAI9yo8JLvhXIxxQrAd9_kk
.gitignore ADDED
@@ -0,0 +1,3 @@
+ venv/
+ __pycache__/
+ .env
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY app ./app
+
+ EXPOSE 7860
+
+ # Hugging Face Spaces injects PORT; default to 7860 for local/dev.
+ CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md CHANGED
@@ -1,10 +1,16 @@
- ---
- title: Search Bot
- emoji: 🐢
- colorFrom: blue
- colorTo: green
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Web Bot Backend
+ sdk: docker
+ app_port: 7860
+ ---
+
+ This Space runs the FastAPI backend.
+
+ **Required secrets / environment variables**
+
+ - `GEMINI_API_KEY`
+ - `TAVILY_API_KEY`
+
+ The container starts with:
+
+ - `uvicorn app.main:app --host 0.0.0.0 --port $PORT` (Spaces provides `PORT`; defaults to `7860` locally)
app/__init__.py ADDED
@@ -0,0 +1 @@
+
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (183 Bytes)
app/__pycache__/main.cpython-313.pyc ADDED
Binary file (2.65 kB)
app/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (841 Bytes)
app/agent/__init__.py ADDED
@@ -0,0 +1 @@
+
app/agent/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (189 Bytes)
app/agent/__pycache__/analyzer.cpython-313.pyc ADDED
Binary file (1.79 kB)
app/agent/__pycache__/gemini_client.cpython-313.pyc ADDED
Binary file (918 Bytes)
app/agent/__pycache__/orchestrator.cpython-313.pyc ADDED
Binary file (3.73 kB)
app/agent/__pycache__/reader.cpython-313.pyc ADDED
Binary file (600 Bytes)
app/agent/__pycache__/reasoner.cpython-313.pyc ADDED
Binary file (3.39 kB)
app/agent/__pycache__/search.cpython-313.pyc ADDED
Binary file (764 Bytes)
app/agent/analyzer.py ADDED
@@ -0,0 +1,46 @@
+ from app.agent.gemini_client import get_client, json_generation_config
+ import json
+
+ INTENT_SCHEMA = {
+     "type": "object",
+     "properties": {
+         "domain": { "type": "string" },
+         "entity": { "type": "string" },
+         "refined_query": { "type": "string" }
+     },
+     "required": ["domain", "entity", "refined_query"]
+ }
+
+ def analyze_query(query: str):
+     client = get_client()
+
+     prompt = f"""
+     You are a search query optimization assistant.
+
+     User query: "{query}"
+
+     Your task:
+     1. Identify the domain (e.g., technology, automotive, science, business, etc.)
+     2. Extract the main entity being asked about
+     3. Refine the query for optimal web search results by:
+     - Making it more specific and searchable
+     - Adding relevant keywords (like "2024", "2025", "latest", "current" for recent info)
+     - Removing conversational filler words
+     - Keeping it concise and search-engine friendly
+
+     Examples:
+     - "what are the new tesla cars" → "latest Tesla car models 2024 2025"
+     - "tell me about iphone features" → "iPhone 15 Pro features specifications"
+     - "best laptops" → "best laptops 2024 reviews"
+
+     Return the refined query that will get the best search results.
+     """
+
+     response = client.models.generate_content(
+         model="gemini-flash-latest",
+         contents=prompt,
+         config=json_generation_config(schema=INTENT_SCHEMA)
+     )
+
+     # response_mime_type + response_schema constrain Gemini to return valid JSON
+     return json.loads(response.text)
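For reference, a minimal usage sketch of this module (the concrete field values in the comment are illustrative; only the three keys in INTENT_SCHEMA are guaranteed):

from app.agent.analyzer import analyze_query

# Assumes GEMINI_API_KEY is already set in the environment (e.g. via load_dotenv()).
intent = analyze_query("what are the new tesla cars")
# Shape follows INTENT_SCHEMA; actual values depend on the model, e.g.:
# {"domain": "automotive", "entity": "Tesla", "refined_query": "latest Tesla car models 2024 2025"}
print(intent["refined_query"])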
app/agent/gemini_client.py ADDED
@@ -0,0 +1,20 @@
+ import os
+ from google import genai
+
+ GEMINI_MODEL = "models/gemini-1.5-flash-001"  # currently unused; callers pass model names directly
+
+ def get_client():
+     return genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+ def json_generation_config(schema: dict | None = None):
+     config = {
+         "temperature": 0,
+         "top_p": 0.9,
+         "max_output_tokens": 1024,
+         "response_mime_type": "application/json",
+     }
+
+     if schema:
+         config["response_schema"] = schema
+
+     return config
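A small sketch of how these two helpers combine, assuming the google-genai SDK accepts a plain dict for `config` as the repo's other modules do; the schema and prompt below are made up for illustration:

from app.agent.gemini_client import get_client, json_generation_config

client = get_client()
# Hypothetical schema: constrain the reply to a tiny JSON object with one string field.
schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
response = client.models.generate_content(
    model="gemini-flash-latest",
    contents="Answer in JSON with a short 'answer' field: what is FastAPI?",
    config=json_generation_config(schema=schema),
)
print(response.text)  # JSON text, parseable with json.loads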
app/agent/orchestrator.py ADDED
@@ -0,0 +1,117 @@
+ from app.agent.analyzer import analyze_query
+ from app.agent.search import search_web
+ from app.agent.reasoner import refine_answer
+
+ def run_agent(query: str):
+     try:
+         analysis = analyze_query(query)
+     except Exception:
+         analysis = {
+             "domain": "unknown",
+             "entity": query,
+             "refined_query": query
+         }
+
+     search_response = search_web(analysis["refined_query"])
+
+     # Get Tavily's generated answer and sources
+     tavily_answer = search_response.get("answer", "")
+     results = search_response.get("results", [])
+     sources = [r["url"] for r in results[:3]]
+
+     # Optionally refine the answer with Gemini for better formatting
+     try:
+         if tavily_answer:
+             answer = refine_answer(query, tavily_answer)
+         else:
+             answer = "I couldn't find a clear answer from the available sources."
+     except Exception:
+         # Fallback to Tavily's answer if Gemini refinement fails
+         answer = tavily_answer or "I encountered an issue while analyzing the information."
+
+     return {
+         "interpreted_query": analysis["refined_query"],
+         "answer": answer,
+         "sources": sources
+     }
+
+ def run_agent_stream(query: str):
+     """Generator that yields streaming events for frontend display"""
+     yield {"type": "step", "step": "start", "message": f"Received query: {query}"}
+
+     # Step 1: Analyze query
+     yield {"type": "step", "step": "analyzing", "message": "Analyzing query intent..."}
+     try:
+         analysis = analyze_query(query)
+         yield {
+             "type": "step",
+             "step": "analyzed",
+             "message": f"Refined query: {analysis['refined_query']}",
+             "data": {
+                 "domain": analysis.get('domain', 'unknown'),
+                 "entity": analysis.get('entity', query),
+                 "refined_query": analysis['refined_query']
+             }
+         }
+     except Exception:
+         yield {"type": "step", "step": "analyzed", "message": "Query analysis failed, using original query"}
+         analysis = {
+             "domain": "unknown",
+             "entity": query,
+             "refined_query": query
+         }
+
+     # Step 2: Search web
+     yield {"type": "step", "step": "searching", "message": "Searching the web..."}
+     search_response = search_web(analysis["refined_query"])
+     tavily_answer = search_response.get("answer", "")
+     results = search_response.get("results", [])
+     sources = [r["url"] for r in results]  # Get all sources
+
+     yield {
+         "type": "step",
+         "step": "searched",
+         "message": f"Found {len(results)} results",
+         "data": {
+             "sources": sources,
+             "raw_answer": tavily_answer
+         }
+     }
+
+     # Step 3: Refine answer
+     yield {"type": "step", "step": "refining", "message": "Refining answer with AI..."}
+
+     refined_answer = None
+     refinement_error = None
+
+     try:
+         if tavily_answer:
+             refined_answer = refine_answer(query, tavily_answer)
+
+             # Check if refinement actually changed anything
+             if refined_answer.strip() == tavily_answer.strip():
+                 yield {"type": "step", "step": "refined", "message": "Answer refined (no changes needed)"}
+             else:
+                 yield {"type": "step", "step": "refined", "message": "Answer refined with AI formatting"}
+         else:
+             refined_answer = "I couldn't find a clear answer from the available sources."
+             yield {"type": "step", "step": "refined", "message": "No raw answer to refine"}
+     except Exception as e:
+         refinement_error = str(e)
+         # Use raw Tavily answer as fallback
+         refined_answer = tavily_answer or "I encountered an issue while analyzing the information."
+         yield {"type": "step", "step": "refined", "message": "Using raw Tavily answer (Gemini unavailable)"}
+
+     answer = refined_answer
+
+     # Final result
+     yield {
+         "type": "result",
+         "data": {
+             "interpreted_query": analysis["refined_query"],
+             "answer": answer,
+             "sources": sources,
+             "raw_answer": tavily_answer,
+             "refinement_error": refinement_error
+         }
+     }
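A small consumption sketch, useful for exercising the generator outside the HTTP layer (the query text is arbitrary; both GEMINI_API_KEY and TAVILY_API_KEY must be set):

from app.agent.orchestrator import run_agent_stream

for event in run_agent_stream("best laptops"):
    if event["type"] == "step":
        print("step:", event["step"], "-", event["message"])
    else:  # "result"
        print("answer:", event["data"]["answer"])
        print("sources:", event["data"]["sources"])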
app/agent/reasoner.py ADDED
@@ -0,0 +1,71 @@
+ from app.agent.gemini_client import get_client
+
+ def refine_answer(user_query: str, tavily_answer: str) -> str:
+     """
+     Refine Tavily's answer using Gemini for better formatting and structure.
+     """
+     client = get_client()
+
+     prompt = f"""You are a factual research assistant. You will receive an answer from a web search and need to refine it for better readability.
+
+     Rules:
+     - Keep ALL factual information intact - do not remove any details
+     - When listing items (languages, products, etc), format as a clear bulleted list with markdown
+     - Each item should be on its own line with a bullet point (-)
+     - Add brief context or explanation where helpful
+     - Make the answer well-structured and easy to scan
+     - Do not add information not present in the original answer
+     - Ensure the answer is complete and ends properly
+
+     User question: "{user_query}"
+
+     Original answer from search:
+     {tavily_answer}
+
+     Refine this answer with proper markdown formatting and clear structure. Make it easy to read while preserving all information.
+     """
+
+     response = client.models.generate_content(
+         model="gemini-flash-latest",
+         contents=prompt,
+         config={
+             "temperature": 0.3,
+             "max_output_tokens": 2000
+         }
+     )
+
+     raw_text = response.text or ""
+
+     if not raw_text.strip():
+         return tavily_answer  # Fallback to original
+
+     return clean_answer(raw_text)
+
+ def clean_answer(text: str) -> str:
+     if not text:
+         return "No answer could be generated."
+
+     text = text.strip()
+
+     # Remove incomplete bullet points or dangling text
+     if text.endswith(":") or text.endswith("("):
+         # Find the last complete line
+         lines = text.split("\n")
+         for i in range(len(lines) - 1, -1, -1):
+             if lines[i].strip() and not lines[i].strip().endswith(":") and not lines[i].strip().endswith("("):
+                 return "\n".join(lines[:i+1])
+         return "Unable to generate a complete answer from the sources."
+
+     # Check for incomplete last line (no period, ends mid-word)
+     lines = text.split("\n")
+     if lines and lines[-1].strip():
+         last_line = lines[-1].strip()
+         # If last line doesn't end with proper punctuation and looks incomplete
+         if not any(last_line.endswith(char) for char in ['.', '!', '?', ')', ']', '"', "'"]):
+             if len(lines) > 1:
+                 return "\n".join(lines[:-1])
+
+     return text
+
+
+
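clean_answer can be exercised on its own; a short sketch with a made-up truncated model output showing how the dangling last line is dropped:

from app.agent.reasoner import clean_answer

truncated = "Python is widely used for:\n- web backends\n- data analysis\n- scripting and"
print(clean_answer(truncated))
# The last line lacks closing punctuation, so clean_answer drops it and keeps the first three lines.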
app/agent/search.py ADDED
@@ -0,0 +1,18 @@
+ from tavily import TavilyClient
+ import os
+
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+
+ if not TAVILY_API_KEY:
+     raise RuntimeError("TAVILY_API_KEY is missing")
+
+ client = TavilyClient(api_key=TAVILY_API_KEY)
+
+ def search_web(query: str):
+     response = client.search(
+         query=query,
+         search_depth="basic",
+         max_results=5,
+         include_answer="advanced"
+     )
+     return response
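The orchestrator only relies on two fields of the Tavily response; a sketch of what it reads (the query string and printed values are illustrative):

from app.agent.search import search_web

resp = search_web("latest Tesla car models 2024 2025")
print(resp.get("answer", ""))        # Tavily's synthesized answer string
for r in resp.get("results", [])[:3]:
    print(r["url"])                  # source URLs, as used by the orchestrator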
app/main.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # MUST be first
+
+ DEBUG = os.getenv("DEBUG", "").lower() in {"1", "true", "yes"}
+ if DEBUG:
+     print("TAVILY_API_KEY exists:", bool(os.getenv("TAVILY_API_KEY")))
+     print("GEMINI_API_KEY exists:", bool(os.getenv("GEMINI_API_KEY")))
+
+ from fastapi import FastAPI
+ from fastapi.responses import StreamingResponse
+ from app.agent.orchestrator import run_agent, run_agent_stream
+ from app.schemas import QueryRequest, QueryResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ import json
+
+ app = FastAPI(title="Agentic Web Bot")
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ @app.get("/")
+ def root():
+     return {"status": "ok", "service": "Agentic Web Bot"}
+
+
+ @app.get("/healthz")
+ def healthz():
+     return {"ok": True}
+
+ @app.post("/ask", response_model=QueryResponse)
+ def ask_agent(payload: QueryRequest):
+     return run_agent(payload.query)
+
+ @app.post("/ask/stream")
+ def ask_agent_stream(payload: QueryRequest):
+     async def event_stream():
+         for event in run_agent_stream(payload.query):
+             yield f"data: {json.dumps(event)}\n\n"
+
+     return StreamingResponse(
+         event_stream(),
+         media_type="text/event-stream",
+         headers={
+             "Cache-Control": "no-cache",
+             "Connection": "keep-alive",
+         }
+     )
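A client-side sketch for the SSE endpoint, assuming the `requests` package is available (it is not necessarily listed in requirements.txt) and the server is running locally on port 7860:

import json
import requests  # assumption: may need to be installed separately

with requests.post(
    "http://localhost:7860/ask/stream",
    json={"query": "best laptops"},
    stream=True,
) as resp:
    for raw in resp.iter_lines():
        if raw.startswith(b"data: "):
            event = json.loads(raw[len(b"data: "):])
            print(event["type"], event.get("message") or event.get("data"))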
app/schemas.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic import BaseModel
+ from typing import List
+
+ class QueryRequest(BaseModel):
+     query: str
+
+ class QueryResponse(BaseModel):
+     interpreted_query: str
+     answer: str
+     sources: List[str]
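A round-trip sketch of these models; the field values are illustrative, and `.model_dump()` assumes Pydantic v2 (use `.dict()` on v1):

from app.schemas import QueryRequest, QueryResponse

req = QueryRequest(query="best laptops")
resp = QueryResponse(
    interpreted_query="best laptops 2024 reviews",
    answer="Here are some well-reviewed laptops...",
    sources=["https://example.com/review"],
)
print(resp.model_dump())  # Pydantic v2; on v1 use resp.dict()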
requirements.txt ADDED
Binary file (2.4 kB)