pavan10504 committed
Commit c7968ae · verified · 1 Parent(s): 1bc3301

Upload 24 files

.env ADDED
@@ -0,0 +1,2 @@
+ TAVILY_API_KEY=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
+ GEMINI_API_KEY=AIzaSyBCbtPfycqvBAI9yo8JLvhXIxxQrAd9_kk
.gitignore ADDED
@@ -0,0 +1,3 @@
+ venv/
+ __pycache__/
+ .env
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY app ./app
+
+ EXPOSE 7860
+
+ # Hugging Face Spaces injects PORT; default to 7860 for local/dev.
+ CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md CHANGED
@@ -1,10 +1,16 @@
- ---
- title: Search Bot
- emoji: 🐢
- colorFrom: blue
- colorTo: green
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: Web Bot Backend
+ sdk: docker
+ app_port: 7860
+ ---
+
+ This Space runs the FastAPI backend.
+
+ **Required secrets / environment variables**
+
+ - `GEMINI_API_KEY`
+ - `TAVILY_API_KEY`
+
+ The container starts with:
+
+ - `uvicorn app.main:app --host 0.0.0.0 --port $PORT` (Spaces provides `PORT`; defaults to `7860` locally)
app/__init__.py ADDED
@@ -0,0 +1 @@
+
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (183 Bytes)
app/__pycache__/main.cpython-313.pyc ADDED
Binary file (2.65 kB)
app/__pycache__/schemas.cpython-313.pyc ADDED
Binary file (841 Bytes)
app/agent/__init__.py ADDED
@@ -0,0 +1 @@
+
app/agent/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (189 Bytes)
app/agent/__pycache__/analyzer.cpython-313.pyc ADDED
Binary file (1.79 kB)
app/agent/__pycache__/gemini_client.cpython-313.pyc ADDED
Binary file (918 Bytes)
app/agent/__pycache__/orchestrator.cpython-313.pyc ADDED
Binary file (3.73 kB)
app/agent/__pycache__/reader.cpython-313.pyc ADDED
Binary file (600 Bytes)
app/agent/__pycache__/reasoner.cpython-313.pyc ADDED
Binary file (3.39 kB)
app/agent/__pycache__/search.cpython-313.pyc ADDED
Binary file (764 Bytes)
app/agent/analyzer.py ADDED
@@ -0,0 +1,46 @@
+ from app.agent.gemini_client import get_client, json_generation_config
+ import json
+
+ INTENT_SCHEMA = {
+     "type": "object",
+     "properties": {
+         "domain": { "type": "string" },
+         "entity": { "type": "string" },
+         "refined_query": { "type": "string" }
+     },
+     "required": ["domain", "entity", "refined_query"]
+ }
+
+ def analyze_query(query: str):
+     client = get_client()
+
+     prompt = f"""
+     You are a search query optimization assistant.
+
+     User query: "{query}"
+
+     Your task:
+     1. Identify the domain (e.g., technology, automotive, science, business, etc.)
+     2. Extract the main entity being asked about
+     3. Refine the query for optimal web search results by:
+     - Making it more specific and searchable
+     - Adding relevant keywords (like "2024", "2025", "latest", "current" for recent info)
+     - Removing conversational filler words
+     - Keeping it concise and search-engine friendly
+
+     Examples:
+     - "what are the new tesla cars" → "latest Tesla car models 2024 2025"
+     - "tell me about iphone features" → "iPhone 15 Pro features specifications"
+     - "best laptops" → "best laptops 2024 reviews"
+
+     Return the refined query that will get the best search results.
+     """
+
+     response = client.models.generate_content(
+         model="gemini-flash-latest",
+         contents=prompt,
+         config=json_generation_config(schema=INTENT_SCHEMA)
+     )
+
+     # response_mime_type + response_schema constrain Gemini to return valid JSON
+     return json.loads(response.text)
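For reference, a minimal usage sketch of this module (the concrete field values in the comment are illustrative; only the three keys in INTENT_SCHEMA are guaranteed):

from app.agent.analyzer import analyze_query

# Assumes GEMINI_API_KEY is already set in the environment (e.g. via load_dotenv()).
intent = analyze_query("what are the new tesla cars")
# Shape follows INTENT_SCHEMA; actual values depend on the model, e.g.:
# {"domain": "automotive", "entity": "Tesla", "refined_query": "latest Tesla car models 2024 2025"}
print(intent["refined_query"])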
app/agent/gemini_client.py ADDED
@@ -0,0 +1,20 @@
+ import os
+ from google import genai
+
+ GEMINI_MODEL = "models/gemini-1.5-flash-001"  # currently unused; callers pass model names directly
+
+ def get_client():
+     return genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+
+ def json_generation_config(schema: dict | None = None):
+     config = {
+         "temperature": 0,
+         "top_p": 0.9,
+         "max_output_tokens": 1024,
+         "response_mime_type": "application/json",
+     }
+
+     if schema:
+         config["response_schema"] = schema
+
+     return config
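A small sketch of how these two helpers combine, assuming the google-genai SDK accepts a plain dict for `config` as the repo's other modules do; the schema and prompt below are made up for illustration:

from app.agent.gemini_client import get_client, json_generation_config

client = get_client()
# Hypothetical schema: constrain the reply to a tiny JSON object with one string field.
schema = {"type": "object", "properties": {"answer": {"type": "string"}}, "required": ["answer"]}
response = client.models.generate_content(
    model="gemini-flash-latest",
    contents="Answer in JSON with a short 'answer' field: what is FastAPI?",
    config=json_generation_config(schema=schema),
)
print(response.text)  # JSON text, parseable with json.loads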
app/agent/orchestrator.py ADDED
@@ -0,0 +1,117 @@
+ from app.agent.analyzer import analyze_query
+ from app.agent.search import search_web
+ from app.agent.reasoner import refine_answer
+
+ def run_agent(query: str):
+     try:
+         analysis = analyze_query(query)
+     except Exception:
+         analysis = {
+             "domain": "unknown",
+             "entity": query,
+             "refined_query": query
+         }
+
+     search_response = search_web(analysis["refined_query"])
+
+     # Get Tavily's generated answer and sources
+     tavily_answer = search_response.get("answer", "")
+     results = search_response.get("results", [])
+     sources = [r["url"] for r in results[:3]]
+
+     # Optionally refine the answer with Gemini for better formatting
+     try:
+         if tavily_answer:
+             answer = refine_answer(query, tavily_answer)
+         else:
+             answer = "I couldn't find a clear answer from the available sources."
+     except Exception:
+         # Fallback to Tavily's answer if Gemini refinement fails
+         answer = tavily_answer or "I encountered an issue while analyzing the information."
+
+     return {
+         "interpreted_query": analysis["refined_query"],
+         "answer": answer,
+         "sources": sources
+     }
+
+ def run_agent_stream(query: str):
+     """Generator that yields streaming events for frontend display"""
+     yield {"type": "step", "step": "start", "message": f"Received query: {query}"}
+
+     # Step 1: Analyze query
+     yield {"type": "step", "step": "analyzing", "message": "Analyzing query intent..."}
+     try:
+         analysis = analyze_query(query)
+         yield {
+             "type": "step",
+             "step": "analyzed",
+             "message": f"Refined query: {analysis['refined_query']}",
+             "data": {
+                 "domain": analysis.get('domain', 'unknown'),
+                 "entity": analysis.get('entity', query),
+                 "refined_query": analysis['refined_query']
+             }
+         }
+     except Exception:
+         yield {"type": "step", "step": "analyzed", "message": "Query analysis failed, using original query"}
+         analysis = {
+             "domain": "unknown",
+             "entity": query,
+             "refined_query": query
+         }
+
+     # Step 2: Search web
+     yield {"type": "step", "step": "searching", "message": "Searching the web..."}
+     search_response = search_web(analysis["refined_query"])
+     tavily_answer = search_response.get("answer", "")
+     results = search_response.get("results", [])
+     sources = [r["url"] for r in results]  # Get all sources
+
+     yield {
+         "type": "step",
+         "step": "searched",
+         "message": f"Found {len(results)} results",
+         "data": {
+             "sources": sources,
+             "raw_answer": tavily_answer
+         }
+     }
+
+     # Step 3: Refine answer
+     yield {"type": "step", "step": "refining", "message": "Refining answer with AI..."}
+
+     refined_answer = None
+     refinement_error = None
+
+     try:
+         if tavily_answer:
+             refined_answer = refine_answer(query, tavily_answer)
+
+             # Check if refinement actually changed anything
+             if refined_answer.strip() == tavily_answer.strip():
+                 yield {"type": "step", "step": "refined", "message": "Answer refined (no changes needed)"}
+             else:
+                 yield {"type": "step", "step": "refined", "message": "Answer refined with AI formatting"}
+         else:
+             refined_answer = "I couldn't find a clear answer from the available sources."
+             yield {"type": "step", "step": "refined", "message": "No raw answer to refine"}
+     except Exception as e:
+         refinement_error = str(e)
+         # Use raw Tavily answer as fallback
+         refined_answer = tavily_answer or "I encountered an issue while analyzing the information."
+         yield {"type": "step", "step": "refined", "message": "Using raw Tavily answer (Gemini unavailable)"}
+
+     answer = refined_answer
+
+     # Final result
+     yield {
+         "type": "result",
+         "data": {
+             "interpreted_query": analysis["refined_query"],
+             "answer": answer,
+             "sources": sources,
+             "raw_answer": tavily_answer,
+             "refinement_error": refinement_error
+         }
+     }
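A small consumption sketch, useful for exercising the generator outside the HTTP layer (the query text is arbitrary; both GEMINI_API_KEY and TAVILY_API_KEY must be set):

from app.agent.orchestrator import run_agent_stream

for event in run_agent_stream("best laptops"):
    if event["type"] == "step":
        print("step:", event["step"], "-", event["message"])
    else:  # "result"
        print("answer:", event["data"]["answer"])
        print("sources:", event["data"]["sources"])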
app/agent/reasoner.py ADDED
@@ -0,0 +1,71 @@
+ from app.agent.gemini_client import get_client
+
+ def refine_answer(user_query: str, tavily_answer: str) -> str:
+     """
+     Refine Tavily's answer using Gemini for better formatting and structure.
+     """
+     client = get_client()
+
+     prompt = f"""You are a factual research assistant. You will receive an answer from a web search and need to refine it for better readability.
+
+     Rules:
+     - Keep ALL factual information intact - do not remove any details
+     - When listing items (languages, products, etc), format as a clear bulleted list with markdown
+     - Each item should be on its own line with a bullet point (-)
+     - Add brief context or explanation where helpful
+     - Make the answer well-structured and easy to scan
+     - Do not add information not present in the original answer
+     - Ensure the answer is complete and ends properly
+
+     User question: "{user_query}"
+
+     Original answer from search:
+     {tavily_answer}
+
+     Refine this answer with proper markdown formatting and clear structure. Make it easy to read while preserving all information.
+     """
+
+     response = client.models.generate_content(
+         model="gemini-flash-latest",
+         contents=prompt,
+         config={
+             "temperature": 0.3,
+             "max_output_tokens": 2000
+         }
+     )
+
+     raw_text = response.text or ""
+
+     if not raw_text.strip():
+         return tavily_answer  # Fallback to original
+
+     return clean_answer(raw_text)
+
+ def clean_answer(text: str) -> str:
+     if not text:
+         return "No answer could be generated."
+
+     text = text.strip()
+
+     # Remove incomplete bullet points or dangling text
+     if text.endswith(":") or text.endswith("("):
+         # Find the last complete line
+         lines = text.split("\n")
+         for i in range(len(lines) - 1, -1, -1):
+             if lines[i].strip() and not lines[i].strip().endswith(":") and not lines[i].strip().endswith("("):
+                 return "\n".join(lines[:i+1])
+         return "Unable to generate a complete answer from the sources."
+
+     # Check for incomplete last line (no period, ends mid-word)
+     lines = text.split("\n")
+     if lines and lines[-1].strip():
+         last_line = lines[-1].strip()
+         # If last line doesn't end with proper punctuation and looks incomplete
+         if not any(last_line.endswith(char) for char in ['.', '!', '?', ')', ']', '"', "'"]):
+             if len(lines) > 1:
+                 return "\n".join(lines[:-1])
+
+     return text
+
+
+
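clean_answer can be exercised on its own; a short sketch with a made-up truncated model output showing how the dangling last line is dropped:

from app.agent.reasoner import clean_answer

truncated = "Python is widely used for:\n- web backends\n- data analysis\n- scripting and"
print(clean_answer(truncated))
# The last line lacks closing punctuation, so clean_answer drops it and keeps the first three lines.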
app/agent/search.py ADDED
@@ -0,0 +1,18 @@
+ from tavily import TavilyClient
+ import os
+
+ TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+
+ if not TAVILY_API_KEY:
+     raise RuntimeError("TAVILY_API_KEY is missing")
+
+ client = TavilyClient(api_key=TAVILY_API_KEY)
+
+ def search_web(query: str):
+     response = client.search(
+         query=query,
+         search_depth="basic",
+         max_results=5,
+         include_answer="advanced"
+     )
+     return response
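The orchestrator only relies on two fields of the Tavily response; a sketch of what it reads (the query string and printed values are illustrative):

from app.agent.search import search_web

resp = search_web("latest Tesla car models 2024 2025")
print(resp.get("answer", ""))        # Tavily's synthesized answer string
for r in resp.get("results", [])[:3]:
    print(r["url"])                  # source URLs, as used by the orchestrator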
app/main.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # MUST be first
+
+ DEBUG = os.getenv("DEBUG", "").lower() in {"1", "true", "yes"}
+ if DEBUG:
+     print("TAVILY_API_KEY exists:", bool(os.getenv("TAVILY_API_KEY")))
+     print("GEMINI_API_KEY exists:", bool(os.getenv("GEMINI_API_KEY")))
+
+ from fastapi import FastAPI
+ from fastapi.responses import StreamingResponse
+ from app.agent.orchestrator import run_agent, run_agent_stream
+ from app.schemas import QueryRequest, QueryResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ import json
+
+ app = FastAPI(title="Agentic Web Bot")
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ @app.get("/")
+ def root():
+     return {"status": "ok", "service": "Agentic Web Bot"}
+
+
+ @app.get("/healthz")
+ def healthz():
+     return {"ok": True}
+
+ @app.post("/ask", response_model=QueryResponse)
+ def ask_agent(payload: QueryRequest):
+     return run_agent(payload.query)
+
+ @app.post("/ask/stream")
+ def ask_agent_stream(payload: QueryRequest):
+     async def event_stream():
+         for event in run_agent_stream(payload.query):
+             yield f"data: {json.dumps(event)}\n\n"
+
+     return StreamingResponse(
+         event_stream(),
+         media_type="text/event-stream",
+         headers={
+             "Cache-Control": "no-cache",
+             "Connection": "keep-alive",
+         }
+     )
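A client-side sketch for the SSE endpoint, assuming the `requests` package is available (it is not necessarily listed in requirements.txt) and the server is running locally on port 7860:

import json
import requests  # assumption: may need to be installed separately

with requests.post(
    "http://localhost:7860/ask/stream",
    json={"query": "best laptops"},
    stream=True,
) as resp:
    for raw in resp.iter_lines():
        if raw.startswith(b"data: "):
            event = json.loads(raw[len(b"data: "):])
            print(event["type"], event.get("message") or event.get("data"))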
app/schemas.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic import BaseModel
+ from typing import List
+
+ class QueryRequest(BaseModel):
+     query: str
+
+ class QueryResponse(BaseModel):
+     interpreted_query: str
+     answer: str
+     sources: List[str]
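A round-trip sketch of these models; the field values are illustrative, and `.model_dump()` assumes Pydantic v2 (use `.dict()` on v1):

from app.schemas import QueryRequest, QueryResponse

req = QueryRequest(query="best laptops")
resp = QueryResponse(
    interpreted_query="best laptops 2024 reviews",
    answer="Here are some well-reviewed laptops...",
    sources=["https://example.com/review"],
)
print(resp.model_dump())  # Pydantic v2; on v1 use resp.dict()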
requirements.txt ADDED
Binary file (2.4 kB)