Update app.py
Browse files
app.py
CHANGED
|
@@ -1,110 +1,173 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
-
import
|
| 4 |
-
import json
|
| 5 |
from typing import Optional
|
| 6 |
from datetime import datetime, timezone
|
| 7 |
-
from fastapi import FastAPI
|
| 8 |
-
from pydantic import BaseModel
|
| 9 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
import httpx
|
| 11 |
import trafilatura
|
| 12 |
-
import
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
try:
|
| 31 |
-
|
|
|
|
| 32 |
async with httpx.AsyncClient(timeout=15) as client:
|
| 33 |
-
resp = await client.post(
|
| 34 |
-
if resp.status_code =
|
| 35 |
-
|
| 36 |
-
results
|
| 37 |
-
if not results: return f"Error: No web results found for query '{query}'."
|
| 38 |
urls = [r["link"] for r in results]
|
| 39 |
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
|
| 40 |
tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
try:
|
| 49 |
genai.configure(api_key=gemini_key)
|
| 50 |
model = genai.GenerativeModel(model_name)
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
return response.text
|
| 54 |
-
except Exception as e:
|
|
|
|
| 55 |
|
| 56 |
-
# ---
|
| 57 |
-
async def
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
all_scraped_text = "\n".join([res for res in search_results if not res.startswith("Error:")])
|
| 73 |
-
if not all_scraped_text: return "Error: Could not retrieve any web content for the planned queries. Check Serper key."
|
| 74 |
-
|
| 75 |
-
# Step 3: Synthesize
|
| 76 |
-
current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 77 |
-
synthesizer_prompt = PROMPT_ULTRADEEP_SYNTHESIZER.format(query=query, current_date=current_date, context_text=all_scraped_text)
|
| 78 |
-
final_report = await call_gemini(synthesizer_prompt, gemini_key, model_name)
|
| 79 |
-
return final_report
|
| 80 |
-
|
| 81 |
-
# --- The Single, Unified FastAPI Endpoint ---
|
| 82 |
-
class ResearchRequest(BaseModel):
|
| 83 |
query: str
|
| 84 |
serper_api_key: str
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
if scraped_text.startswith("Error:"):
|
| 98 |
-
return {"result": scraped_text}
|
| 99 |
-
|
| 100 |
-
current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 101 |
-
prompt_template = PROMPT_DEEP if request.research_mode == "deep" else PROMPT_NORMAL
|
| 102 |
-
final_prompt = prompt_template.format(query=request.query, context_text=scraped_text, current_date=current_date)
|
| 103 |
-
result = await call_gemini(final_prompt, request.gemini_api_key, request.gemini_model)
|
| 104 |
-
|
| 105 |
return {"result": result}
|
| 106 |
|
| 107 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
if __name__ == "__main__":
|
| 109 |
import uvicorn
|
| 110 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
+
import time
|
|
|
|
| 4 |
from typing import Optional
|
| 5 |
from datetime import datetime, timezone
|
|
|
|
|
|
|
|
|
|
| 6 |
import httpx
|
| 7 |
import trafilatura
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from dateutil import parser as dateparser
|
| 10 |
+
from fastapi import FastAPI
|
| 11 |
+
from pydantic import BaseModel
|
| 12 |
+
import google.generativeai as genai
|
| 13 |
+
|
| 14 |
+
# Separate prompt templates for the two research modes (normal and deep).
|
| 15 |
+
PROMPT_NORMAL = """
|
| 16 |
+
Based on the user's original query, provide a concise summary (3-4 important bullet points) of the following text. Focus only on the most critical information.
|
| 17 |
+
|
| 18 |
+
USER'S QUERY: "{query}"
|
| 19 |
+
|
| 20 |
+
TEXT TO SUMMARIZE:
|
| 21 |
+
---
|
| 22 |
+
{context_text}
|
| 23 |
+
---
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
PROMPT_DEEP = """
|
| 27 |
+
As a meticulous research analyst, your task is to synthesize the information from the provided web search results into a detailed and comprehensive report.
|
| 28 |
+
**Current Date:** {current_date}.
|
| 29 |
+
**VERY IMPORTANT:** Your top priority is to provide information relevant to this current date and the future. If the user's query is about a recurring event (like an exam), you MUST focus on the upcoming or current event.
|
| 30 |
+
**User's Original Query:** "{query}"
|
| 31 |
+
**Instructions:**
|
| 32 |
+
1. Combine information from different sources to create a coherent and detailed report.
|
| 33 |
+
2. Cite source URLs inline, like this: "(Source: http://...)." The URL is provided in the text.
|
| 34 |
+
3. At the end of your report, create a "## Sources" section and list all the unique URLs you used.
|
| 35 |
+
4. Use clear markdown with headings and bold text.
|
| 36 |
+
|
| 37 |
+
**Provided Search Results:**
|
| 38 |
+
---
|
| 39 |
+
{context_text}
|
| 40 |
+
---
|
| 41 |
+
"""
|
| 42 |
+
|
| 43 |
+
# --- Core Search Logic (No Changes) ---
|
| 44 |
+
def _render_result_section(meta: dict, body: str, search_type: str) -> str:
    """Format one scraped page as a markdown section headed by its metadata."""
    from urllib.parse import urlsplit

    title = meta.get("title", "Untitled")
    link = meta["link"]  # the caller only passes results that carry a link
    text = body.strip()

    if search_type == "news":
        try:
            date_iso = dateparser.parse(meta.get("date", ""), fuzzy=True).strftime("%Y-%m-%d")
        except Exception:
            # A missing or unparseable date must not discard the article.
            date_iso = "Unknown"
        return (
            f"## {title}\n**Source:** {meta.get('source', 'Unknown')} | "
            f"**Date:** {date_iso}\n**URL:** {link}\n\n{text}\n"
        )

    # Strip only a leading "www." so hosts that merely contain that substring
    # are left intact; a link without a scheme yields an empty domain instead
    # of raising.
    domain = urlsplit(link).netloc
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return f"## {title}\n**Domain:** {domain}\n**URL:** {link}\n\n{text}\n"


async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
    """Search via the Serper API, fetch each hit, and return the extracted text.

    Args:
        query: Search terms sent to Serper.
        serper_api_key: Value for the ``X-API-KEY`` header; required.
        search_type: ``"search"`` or ``"news"``; any other value is treated
            as ``"search"``.
        num_results: Requested number of hits, clamped to the range 1-20.

    Returns:
        On success, a line starting with ``"Successfully extracted content
        from"`` followed by one markdown section per page whose text could be
        extracted. Every failure is reported as a string starting with
        ``"Error:"`` so callers can detect it by prefix; this function does
        not raise for network or parsing problems.
    """
    if not serper_api_key:
        return "Error: Serper API Key is required."

    num_results = max(1, min(20, int(num_results)))
    if search_type not in ("search", "news"):
        search_type = "search"
    is_news = search_type == "news"

    endpoint = "https://google.serper.dev/news" if is_news else "https://google.serper.dev/search"
    payload = {"q": query, "num": num_results}
    headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}

    try:
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(endpoint, headers=headers, json=payload)
        if resp.status_code != 200:
            return f"Error: Search API returned status {resp.status_code}."

        # Hits without a URL cannot be fetched, so drop them up front rather
        # than letting one malformed entry abort the whole search.
        hits = resp.json().get("news" if is_news else "organic", [])
        results = [hit for hit in hits if hit.get("link")]
        if not results:
            return f"Error: No {search_type} results found."

        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            responses = await asyncio.gather(
                *(client.get(hit["link"]) for hit in results),
                return_exceptions=True,
            )

        chunks = []
        for meta, response in zip(results, responses):
            # Skip failed fetches and HTTP error pages (403/404/5xx bodies
            # are not the article content).
            if isinstance(response, BaseException) or response.status_code >= 400:
                continue
            body = trafilatura.extract(response.text)
            if not body:
                continue
            chunks.append(_render_result_section(meta, body, search_type))

        if not chunks:
            return "Error: Found results, but couldn't extract content."

        summary = f"Successfully extracted content from {len(chunks)}/{len(results)} results.\n\n---\n\n"
        return summary + "\n---\n".join(chunks)
    except Exception as e:
        # Some exceptions stringify to "" (e.g. timeouts); fall back to the
        # exception type so the message is never empty.
        detail = str(e) or type(e).__name__
        return f"Error: An error occurred during web search: {detail}"
|
| 77 |
|
| 78 |
+
# --- Gemini Summarization Logic ---
|
| 79 |
+
async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str, research_mode: str) -> str:
    """Ask a Gemini model to condense scraped text for the given query.

    Args:
        text_to_summarize: Scraped content inserted into the prompt.
        query: The user's original query, echoed into the prompt.
        gemini_key: API key passed to ``genai.configure``.
        model_name: Model identifier passed to ``genai.GenerativeModel``.
        research_mode: ``'deep'`` selects ``PROMPT_DEEP``; any other value
            selects ``PROMPT_NORMAL``.

    Returns:
        The model's response text, or a message containing
        ``"Gemini Summarization Failed"`` and the error text if anything in
        the call raises.
    """
    try:
        genai.configure(api_key=gemini_key)
        llm = genai.GenerativeModel(model_name)

        # Select the prompt by research mode; anything but 'deep' is normal.
        template = PROMPT_DEEP if research_mode == 'deep' else PROMPT_NORMAL
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        filled_prompt = template.format(
            query=query,
            context_text=text_to_summarize,
            current_date=today,
        )

        reply = await llm.generate_content_async(filled_prompt)
        return reply.text
    except Exception as err:
        return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(err)}\nReturning raw text instead."
|
| 97 |
|
| 98 |
+
# --- Main Orchestrator Function ---
|
| 99 |
+
async def search_and_summarize(query, serper_api_key, search_type, num_results, gemini_api_key, gemini_model, research_mode):
    """Run the web search and, when possible, summarize the result with Gemini.

    Args:
        query: The user's search query.
        serper_api_key: Serper key forwarded to ``search_web_logic``.
        search_type: ``"search"`` or ``"news"``, forwarded unchanged.
        num_results: Requested number of results, forwarded unchanged.
        gemini_api_key: Gemini key; when empty or ``None`` no summarization
            is attempted.
        gemini_model: Gemini model name; a falsy value falls back to
            ``"gemini-1.5-flash-latest"``.
        research_mode: ``"normal"`` or ``"deep"``, forwarded to
            ``summarize_with_gemini``.

    Returns:
        The Gemini summary when search and summarization both succeed; the
        scraped text followed by the failure notice when summarization
        fails; otherwise the raw text or error message from the search.
    """
    scraped_text = await search_web_logic(query, serper_api_key, search_type, num_results)

    # search_web_logic starts every successful payload with this summary
    # line. Checking for it (instead of searching for "Error:" anywhere)
    # keeps pages that merely mention "Error:" eligible for summarization and
    # keeps failure messages from being sent to the model.
    search_succeeded = scraped_text.startswith("Successfully extracted content from ")
    if not gemini_api_key or not search_succeeded:
        return scraped_text

    model_name = gemini_model or "gemini-1.5-flash-latest"
    summarized_text = await summarize_with_gemini(scraped_text, query, gemini_api_key, model_name, research_mode)

    if "⚠️ Gemini Summarization Failed" in summarized_text:
        # Keep the raw content so the user still gets something useful.
        return scraped_text + summarized_text
    return summarized_text
|
| 109 |
+
|
| 110 |
+
# --- FastAPI App ---
|
| 111 |
+
app = FastAPI()
|
| 112 |
+
|
| 113 |
+
class SearchRequest(BaseModel):
    """Request body for the POST /api/search endpoint."""

    # Search terms forwarded to the web search.
    query: str
    # Serper API key; the search returns an error string when it is empty.
    serper_api_key: str
    # "search" or "news"; any other value is treated as "search" downstream.
    search_type: str = "search"
    # Requested number of results; clamped to 1-20 by search_web_logic.
    num_results: int = 4
    # Gemini API key; when omitted or empty, summarization is skipped.
    gemini_api_key: Optional[str] = None
    # Gemini model name used for summarization.
    gemini_model: Optional[str] = "gemini-1.5-flash-latest"
    # "deep" selects the detailed report prompt; anything else is "normal".
    research_mode: str = "normal"
|
| 121 |
+
|
| 122 |
+
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """Handle POST /api/search: run search-and-summarize for the request.

    Returns a JSON object with a single ``"result"`` key holding the text
    produced by ``search_and_summarize``.
    """
    outcome = await search_and_summarize(
        query=request.query,
        serper_api_key=request.serper_api_key,
        search_type=request.search_type,
        num_results=request.num_results,
        gemini_api_key=request.gemini_api_key,
        gemini_model=request.gemini_model,
        research_mode=request.research_mode,
    )
    return {"result": outcome}
|
| 129 |
|
| 130 |
+
# --- Gradio App ---
|
| 131 |
+
def create_gradio_app():
    """Build and return the Gradio Blocks UI with "App" and "Analytics" tabs.

    The "App" tab collects the search and summarization inputs and wires the
    button to ``search_and_summarize``; the "Analytics" tab shows two bar
    plots that are populated when the page loads.
    """
    with gr.Blocks(title="Web Search & Summarize UI") as demo:
        gr.Markdown("# 🔍 AI Search & Summarize")
        with gr.Tabs():
            with gr.Tab("App"):
                gr.Markdown("### Step 1: Web Search")
                query_input = gr.Textbox(label="Search Query")
                serper_api_key_input = gr.Textbox(label="Your Serper API Key", type="password")
                with gr.Row():
                    search_type_input = gr.Radio(["search", "news"], value="search", label="Search Type")
                    num_results_input = gr.Slider(1, 20, value=4, step=1, label="Number of Results")

                gr.Markdown("### Step 2: AI Summarization")
                # Radio buttons for choosing the research mode.
                research_mode_input = gr.Radio(["normal", "deep"], value="normal", label="Research Mode", info="Normal for fast summary, Deep for detailed report.")
                gemini_api_key_input = gr.Textbox(label="Your Gemini API Key", type="password", placeholder="Leave empty to skip summarization")
                gemini_model_input = gr.Textbox(label="Gemini Model", value="gemini-1.5-flash-latest")

                search_button = gr.Button("Search & Summarize", variant="primary")
                output = gr.Textbox(label="Result", lines=25, max_lines=40)

                # The inputs list follows the positional parameter order of
                # search_and_summarize, with research_mode_input last.
                search_button.click(
                    fn=search_and_summarize,
                    inputs=[query_input, serper_api_key_input, search_type_input, num_results_input, gemini_api_key_input, gemini_model_input, research_mode_input],
                    outputs=output
                )
            with gr.Tab("Analytics"):
                # Analytics tab remains unchanged
                requests_plot = gr.BarPlot(x="date", y="count", title="Daily Requests")
                avg_time_plot = gr.BarPlot(x="date", y="avg_time", title="Avg. Response Time (s)")
                # NOTE(review): last_n_days_df and last_n_days_avg_time_df are
                # neither defined nor imported in the visible part of this
                # file - confirm they are in scope, otherwise this callback
                # raises NameError when the page loads.
                def update_analytics(): return last_n_days_df(14), last_n_days_avg_time_df(14)
                demo.load(update_analytics, [], [requests_plot, avg_time_plot])

    return demo
|
| 166 |
+
|
| 167 |
+
# --- Mount and Startup ---
|
| 168 |
+
# Build the Gradio UI and mount it on the FastAPI app at the root path;
# `app` is rebound to whatever mount_gradio_app returns.
gradio_ui = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_ui, path="/")

# When executed as a script, serve the combined app with uvicorn on port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|