Update app.py
Browse files
app.py
CHANGED
|
@@ -1,23 +1,45 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
import time
|
| 4 |
-
from typing import Optional
|
| 5 |
-
from datetime import datetime
|
| 6 |
import httpx
|
| 7 |
import trafilatura
|
| 8 |
import gradio as gr
|
| 9 |
from dateutil import parser as dateparser
|
| 10 |
-
from limits import parse
|
| 11 |
-
from limits.aio.storage import MemoryStorage
|
| 12 |
-
from limits.aio.strategies import MovingWindowRateLimiter
|
| 13 |
-
from analytics import record_request, last_n_days_df, last_n_days_avg_time_df
|
| 14 |
from fastapi import FastAPI
|
| 15 |
from pydantic import BaseModel
|
| 16 |
-
import google.generativeai as genai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
# --- Core Search Logic
|
| 19 |
async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
|
| 20 |
-
# ... (Ye function bilkul waisa hi hai jaisa pehle tha, isme koi badlav nahi)
|
| 21 |
start_time = time.time()
|
| 22 |
if not serper_api_key: return "Error: Serper API Key is required."
|
| 23 |
num_results = max(1, min(20, num_results))
|
|
@@ -29,7 +51,7 @@ async def search_web_logic(query: str, serper_api_key: str, search_type: str, nu
|
|
| 29 |
resp = await client.post(endpoint, headers=headers, json=payload)
|
| 30 |
if resp.status_code != 200: return f"Error: Search API returned status {resp.status_code}."
|
| 31 |
results = resp.json().get("news" if search_type == "news" else "organic", [])
|
| 32 |
-
if not results: return f"No {search_type} results found."
|
| 33 |
urls = [r["link"] for r in results]
|
| 34 |
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
|
| 35 |
tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
|
|
@@ -44,75 +66,102 @@ async def search_web_logic(query: str, serper_api_key: str, search_type: str, nu
|
|
| 44 |
else:
|
| 45 |
domain = meta["link"].split("/")[2].replace("www.", ""); chunk = f"## {meta['title']}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
|
| 46 |
chunks.append(chunk)
|
| 47 |
-
if not chunks: return "Found results, but couldn't extract content."
|
| 48 |
summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
|
| 49 |
await record_request(time.time() - start_time, num_results)
|
| 50 |
return summary + "\n---\n".join(chunks)
|
| 51 |
except Exception as e:
|
| 52 |
return f"An error occurred during web search: {str(e)}"
|
| 53 |
|
| 54 |
-
#
|
| 55 |
-
async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str) -> str:
|
| 56 |
try:
|
| 57 |
genai.configure(api_key=gemini_key)
|
| 58 |
model = genai.GenerativeModel(model_name)
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
USER'S QUERY: "{query}"
|
| 67 |
-
|
| 68 |
-
TEXT TO SUMMARIZE:
|
| 69 |
-
---
|
| 70 |
-
{text_to_summarize}
|
| 71 |
-
---
|
| 72 |
-
"""
|
| 73 |
-
|
| 74 |
response = await model.generate_content_async(prompt)
|
| 75 |
return response.text
|
| 76 |
except Exception as e:
|
| 77 |
return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# --- FastAPI App ---
|
| 80 |
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
# <<< CHANGE: FastAPI request model ko naye fields ke sath update kiya gaya >>>
|
| 83 |
class SearchRequest(BaseModel):
|
| 84 |
query: str
|
| 85 |
serper_api_key: str
|
| 86 |
search_type: str = "search"
|
| 87 |
num_results: int = 4
|
| 88 |
-
gemini_api_key: Optional[str] = None
|
| 89 |
-
gemini_model: Optional[str] = "gemini-1.5-flash-latest"
|
|
|
|
| 90 |
|
| 91 |
@app.post("/api/search")
|
| 92 |
async def api_search(request: SearchRequest):
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
request.
|
| 96 |
)
|
|
|
|
| 97 |
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
)
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
# ---
|
| 112 |
-
|
| 113 |
-
|
| 114 |
|
| 115 |
-
# --- Server Startup ---
|
| 116 |
if __name__ == "__main__":
|
| 117 |
import uvicorn
|
| 118 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
import time
|
| 4 |
+
from typing import Optional
|
| 5 |
+
from datetime import datetime, timezone
|
| 6 |
import httpx
|
| 7 |
import trafilatura
|
| 8 |
import gradio as gr
|
| 9 |
from dateutil import parser as dateparser
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from fastapi import FastAPI
|
| 11 |
from pydantic import BaseModel
|
| 12 |
+
import google.generativeai as genai
|
| 13 |
+
# <<< MISSING IMPORT ADDED BACK >>>
|
| 14 |
+
from analytics import record_request, last_n_days_df, last_n_days_avg_time_df
|
| 15 |
+
|
| 16 |
+
# --- Prompts ---
# Both templates are filled via str.format() in summarize_with_gemini with
# query / context_text / current_date keyword arguments (extra kwargs that a
# template does not use are ignored by str.format).

# Fast mode: short bullet-point summary of the scraped text.
PROMPT_NORMAL = """
Based on the user's original query, provide a concise summary (3-4 important bullet points) of the following text. Focus only on the most critical information.
USER'S QUERY: "{query}"
TEXT TO SUMMARIZE:
---
{context_text}
---
"""

# Deep mode: detailed, date-aware report with inline citations and a Sources section.
PROMPT_DEEP = """
As a meticulous research analyst, your task is to synthesize the information from the provided web search results into a detailed and comprehensive report.
**Current Date:** {current_date}.
**VERY IMPORTANT:** Your top priority is to provide information relevant to this current date and the future. If the user's query is about a recurring event (like an exam), you MUST focus on the upcoming or current event.
**User's Original Query:** "{query}"
**Instructions:**
1. Combine information from different sources to create a coherent and detailed report.
2. Cite source URLs inline, like this: "(Source: http://...)." The URL is provided in the text.
3. At the end of your report, create a "## Sources" section and list all the unique URLs you used.
4. Use clear markdown with headings and bold text.
**Provided Search Results:**
---
{context_text}
---
"""
|
| 40 |
|
| 41 |
+
# --- Core Search Logic ---
|
| 42 |
async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
|
|
|
|
| 43 |
start_time = time.time()
|
| 44 |
if not serper_api_key: return "Error: Serper API Key is required."
|
| 45 |
num_results = max(1, min(20, num_results))
|
|
|
|
| 51 |
resp = await client.post(endpoint, headers=headers, json=payload)
|
| 52 |
if resp.status_code != 200: return f"Error: Search API returned status {resp.status_code}."
|
| 53 |
results = resp.json().get("news" if search_type == "news" else "organic", [])
|
| 54 |
+
if not results: return f"No {search_type} results found for '{query}'."
|
| 55 |
urls = [r["link"] for r in results]
|
| 56 |
async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
|
| 57 |
tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
| 66 |
else:
|
| 67 |
domain = meta["link"].split("/")[2].replace("www.", ""); chunk = f"## {meta['title']}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
|
| 68 |
chunks.append(chunk)
|
| 69 |
+
if not chunks: return "Found results for '{query}', but couldn't extract content."
|
| 70 |
summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
|
| 71 |
await record_request(time.time() - start_time, num_results)
|
| 72 |
return summary + "\n---\n".join(chunks)
|
| 73 |
except Exception as e:
|
| 74 |
return f"An error occurred during web search: {str(e)}"
|
| 75 |
|
| 76 |
+
# --- Gemini Summarization Logic ---
async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str, research_mode: str) -> str:
    """Condense scraped web text with Gemini.

    Uses the deep-research prompt when ``research_mode == 'deep'``, otherwise
    the normal bullet-point prompt. On any failure, returns a warning footer
    instead of raising, so the caller can fall back to the raw text.
    """
    try:
        genai.configure(api_key=gemini_key)
        model = genai.GenerativeModel(model_name)
        # Anchor the model on today's (UTC) date so queries about recurring
        # events resolve to the upcoming/current occurrence.
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        template = PROMPT_DEEP if research_mode == 'deep' else PROMPT_NORMAL
        prompt = template.format(query=query, context_text=text_to_summarize, current_date=today)
        response = await model.generate_content_async(prompt)
        return response.text
    except Exception as e:
        return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."
|
| 93 |
|
| 94 |
+
# --- Main Orchestrator Function ---
async def search_and_summarize(query, serper_api_key, search_type, num_results, gemini_api_key, gemini_model, research_mode):
    """Run the web search, then optionally condense the result with Gemini.

    Returns the raw scraped text when no Gemini key was supplied, when the
    search itself reported an error, or (with the failure notice appended)
    when summarization failed.
    """
    scraped_text = await search_web_logic(query, serper_api_key, search_type, num_results)

    # Guard: skip summarization entirely without a key or on a search error.
    if not gemini_api_key or "Error:" in scraped_text:
        return scraped_text

    summarized_text = await summarize_with_gemini(scraped_text, query, gemini_api_key, gemini_model, research_mode)
    if "⚠️ Gemini Summarization Failed" in summarized_text:
        # The summarizer returned only its failure footer; keep the raw text too.
        return scraped_text + summarized_text
    return summarized_text
|
| 105 |
+
|
| 106 |
# --- FastAPI App ---
# Base ASGI application; the Gradio UI is mounted onto it further down.
app = FastAPI()
# Add CORS middleware if you plan to call the API from a different domain/frontend
# from fastapi.middleware.cors import CORSMiddleware
# app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
|
| 111 |
|
|
|
|
| 112 |
class SearchRequest(BaseModel):
    """Request body for POST /api/search."""
    query: str
    serper_api_key: str
    search_type: str = "search"  # "search" (organic results) or "news"
    num_results: int = 4  # clamped to 1..20 by search_web_logic
    gemini_api_key: Optional[str] = None  # summarization is skipped when absent
    gemini_model: Optional[str] = "gemini-1.5-flash-latest"
    research_mode: str = "normal"  # "normal" (bullets) or "deep" (detailed report)
|
| 120 |
|
| 121 |
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """JSON API endpoint: run the full search + summarize pipeline once."""
    result = await search_and_summarize(
        query=request.query,
        serper_api_key=request.serper_api_key,
        search_type=request.search_type,
        num_results=request.num_results,
        gemini_api_key=request.gemini_api_key,
        gemini_model=request.gemini_model,
        research_mode=request.research_mode,
    )
    return {"result": result}
|
| 128 |
|
| 129 |
+
# --- Gradio App ---
def create_gradio_app():
    """Build the Gradio UI: an "App" tab driving search_and_summarize and an
    "Analytics" tab showing request-volume / response-time bar plots.

    Returns the gr.Blocks instance so it can be mounted onto the FastAPI app.
    """
    with gr.Blocks(title="Web Search & Summarize UI") as demo:
        gr.Markdown("# 🔍 AI Search & Summarize")
        with gr.Tabs():
            with gr.Tab("App"):
                gr.Markdown("### Step 1: Web Search")
                query_input = gr.Textbox(label="Search Query")
                serper_api_key_input = gr.Textbox(label="Your Serper API Key", type="password")
                with gr.Row():
                    search_type_input = gr.Radio(["search", "news"], value="search", label="Search Type")
                    num_results_input = gr.Slider(1, 20, value=4, step=1, label="Number of Results")

                gr.Markdown("### Step 2: AI Summarization")
                research_mode_input = gr.Radio(["normal", "deep"], value="normal", label="Research Mode", info="Normal for fast summary, Deep for detailed report.")
                gemini_api_key_input = gr.Textbox(label="Your Gemini API Key", type="password", placeholder="Leave empty to skip summarization")
                gemini_model_input = gr.Textbox(label="Gemini Model", value="gemini-1.5-flash-latest")
                search_button = gr.Button("Search & Summarize", variant="primary")
                output = gr.Textbox(label="Result", lines=25, max_lines=40)

                # Inputs are passed positionally to search_and_summarize; the
                # list order must match that function's parameter order.
                search_button.click(
                    fn=search_and_summarize,
                    inputs=[query_input, serper_api_key_input, search_type_input, num_results_input, gemini_api_key_input, gemini_model_input, research_mode_input],
                    outputs=output
                )
            with gr.Tab("Analytics"):
                requests_plot = gr.BarPlot(x="date", y="count", title="Daily Requests")
                avg_time_plot = gr.BarPlot(x="date", y="avg_time", title="Avg. Response Time (s)")
                # Refresh both plots on page load with the last 14 days of data.
                # NOTE(review): assumes last_n_days_df / last_n_days_avg_time_df
                # return DataFrames with "date"/"count" and "date"/"avg_time"
                # columns — confirm against the analytics module.
                def update_analytics(): return last_n_days_df(14), last_n_days_avg_time_df(14)
                demo.load(update_analytics, [], [requests_plot, avg_time_plot])
    return demo
|
| 160 |
|
| 161 |
+
# --- Mount and Startup ---
# Mount the Gradio UI at the web root; the /api/search route registered above
# on `app` keeps working alongside it.
gradio_ui = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_ui, path="/")

if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|