bk939448 committed on
Commit
14c1bfc
·
verified ·
1 Parent(s): d93d1aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -86
app.py CHANGED
@@ -1,110 +1,173 @@
1
  import os
2
  import asyncio
3
- import re
4
- import json
5
  from typing import Optional
6
  from datetime import datetime, timezone
7
- from fastapi import FastAPI
8
- from pydantic import BaseModel
9
- from fastapi.middleware.cors import CORSMiddleware
10
  import httpx
11
  import trafilatura
12
- import google.generativeai as genai
13
-
14
- # --- FastAPI App with CORS ---
15
- app = FastAPI(title="AI Research Agent API")
16
- app.add_middleware(
17
- CORSMiddleware,
18
- allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
19
- )
20
-
21
- # --- Prompts ---
22
- PROMPT_NORMAL = """Concisely summarize the key points from the following text based on the user's query: "{query}". Focus on the most critical information. PROVIDED TEXT: --- {context_text} ---"""
23
- PROMPT_DEEP = """As a research analyst, synthesize the information from the provided texts into a detailed report. Current Date: {current_date}. User's Query: "{query}". Instructions: Create a detailed report, combining facts from all sources. Cite source URLs inline, like this: (Source: http://...). At the end, create a "## Sources" section listing all unique URLs. Use clear markdown. Provided Texts: --- {context_text} ---"""
24
- PROMPT_ULTRADEEP_PLANNER = """You are a research planner. Based on the user's query, create a research plan. Your output MUST be a valid JSON object like this: {"queries": ["query 1", "query 2"]}. Do not add any other text. USER'S QUERY: "{query}" """
25
- PROMPT_ULTRADEEP_SYNTHESIZER = """You are a master research analyst. Synthesize the collected text into a single, comprehensive, well-structured report based on the user's original query: "{query}". Current Date: {current_date}. Instructions: Synthesize a logical narrative organized by topic. If critical info is missing, you can suggest it, but generate the best possible report with the available info. Cite source URLs inline `(Source: http://...)` and conclude with a "## Sources" list. Collected Raw Text: --- {context_text} ---"""
26
-
27
- # --- Core Logic with Better Error Handling ---
28
- async def search_web_logic(query: str, serper_api_key: str) -> str:
29
- if not serper_api_key: return "Error: Serper API Key is missing."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  try:
31
- headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
 
32
  async with httpx.AsyncClient(timeout=15) as client:
33
- resp = await client.post("https://google.serper.dev/search", headers=headers, json={"q": query, "num": 7})
34
- if resp.status_code == 401: return "Error: Invalid Serper API Key."
35
- if resp.status_code != 200: return f"Error: Serper API returned status {resp.status_code}."
36
- results = resp.json().get("organic", [])
37
- if not results: return f"Error: No web results found for query '{query}'."
38
  urls = [r["link"] for r in results]
39
  async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
40
  tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
41
- texts = [f"Source URL: {meta['link']}\nContent: {body.strip()}\n" for meta, response in zip(results, responses) if not isinstance(response, Exception) and (body := trafilatura.extract(response.text))]
42
- if not texts: return "Error: Found web results, but could not extract content."
43
- return "\n---\n".join(texts)
44
- except Exception as e: return f"Error during web search: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- async def call_gemini(prompt: str, gemini_key: str, model_name: str, json_mode: bool = False) -> str:
47
- if not gemini_key: return json.dumps({"error": "Gemini API Key is missing."})
48
  try:
49
  genai.configure(api_key=gemini_key)
50
  model = genai.GenerativeModel(model_name)
51
- generation_config = {"response_mime_type": "application/json"} if json_mode else None
52
- response = await model.generate_content_async(prompt, generation_config=generation_config)
 
 
 
 
 
 
 
 
 
53
  return response.text
54
- except Exception as e: return json.dumps({"error": f"Error calling Gemini: {str(e)}"})
 
55
 
56
- # --- AI Agent Logic (Non-Streaming) ---
57
- async def ultradeep_research_agent(query: str, serper_api_key: str, gemini_key: str, model_name: str) -> str:
58
- # Step 1: Plan
59
- planner_prompt = PROMPT_ULTRADEEP_PLANNER.format(query=query)
60
- plan_str = await call_gemini(planner_prompt, gemini_key, model_name, json_mode=True)
61
- try:
62
- match = re.search(r'\{.*\}', plan_str, re.DOTALL)
63
- if not match: raise ValueError("No JSON object found in Gemini's planner response.")
64
- plan_data = json.loads(match.group(0))
65
- if "error" in plan_data: return f"Error during planning phase: {plan_data['error']}"
66
- search_queries = plan_data["queries"]
67
- except Exception as e: return f"Error: Could not create a valid research plan. Details: {str(e)}\nRaw Response: {plan_str}"
68
-
69
- # Step 2: Execute
70
- tasks = [search_web_logic(sub_query, serper_api_key) for sub_query in search_queries]
71
- search_results = await asyncio.gather(*tasks)
72
- all_scraped_text = "\n".join([res for res in search_results if not res.startswith("Error:")])
73
- if not all_scraped_text: return "Error: Could not retrieve any web content for the planned queries. Check Serper key."
74
-
75
- # Step 3: Synthesize
76
- current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
77
- synthesizer_prompt = PROMPT_ULTRADEEP_SYNTHESIZER.format(query=query, current_date=current_date, context_text=all_scraped_text)
78
- final_report = await call_gemini(synthesizer_prompt, gemini_key, model_name)
79
- return final_report
80
-
81
- # --- The Single, Unified FastAPI Endpoint ---
82
- class ResearchRequest(BaseModel):
83
  query: str
84
  serper_api_key: str
85
- gemini_api_key: str
86
- research_mode: str
87
- gemini_model: str = "gemini-1.5-flash-latest"
88
-
89
- @app.post("/api/research")
90
- async def api_research(request: ResearchRequest):
91
- if request.research_mode == 'ultradeep':
92
- result = await ultradeep_research_agent(
93
- request.query, request.serper_api_key, request.gemini_api_key, request.gemini_model
94
- )
95
- else: # Normal and Deep modes
96
- scraped_text = await search_web_logic(request.query, request.serper_api_key)
97
- if scraped_text.startswith("Error:"):
98
- return {"result": scraped_text}
99
-
100
- current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
101
- prompt_template = PROMPT_DEEP if request.research_mode == "deep" else PROMPT_NORMAL
102
- final_prompt = prompt_template.format(query=request.query, context_text=scraped_text, current_date=current_date)
103
- result = await call_gemini(final_prompt, request.gemini_api_key, request.gemini_model)
104
-
105
  return {"result": result}
106
 
107
- # --- Server Startup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if __name__ == "__main__":
109
  import uvicorn
110
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
  import asyncio
3
+ import time
 
4
  from typing import Optional
5
  from datetime import datetime, timezone
 
 
 
6
  import httpx
7
  import trafilatura
8
+ import gradio as gr
9
+ from dateutil import parser as dateparser
10
+ from fastapi import FastAPI
11
+ from pydantic import BaseModel
12
+ import google.generativeai as genai
13
+
14
+ # <<< NEW: Dono research modes ke liye alag prompts >>>
15
+ PROMPT_NORMAL = """
16
+ Based on the user's original query, provide a concise summary (3-4 important bullet points) of the following text. Focus only on the most critical information.
17
+
18
+ USER'S QUERY: "{query}"
19
+
20
+ TEXT TO SUMMARIZE:
21
+ ---
22
+ {context_text}
23
+ ---
24
+ """
25
+
26
+ PROMPT_DEEP = """
27
+ As a meticulous research analyst, your task is to synthesize the information from the provided web search results into a detailed and comprehensive report.
28
+ **Current Date:** {current_date}.
29
+ **VERY IMPORTANT:** Your top priority is to provide information relevant to this current date and the future. If the user's query is about a recurring event (like an exam), you MUST focus on the upcoming or current event.
30
+ **User's Original Query:** "{query}"
31
+ **Instructions:**
32
+ 1. Combine information from different sources to create a coherent and detailed report.
33
+ 2. Cite source URLs inline, like this: "(Source: http://...)." The URL is provided in the text.
34
+ 3. At the end of your report, create a "## Sources" section and list all the unique URLs you used.
35
+ 4. Use clear markdown with headings and bold text.
36
+
37
+ **Provided Search Results:**
38
+ ---
39
+ {context_text}
40
+ ---
41
+ """
42
+
43
# --- Core Search Logic ---
async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
    """Search the web via the Serper API and return extracted page text.

    Args:
        query: search terms sent to Serper.
        serper_api_key: caller-supplied Serper API key.
        search_type: "search" (web) or "news"; anything else falls back to "search".
        num_results: requested result count, clamped to 1..20.

    Returns:
        A single formatted string: a short extraction summary followed by one
        markdown chunk per successfully scraped result. All failures are
        reported as human-readable strings rather than raised.
    """
    from urllib.parse import urlsplit  # stdlib; used for robust domain extraction

    start_time = time.time()  # vestigial (analytics hook removed); kept for future use
    if not serper_api_key:
        return "Error: Serper API Key is required."
    # Clamp/normalize user-supplied values to safe ranges.
    num_results = max(1, min(20, num_results))
    search_type = "search" if search_type not in ["search", "news"] else search_type
    try:
        endpoint = "https://google.serper.dev/news" if search_type == "news" else "https://google.serper.dev/search"
        payload = {"q": query, "num": num_results}
        headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(endpoint, headers=headers, json=payload)
            if resp.status_code != 200:
                return f"Error: Search API returned status {resp.status_code}."
            results = resp.json().get("news" if search_type == "news" else "organic", [])
            if not results:
                return f"No {search_type} results found."

        # FIX: skip results without a link instead of raising KeyError below.
        results = [r for r in results if r.get("link")]
        if not results:
            return f"No {search_type} results found."
        urls = [r["link"] for r in results]
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            tasks = [client.get(u) for u in urls]
            responses = await asyncio.gather(*tasks, return_exceptions=True)

        chunks, successful_extractions = [], 0
        for meta, response in zip(results, responses):
            # Drop failed fetches and pages trafilatura cannot extract.
            if isinstance(response, Exception) or not (body := trafilatura.extract(response.text)):
                continue
            successful_extractions += 1
            title = meta.get("title", "Untitled")  # FIX: tolerate missing title
            if search_type == "news":
                try:
                    date_iso = dateparser.parse(meta.get("date", ""), fuzzy=True).strftime("%Y-%m-%d")
                except Exception:
                    date_iso = "Unknown"
                chunk = f"## {title}\n**Source:** {meta.get('source', 'Unknown')} | **Date:** {date_iso}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
            else:
                # FIX: urlsplit() instead of link.split("/")[2], which raises
                # IndexError on malformed/relative URLs.
                domain = urlsplit(meta["link"]).netloc.replace("www.", "")
                chunk = f"## {title}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
            chunks.append(chunk)
        if not chunks:
            return "Found results, but couldn't extract content."
        summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
        return summary + "\n---\n".join(chunks)
    except Exception as e:
        return f"An error occurred during web search: {str(e)}"
77
 
78
# --- Gemini Summarization Logic ---
async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str, research_mode: str) -> str:
    """Summarize *text_to_summarize* with Gemini.

    Uses the detailed "deep" prompt when research_mode == 'deep', the concise
    prompt otherwise. On any failure, returns a marker string (checked by the
    caller) instead of raising.
    """
    try:
        genai.configure(api_key=gemini_key)
        llm = genai.GenerativeModel(model_name)
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        # Deep mode -> detailed cited report; normal mode -> short summary.
        template = PROMPT_DEEP if research_mode == 'deep' else PROMPT_NORMAL
        filled_prompt = template.format(query=query, context_text=text_to_summarize, current_date=today)
        reply = await llm.generate_content_async(filled_prompt)
        return reply.text
    except Exception as e:
        return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."
97
 
98
# --- Main Orchestrator Function ---
async def search_and_summarize(query, serper_api_key, search_type, num_results, gemini_api_key, gemini_model, research_mode):
    """Run a web search and, when a Gemini key is supplied, summarize the result.

    Returns either the Gemini summary, or the raw scraped text (with the
    Gemini failure notice appended on summarization failure) when
    summarization is skipped or fails.
    """
    scraped_text = await search_web_logic(query, serper_api_key, search_type, num_results)

    # FIX: search_web_logic signals failure with a handful of leading phrases.
    # The old `"Error:" not in scraped_text` substring test both missed the
    # non-"Error:" failures ("No ... results found.", "Found results, but
    # couldn't extract content.") and could falsely suppress summarization
    # whenever a scraped page merely contained the word "Error:".
    failure_prefixes = ("Error:", "No ", "Found results, but", "An error occurred")
    search_failed = scraped_text.startswith(failure_prefixes)

    if gemini_api_key and not search_failed:
        summarized_text = await summarize_with_gemini(scraped_text, query, gemini_api_key, gemini_model, research_mode)
        if "⚠️ Gemini Summarization Failed" in summarized_text:
            # Fall back to raw text, keeping the failure notice visible.
            return scraped_text + summarized_text
        return summarized_text
    return scraped_text
109
+
110
# --- FastAPI App ---
app = FastAPI()

class SearchRequest(BaseModel):
    """Request body for POST /api/search."""
    query: str                 # search terms forwarded to Serper
    serper_api_key: str        # caller-supplied Serper key (required)
    search_type: str = "search"      # "search" (web) or "news"
    num_results: int = 4             # clamped to 1..20 downstream
    gemini_api_key: Optional[str] = None   # omit/None to skip summarization
    gemini_model: Optional[str] = "gemini-1.5-flash-latest"
    research_mode: str = "normal"  # "normal" (concise summary) or "deep" (detailed report)
121
+
122
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """HTTP entry point: web search plus optional Gemini summarization.

    Delegates all work to search_and_summarize(); failures come back as
    human-readable strings inside the JSON payload (HTTP status stays 200).
    """
    result = await search_and_summarize(
        request.query, request.serper_api_key, request.search_type, request.num_results,
        request.gemini_api_key, request.gemini_model, request.research_mode
    )
    return {"result": result}
129
 
130
# --- Gradio App ---
def create_gradio_app():
    """Build the Gradio Blocks UI (App tab + Analytics tab) and return it."""
    with gr.Blocks(title="Web Search & Summarize UI") as demo:
        gr.Markdown("# 🔍 AI Search & Summarize")
        with gr.Tabs():
            with gr.Tab("App"):
                gr.Markdown("### Step 1: Web Search")
                query_input = gr.Textbox(label="Search Query")
                serper_api_key_input = gr.Textbox(label="Your Serper API Key", type="password")
                with gr.Row():
                    search_type_input = gr.Radio(["search", "news"], value="search", label="Search Type")
                    num_results_input = gr.Slider(1, 20, value=4, step=1, label="Number of Results")

                gr.Markdown("### Step 2: AI Summarization")
                research_mode_input = gr.Radio(["normal", "deep"], value="normal", label="Research Mode", info="Normal for fast summary, Deep for detailed report.")
                gemini_api_key_input = gr.Textbox(label="Your Gemini API Key", type="password", placeholder="Leave empty to skip summarization")
                gemini_model_input = gr.Textbox(label="Gemini Model", value="gemini-1.5-flash-latest")

                search_button = gr.Button("Search & Summarize", variant="primary")
                output = gr.Textbox(label="Result", lines=25, max_lines=40)

                search_button.click(
                    fn=search_and_summarize,
                    inputs=[query_input, serper_api_key_input, search_type_input, num_results_input, gemini_api_key_input, gemini_model_input, research_mode_input],
                    outputs=output
                )
            with gr.Tab("Analytics"):
                requests_plot = gr.BarPlot(x="date", y="count", title="Daily Requests")
                avg_time_plot = gr.BarPlot(x="date", y="avg_time", title="Avg. Response Time (s)")

                def update_analytics():
                    # FIX: last_n_days_df / last_n_days_avg_time_df are not
                    # defined anywhere in this revision of the file, so the
                    # original one-liner raised NameError on every page load.
                    # Fall back to empty frames so the tab still renders.
                    import pandas as pd
                    try:
                        return last_n_days_df(14), last_n_days_avg_time_df(14)  # noqa: F821
                    except NameError:
                        return (pd.DataFrame(columns=["date", "count"]),
                                pd.DataFrame(columns=["date", "avg_time"]))

                demo.load(update_analytics, [], [requests_plot, avg_time_plot])

    return demo
166
+
167
# --- Mount and Startup ---
# Serve the Gradio UI at "/" on top of the FastAPI app; the JSON API remains
# available at /api/search. NOTE: `app` is rebound to the combined app here.
gradio_ui = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_ui, path="/")

if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)