bk939448 committed on
Commit
14c1bfc
·
verified ·
1 Parent(s): d93d1aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -86
app.py CHANGED
@@ -1,110 +1,173 @@
1
  import os
2
  import asyncio
3
- import re
4
- import json
5
  from typing import Optional
6
  from datetime import datetime, timezone
7
- from fastapi import FastAPI
8
- from pydantic import BaseModel
9
- from fastapi.middleware.cors import CORSMiddleware
10
  import httpx
11
  import trafilatura
12
- import google.generativeai as genai
13
-
14
- # --- FastAPI App with CORS ---
15
- app = FastAPI(title="AI Research Agent API")
16
- app.add_middleware(
17
- CORSMiddleware,
18
- allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
19
- )
20
-
21
- # --- Prompts ---
22
- PROMPT_NORMAL = """Concisely summarize the key points from the following text based on the user's query: "{query}". Focus on the most critical information. PROVIDED TEXT: --- {context_text} ---"""
23
- PROMPT_DEEP = """As a research analyst, synthesize the information from the provided texts into a detailed report. Current Date: {current_date}. User's Query: "{query}". Instructions: Create a detailed report, combining facts from all sources. Cite source URLs inline, like this: (Source: http://...). At the end, create a "## Sources" section listing all unique URLs. Use clear markdown. Provided Texts: --- {context_text} ---"""
24
- PROMPT_ULTRADEEP_PLANNER = """You are a research planner. Based on the user's query, create a research plan. Your output MUST be a valid JSON object like this: {"queries": ["query 1", "query 2"]}. Do not add any other text. USER'S QUERY: "{query}" """
25
- PROMPT_ULTRADEEP_SYNTHESIZER = """You are a master research analyst. Synthesize the collected text into a single, comprehensive, well-structured report based on the user's original query: "{query}". Current Date: {current_date}. Instructions: Synthesize a logical narrative organized by topic. If critical info is missing, you can suggest it, but generate the best possible report with the available info. Cite source URLs inline `(Source: http://...)` and conclude with a "## Sources" list. Collected Raw Text: --- {context_text} ---"""
26
-
27
- # --- Core Logic with Better Error Handling ---
28
- async def search_web_logic(query: str, serper_api_key: str) -> str:
29
- if not serper_api_key: return "Error: Serper API Key is missing."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  try:
31
- headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
 
32
  async with httpx.AsyncClient(timeout=15) as client:
33
- resp = await client.post("https://google.serper.dev/search", headers=headers, json={"q": query, "num": 7})
34
- if resp.status_code == 401: return "Error: Invalid Serper API Key."
35
- if resp.status_code != 200: return f"Error: Serper API returned status {resp.status_code}."
36
- results = resp.json().get("organic", [])
37
- if not results: return f"Error: No web results found for query '{query}'."
38
  urls = [r["link"] for r in results]
39
  async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
40
  tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
41
- texts = [f"Source URL: {meta['link']}\nContent: {body.strip()}\n" for meta, response in zip(results, responses) if not isinstance(response, Exception) and (body := trafilatura.extract(response.text))]
42
- if not texts: return "Error: Found web results, but could not extract content."
43
- return "\n---\n".join(texts)
44
- except Exception as e: return f"Error during web search: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- async def call_gemini(prompt: str, gemini_key: str, model_name: str, json_mode: bool = False) -> str:
47
- if not gemini_key: return json.dumps({"error": "Gemini API Key is missing."})
48
  try:
49
  genai.configure(api_key=gemini_key)
50
  model = genai.GenerativeModel(model_name)
51
- generation_config = {"response_mime_type": "application/json"} if json_mode else None
52
- response = await model.generate_content_async(prompt, generation_config=generation_config)
 
 
 
 
 
 
 
 
 
53
  return response.text
54
- except Exception as e: return json.dumps({"error": f"Error calling Gemini: {str(e)}"})
 
55
 
56
- # --- AI Agent Logic (Non-Streaming) ---
57
- async def ultradeep_research_agent(query: str, serper_api_key: str, gemini_key: str, model_name: str) -> str:
58
- # Step 1: Plan
59
- planner_prompt = PROMPT_ULTRADEEP_PLANNER.format(query=query)
60
- plan_str = await call_gemini(planner_prompt, gemini_key, model_name, json_mode=True)
61
- try:
62
- match = re.search(r'\{.*\}', plan_str, re.DOTALL)
63
- if not match: raise ValueError("No JSON object found in Gemini's planner response.")
64
- plan_data = json.loads(match.group(0))
65
- if "error" in plan_data: return f"Error during planning phase: {plan_data['error']}"
66
- search_queries = plan_data["queries"]
67
- except Exception as e: return f"Error: Could not create a valid research plan. Details: {str(e)}\nRaw Response: {plan_str}"
68
-
69
- # Step 2: Execute
70
- tasks = [search_web_logic(sub_query, serper_api_key) for sub_query in search_queries]
71
- search_results = await asyncio.gather(*tasks)
72
- all_scraped_text = "\n".join([res for res in search_results if not res.startswith("Error:")])
73
- if not all_scraped_text: return "Error: Could not retrieve any web content for the planned queries. Check Serper key."
74
-
75
- # Step 3: Synthesize
76
- current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
77
- synthesizer_prompt = PROMPT_ULTRADEEP_SYNTHESIZER.format(query=query, current_date=current_date, context_text=all_scraped_text)
78
- final_report = await call_gemini(synthesizer_prompt, gemini_key, model_name)
79
- return final_report
80
-
81
- # --- The Single, Unified FastAPI Endpoint ---
82
- class ResearchRequest(BaseModel):
83
  query: str
84
  serper_api_key: str
85
- gemini_api_key: str
86
- research_mode: str
87
- gemini_model: str = "gemini-1.5-flash-latest"
88
-
89
- @app.post("/api/research")
90
- async def api_research(request: ResearchRequest):
91
- if request.research_mode == 'ultradeep':
92
- result = await ultradeep_research_agent(
93
- request.query, request.serper_api_key, request.gemini_api_key, request.gemini_model
94
- )
95
- else: # Normal and Deep modes
96
- scraped_text = await search_web_logic(request.query, request.serper_api_key)
97
- if scraped_text.startswith("Error:"):
98
- return {"result": scraped_text}
99
-
100
- current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
101
- prompt_template = PROMPT_DEEP if request.research_mode == "deep" else PROMPT_NORMAL
102
- final_prompt = prompt_template.format(query=request.query, context_text=scraped_text, current_date=current_date)
103
- result = await call_gemini(final_prompt, request.gemini_api_key, request.gemini_model)
104
-
105
  return {"result": result}
106
 
107
- # --- Server Startup ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  if __name__ == "__main__":
109
  import uvicorn
110
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
  import asyncio
3
+ import time
 
4
  from typing import Optional
5
  from datetime import datetime, timezone
 
 
 
6
  import httpx
7
  import trafilatura
8
+ import gradio as gr
9
+ from dateutil import parser as dateparser
10
+ from fastapi import FastAPI
11
+ from pydantic import BaseModel
12
+ import google.generativeai as genai
13
+
14
+ # <<< NEW: Dono research modes ke liye alag prompts >>>
15
+ PROMPT_NORMAL = """
16
+ Based on the user's original query, provide a concise summary (3-4 important bullet points) of the following text. Focus only on the most critical information.
17
+
18
+ USER'S QUERY: "{query}"
19
+
20
+ TEXT TO SUMMARIZE:
21
+ ---
22
+ {context_text}
23
+ ---
24
+ """
25
+
26
+ PROMPT_DEEP = """
27
+ As a meticulous research analyst, your task is to synthesize the information from the provided web search results into a detailed and comprehensive report.
28
+ **Current Date:** {current_date}.
29
+ **VERY IMPORTANT:** Your top priority is to provide information relevant to this current date and the future. If the user's query is about a recurring event (like an exam), you MUST focus on the upcoming or current event.
30
+ **User's Original Query:** "{query}"
31
+ **Instructions:**
32
+ 1. Combine information from different sources to create a coherent and detailed report.
33
+ 2. Cite source URLs inline, like this: "(Source: http://...)." The URL is provided in the text.
34
+ 3. At the end of your report, create a "## Sources" section and list all the unique URLs you used.
35
+ 4. Use clear markdown with headings and bold text.
36
+
37
+ **Provided Search Results:**
38
+ ---
39
+ {context_text}
40
+ ---
41
+ """
42
+
43
# --- Core Search Logic ---
async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
    """Search the web via the Serper API and return extracted page text.

    Args:
        query: search terms sent to Serper.
        serper_api_key: caller-supplied Serper API key.
        search_type: "search" (web) or "news"; anything else falls back to "search".
        num_results: requested result count, clamped to 1..20.

    Returns:
        A single formatted string: a short extraction summary followed by one
        markdown chunk per successfully scraped result. All failures are
        reported as human-readable strings rather than raised.
    """
    from urllib.parse import urlsplit  # stdlib; used for robust domain extraction

    start_time = time.time()  # vestigial (analytics hook removed); kept for future use
    if not serper_api_key:
        return "Error: Serper API Key is required."
    # Clamp/normalize user-supplied values to safe ranges.
    num_results = max(1, min(20, num_results))
    search_type = "search" if search_type not in ["search", "news"] else search_type
    try:
        endpoint = "https://google.serper.dev/news" if search_type == "news" else "https://google.serper.dev/search"
        payload = {"q": query, "num": num_results}
        headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.post(endpoint, headers=headers, json=payload)
            if resp.status_code != 200:
                return f"Error: Search API returned status {resp.status_code}."
            results = resp.json().get("news" if search_type == "news" else "organic", [])
            if not results:
                return f"No {search_type} results found."

        # FIX: skip results without a link instead of raising KeyError below.
        results = [r for r in results if r.get("link")]
        if not results:
            return f"No {search_type} results found."
        urls = [r["link"] for r in results]
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
            tasks = [client.get(u) for u in urls]
            responses = await asyncio.gather(*tasks, return_exceptions=True)

        chunks, successful_extractions = [], 0
        for meta, response in zip(results, responses):
            # Drop failed fetches and pages trafilatura cannot extract.
            if isinstance(response, Exception) or not (body := trafilatura.extract(response.text)):
                continue
            successful_extractions += 1
            title = meta.get("title", "Untitled")  # FIX: tolerate missing title
            if search_type == "news":
                try:
                    date_iso = dateparser.parse(meta.get("date", ""), fuzzy=True).strftime("%Y-%m-%d")
                except Exception:
                    date_iso = "Unknown"
                chunk = f"## {title}\n**Source:** {meta.get('source', 'Unknown')} | **Date:** {date_iso}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
            else:
                # FIX: urlsplit() instead of link.split("/")[2], which raises
                # IndexError on malformed/relative URLs.
                domain = urlsplit(meta["link"]).netloc.replace("www.", "")
                chunk = f"## {title}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
            chunks.append(chunk)
        if not chunks:
            return "Found results, but couldn't extract content."
        summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
        return summary + "\n---\n".join(chunks)
    except Exception as e:
        return f"An error occurred during web search: {str(e)}"
77
 
78
# --- Gemini Summarization Logic ---
async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str, research_mode: str) -> str:
    """Summarize *text_to_summarize* with Gemini.

    Uses the detailed "deep" prompt when research_mode == 'deep', the concise
    prompt otherwise. On any failure, returns a marker string (checked by the
    caller) instead of raising.
    """
    try:
        genai.configure(api_key=gemini_key)
        llm = genai.GenerativeModel(model_name)
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        # Deep mode -> detailed cited report; normal mode -> short summary.
        template = PROMPT_DEEP if research_mode == 'deep' else PROMPT_NORMAL
        filled_prompt = template.format(query=query, context_text=text_to_summarize, current_date=today)
        reply = await llm.generate_content_async(filled_prompt)
        return reply.text
    except Exception as e:
        return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."
97
 
98
# --- Main Orchestrator Function ---
async def search_and_summarize(query, serper_api_key, search_type, num_results, gemini_api_key, gemini_model, research_mode):
    """Run a web search and, when a Gemini key is supplied, summarize the result.

    Returns either the Gemini summary, or the raw scraped text (with the
    Gemini failure notice appended on summarization failure) when
    summarization is skipped or fails.
    """
    scraped_text = await search_web_logic(query, serper_api_key, search_type, num_results)

    # FIX: search_web_logic signals failure with a handful of leading phrases.
    # The old `"Error:" not in scraped_text` substring test both missed the
    # non-"Error:" failures ("No ... results found.", "Found results, but
    # couldn't extract content.") and could falsely suppress summarization
    # whenever a scraped page merely contained the word "Error:".
    failure_prefixes = ("Error:", "No ", "Found results, but", "An error occurred")
    search_failed = scraped_text.startswith(failure_prefixes)

    if gemini_api_key and not search_failed:
        summarized_text = await summarize_with_gemini(scraped_text, query, gemini_api_key, gemini_model, research_mode)
        if "⚠️ Gemini Summarization Failed" in summarized_text:
            # Fall back to raw text, keeping the failure notice visible.
            return scraped_text + summarized_text
        return summarized_text
    return scraped_text
109
+
110
# --- FastAPI App ---
app = FastAPI()

class SearchRequest(BaseModel):
    """Request body for POST /api/search."""
    query: str                 # search terms forwarded to Serper
    serper_api_key: str        # caller-supplied Serper key (required)
    search_type: str = "search"      # "search" (web) or "news"
    num_results: int = 4             # clamped to 1..20 downstream
    gemini_api_key: Optional[str] = None   # omit/None to skip summarization
    gemini_model: Optional[str] = "gemini-1.5-flash-latest"
    research_mode: str = "normal"  # "normal" (concise summary) or "deep" (detailed report)
121
+
122
@app.post("/api/search")
async def api_search(request: SearchRequest):
    """HTTP entry point: web search plus optional Gemini summarization.

    Delegates all work to search_and_summarize(); failures come back as
    human-readable strings inside the JSON payload (HTTP status stays 200).
    """
    result = await search_and_summarize(
        request.query, request.serper_api_key, request.search_type, request.num_results,
        request.gemini_api_key, request.gemini_model, request.research_mode
    )
    return {"result": result}
129
 
130
# --- Gradio App ---
def create_gradio_app():
    """Build the Gradio Blocks UI (App tab + Analytics tab) and return it."""
    with gr.Blocks(title="Web Search & Summarize UI") as demo:
        gr.Markdown("# 🔍 AI Search & Summarize")
        with gr.Tabs():
            with gr.Tab("App"):
                gr.Markdown("### Step 1: Web Search")
                query_input = gr.Textbox(label="Search Query")
                serper_api_key_input = gr.Textbox(label="Your Serper API Key", type="password")
                with gr.Row():
                    search_type_input = gr.Radio(["search", "news"], value="search", label="Search Type")
                    num_results_input = gr.Slider(1, 20, value=4, step=1, label="Number of Results")

                gr.Markdown("### Step 2: AI Summarization")
                research_mode_input = gr.Radio(["normal", "deep"], value="normal", label="Research Mode", info="Normal for fast summary, Deep for detailed report.")
                gemini_api_key_input = gr.Textbox(label="Your Gemini API Key", type="password", placeholder="Leave empty to skip summarization")
                gemini_model_input = gr.Textbox(label="Gemini Model", value="gemini-1.5-flash-latest")

                search_button = gr.Button("Search & Summarize", variant="primary")
                output = gr.Textbox(label="Result", lines=25, max_lines=40)

                search_button.click(
                    fn=search_and_summarize,
                    inputs=[query_input, serper_api_key_input, search_type_input, num_results_input, gemini_api_key_input, gemini_model_input, research_mode_input],
                    outputs=output
                )
            with gr.Tab("Analytics"):
                requests_plot = gr.BarPlot(x="date", y="count", title="Daily Requests")
                avg_time_plot = gr.BarPlot(x="date", y="avg_time", title="Avg. Response Time (s)")

                def update_analytics():
                    # FIX: last_n_days_df / last_n_days_avg_time_df are not
                    # defined anywhere in this revision of the file, so the
                    # original one-liner raised NameError on every page load.
                    # Fall back to empty frames so the tab still renders.
                    import pandas as pd
                    try:
                        return last_n_days_df(14), last_n_days_avg_time_df(14)  # noqa: F821
                    except NameError:
                        return (pd.DataFrame(columns=["date", "count"]),
                                pd.DataFrame(columns=["date", "avg_time"]))

                demo.load(update_analytics, [], [requests_plot, avg_time_plot])

    return demo
166
+
167
# --- Mount and Startup ---
# Serve the Gradio UI at "/" on top of the FastAPI app; the JSON API remains
# available at /api/search. NOTE: `app` is rebound to the combined app here.
gradio_ui = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_ui, path="/")

if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)