bk939448 committed on
Commit 94fd0ac · verified · 1 Parent(s): 7139420

Update app.py

Files changed (1):
  1. app.py +98 -49
app.py CHANGED
@@ -1,23 +1,45 @@
  import os
  import asyncio
  import time
- from typing import Optional, List
- from datetime import datetime
  import httpx
  import trafilatura
  import gradio as gr
  from dateutil import parser as dateparser
- from limits import parse
- from limits.aio.storage import MemoryStorage
- from limits.aio.strategies import MovingWindowRateLimiter
- from analytics import record_request, last_n_days_df, last_n_days_avg_time_df
  from fastapi import FastAPI
  from pydantic import BaseModel
- import google.generativeai as genai  # <<< NEW IMPORT

- # --- Core Search Logic (No Changes) ---
  async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
-     # ... (This function is exactly as it was before; no changes)
      start_time = time.time()
      if not serper_api_key: return "Error: Serper API Key is required."
      num_results = max(1, min(20, num_results))
@@ -29,7 +51,7 @@ async def search_web_logic(query: str, serper_api_key: str, search_type: str, nu
              resp = await client.post(endpoint, headers=headers, json=payload)
              if resp.status_code != 200: return f"Error: Search API returned status {resp.status_code}."
              results = resp.json().get("news" if search_type == "news" else "organic", [])
-             if not results: return f"No {search_type} results found."
              urls = [r["link"] for r in results]
          async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
              tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)
@@ -44,75 +66,102 @@ async def search_web_logic(query: str, serper_api_key: str, search_type: str, nu
              else:
                  domain = meta["link"].split("/")[2].replace("www.", ""); chunk = f"## {meta['title']}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
              chunks.append(chunk)
-         if not chunks: return "Found results, but couldn't extract content."
          summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
          await record_request(time.time() - start_time, num_results)
          return summary + "\n---\n".join(chunks)
      except Exception as e:
          return f"An error occurred during web search: {str(e)}"

- # <<< NEW: Gemini Summarization Logic >>>
- async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str) -> str:
      try:
          genai.configure(api_key=gemini_key)
          model = genai.GenerativeModel(model_name)

-         prompt = f"""
- Based on the user's original query, provide a concise summary of the following text.
- Focus on the information that directly answers or relates to the query.
- Filter out any irrelevant details, advertisements, or boilerplate content.
- Present the final summary in clean, easy-to-read markdown format.
-
- USER'S QUERY: "{query}"
-
- TEXT TO SUMMARIZE:
- ---
- {text_to_summarize}
- ---
-         """
-
          response = await model.generate_content_async(prompt)
          return response.text
      except Exception as e:
          return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."

  # --- FastAPI App ---
  app = FastAPI()

- # <<< CHANGE: FastAPI request model updated with new fields >>>
  class SearchRequest(BaseModel):
      query: str
      serper_api_key: str
      search_type: str = "search"
      num_results: int = 4
-     gemini_api_key: Optional[str] = None  # Optional field
-     gemini_model: Optional[str] = "gemini-1.5-flash-latest"  # Optional field with default

  @app.post("/api/search")
  async def api_search(request: SearchRequest):
-     # Step 1: Web Search
-     scraped_text = await search_web_logic(
-         request.query, request.serper_api_key, request.search_type, request.num_results
      )

-     # Step 2: (Optional) Summarize with Gemini
-     if request.gemini_api_key and "Error:" not in scraped_text:
-         summarized_text = await summarize_with_gemini(
-             scraped_text, request.query, request.gemini_api_key, request.gemini_model
-         )
-         # If summarization fails, send the original text back
-         if "⚠️ Gemini Summarization Failed" in summarized_text:
-             return {"result": scraped_text + summarized_text}
-         else:
-             return {"result": summarized_text}
-
-     return {"result": scraped_text}

- # --- Gradio App (no longer needed, but it can be kept) ---
- # ... (The Gradio UI code can be commented out or removed for now)
- # ... (if you also want a UI, we can update that as well)

- # --- Server Startup ---
  if __name__ == "__main__":
      import uvicorn
      uvicorn.run(app, host="0.0.0.0", port=7860)
 
@@ -1,23 +1,45 @@
  import os
  import asyncio
  import time
+ from typing import Optional
+ from datetime import datetime, timezone
  import httpx
  import trafilatura
  import gradio as gr
  from dateutil import parser as dateparser
  from fastapi import FastAPI
  from pydantic import BaseModel
+ import google.generativeai as genai
+ # <<< MISSING IMPORT ADDED BACK >>>
+ from analytics import record_request, last_n_days_df, last_n_days_avg_time_df
+
+ # --- Prompts ---
+ PROMPT_NORMAL = """
+ Based on the user's original query, provide a concise summary (3-4 important bullet points) of the following text. Focus only on the most critical information.
+ USER'S QUERY: "{query}"
+ TEXT TO SUMMARIZE:
+ ---
+ {context_text}
+ ---
+ """
+ PROMPT_DEEP = """
+ As a meticulous research analyst, your task is to synthesize the information from the provided web search results into a detailed and comprehensive report.
+ **Current Date:** {current_date}.
+ **VERY IMPORTANT:** Your top priority is to provide information relevant to this current date and the future. If the user's query is about a recurring event (like an exam), you MUST focus on the upcoming or current event.
+ **User's Original Query:** "{query}"
+ **Instructions:**
+ 1. Combine information from different sources to create a coherent and detailed report.
+ 2. Cite source URLs inline, like this: "(Source: http://...)." The URL is provided in the text.
+ 3. At the end of your report, create a "## Sources" section and list all the unique URLs you used.
+ 4. Use clear markdown with headings and bold text.
+ **Provided Search Results:**
+ ---
+ {context_text}
+ ---
+ """

+ # --- Core Search Logic ---
  async def search_web_logic(query: str, serper_api_key: str, search_type: str, num_results: int) -> str:
 
      start_time = time.time()
      if not serper_api_key: return "Error: Serper API Key is required."
      num_results = max(1, min(20, num_results))

@@ -29,7 +51,7 @@
              resp = await client.post(endpoint, headers=headers, json=payload)
              if resp.status_code != 200: return f"Error: Search API returned status {resp.status_code}."
              results = resp.json().get("news" if search_type == "news" else "organic", [])
+             if not results: return f"No {search_type} results found for '{query}'."
              urls = [r["link"] for r in results]
          async with httpx.AsyncClient(timeout=20, follow_redirects=True) as client:
              tasks = [client.get(u) for u in urls]; responses = await asyncio.gather(*tasks, return_exceptions=True)

@@ -44,75 +66,102 @@
              else:
                  domain = meta["link"].split("/")[2].replace("www.", ""); chunk = f"## {meta['title']}\n**Domain:** {domain}\n**URL:** {meta['link']}\n\n{body.strip()}\n"
              chunks.append(chunk)
+         if not chunks: return f"Found results for '{query}', but couldn't extract content."
          summary = f"Successfully extracted content from {successful_extractions}/{len(results)} results.\n\n---\n\n"
          await record_request(time.time() - start_time, num_results)
          return summary + "\n---\n".join(chunks)
      except Exception as e:
          return f"An error occurred during web search: {str(e)}"

+ # --- Gemini Summarization Logic ---
+ async def summarize_with_gemini(text_to_summarize: str, query: str, gemini_key: str, model_name: str, research_mode: str) -> str:
      try:
          genai.configure(api_key=gemini_key)
          model = genai.GenerativeModel(model_name)
+         current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")

+         if research_mode == 'deep':
+             prompt_template = PROMPT_DEEP
+         else:  # Default to normal
+             prompt_template = PROMPT_NORMAL
+
+         # str.format ignores unused keyword arguments, so PROMPT_NORMAL can safely omit {current_date}
+         prompt = prompt_template.format(query=query, context_text=text_to_summarize, current_date=current_date)
          response = await model.generate_content_async(prompt)
          return response.text
      except Exception as e:
          return f"\n\n--- ⚠️ Gemini Summarization Failed ---\nError: {str(e)}\nReturning raw text instead."

+ # --- Main Orchestrator Function ---
+ async def search_and_summarize(query, serper_api_key, search_type, num_results, gemini_api_key, gemini_model, research_mode):
+     scraped_text = await search_web_logic(query, serper_api_key, search_type, num_results)
+
+     if gemini_api_key and "Error:" not in scraped_text:
+         summarized_text = await summarize_with_gemini(scraped_text, query, gemini_api_key, gemini_model, research_mode)
+         if "⚠️ Gemini Summarization Failed" in summarized_text:
+             return scraped_text + summarized_text
+         else:
+             return summarized_text
+     return scraped_text
+
  # --- FastAPI App ---
  app = FastAPI()
+ # Add CORS middleware if you plan to call the API from a different domain/frontend
+ # from fastapi.middleware.cors import CORSMiddleware
+ # app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"])

  class SearchRequest(BaseModel):
      query: str
      serper_api_key: str
      search_type: str = "search"
      num_results: int = 4
+     gemini_api_key: Optional[str] = None
+     gemini_model: Optional[str] = "gemini-1.5-flash-latest"
+     research_mode: str = "normal"

  @app.post("/api/search")
  async def api_search(request: SearchRequest):
+     result = await search_and_summarize(
+         request.query, request.serper_api_key, request.search_type, request.num_results,
+         request.gemini_api_key, request.gemini_model, request.research_mode
      )
+     return {"result": result}

+ # --- Gradio App ---
+ def create_gradio_app():
+     with gr.Blocks(title="Web Search & Summarize UI") as demo:
+         gr.Markdown("# 🔍 AI Search & Summarize")
+         with gr.Tabs():
+             with gr.Tab("App"):
+                 gr.Markdown("### Step 1: Web Search")
+                 query_input = gr.Textbox(label="Search Query")
+                 serper_api_key_input = gr.Textbox(label="Your Serper API Key", type="password")
+                 with gr.Row():
+                     search_type_input = gr.Radio(["search", "news"], value="search", label="Search Type")
+                     num_results_input = gr.Slider(1, 20, value=4, step=1, label="Number of Results")
+
+                 gr.Markdown("### Step 2: AI Summarization")
+                 research_mode_input = gr.Radio(["normal", "deep"], value="normal", label="Research Mode", info="Normal for a fast summary, Deep for a detailed report.")
+                 gemini_api_key_input = gr.Textbox(label="Your Gemini API Key", type="password", placeholder="Leave empty to skip summarization")
+                 gemini_model_input = gr.Textbox(label="Gemini Model", value="gemini-1.5-flash-latest")
+                 search_button = gr.Button("Search & Summarize", variant="primary")
+                 output = gr.Textbox(label="Result", lines=25, max_lines=40)
+
+                 search_button.click(
+                     fn=search_and_summarize,
+                     inputs=[query_input, serper_api_key_input, search_type_input, num_results_input, gemini_api_key_input, gemini_model_input, research_mode_input],
+                     outputs=output
+                 )
+             with gr.Tab("Analytics"):
+                 requests_plot = gr.BarPlot(x="date", y="count", title="Daily Requests")
+                 avg_time_plot = gr.BarPlot(x="date", y="avg_time", title="Avg. Response Time (s)")
+                 def update_analytics(): return last_n_days_df(14), last_n_days_avg_time_df(14)
+                 demo.load(update_analytics, [], [requests_plot, avg_time_plot])
+     return demo

+ # --- Mount and Startup ---
+ gradio_ui = create_gradio_app()
+ app = gr.mount_gradio_app(app, gradio_ui, path="/")

  if __name__ == "__main__":
      import uvicorn
      uvicorn.run(app, host="0.0.0.0", port=7860)
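
With this change, /api/search accepts the new optional Gemini fields. Below is a minimal client sketch for exercising the updated endpoint; it assumes the app is running locally on port 7860, and both API keys are placeholders you must replace:

# client_example.py -- hypothetical client for the updated /api/search endpoint
import httpx

payload = {
    "query": "latest developments in quantum computing",
    "serper_api_key": "YOUR_SERPER_KEY",        # required
    "search_type": "news",                      # "search" or "news"
    "num_results": 4,
    "gemini_api_key": "YOUR_GEMINI_KEY",        # omit to skip summarization
    "gemini_model": "gemini-1.5-flash-latest",
    "research_mode": "deep",                    # "normal" (short summary) or "deep" (detailed report)
}

resp = httpx.post("http://localhost:7860/api/search", json=payload, timeout=120.0)
print(resp.json()["result"])

If gemini_api_key is omitted, the endpoint returns the raw scraped text, matching the fallback path in search_and_summarize.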