Update app.py
Browse files
app.py
CHANGED
|
@@ -1,31 +1,108 @@
|
|
|
|
|
| 1 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from fastapi import FastAPI
|
| 3 |
-
from fastapi.responses import StreamingResponse
|
| 4 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# --- FastAPI App with CORS ---
|
| 8 |
-
app = FastAPI(title="
|
| 9 |
app.add_middleware(
|
| 10 |
CORSMiddleware,
|
| 11 |
allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
|
| 12 |
)
|
| 13 |
|
| 14 |
-
# ---
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# --- Server Startup ---
|
| 31 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
import os
|
| 2 |
import asyncio
|
| 3 |
+
import re
|
| 4 |
+
import json
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from datetime import datetime, timezone
|
| 7 |
from fastapi import FastAPI
|
|
|
|
|
|
|
| 8 |
from pydantic import BaseModel
|
| 9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
import httpx
|
| 11 |
+
import trafilatura
|
| 12 |
+
import google.generativeai as genai
|
| 13 |
|
| 14 |
# --- FastAPI App with CORS ---
app = FastAPI(title="AI Research Agent API")

# Wide-open CORS so browser front-ends hosted anywhere can call this API.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
|
| 20 |
|
| 21 |
+
# --- Prompts ---
# Every template is filled via str.format(); any literal brace in the template
# must be doubled ({{ }}) or format() treats it as a replacement field and raises.
PROMPT_NORMAL = """Concisely summarize the key points from the following text based on the user's query: "{query}". Focus on the most critical information. PROVIDED TEXT: --- {context_text} ---"""
PROMPT_DEEP = """As a research analyst, synthesize the information from the provided texts into a detailed report. Current Date: {current_date}. User's Query: "{query}". Instructions: Create a detailed report, combining facts from all sources. Cite source URLs inline, like this: (Source: http://...). At the end, create a "## Sources" section listing all unique URLs. Use clear markdown. Provided Texts: --- {context_text} ---"""
# BUG FIX: the JSON example below previously used single braces, which made
# PROMPT_ULTRADEEP_PLANNER.format(query=...) raise and broke ultradeep mode.
PROMPT_ULTRADEEP_PLANNER = """You are a research planner. Based on the user's query, create a research plan. Your output MUST be a valid JSON object like this: {{"queries": ["query 1", "query 2"]}}. Do not add any other text. USER'S QUERY: "{query}" """
PROMPT_ULTRADEEP_SYNTHESIZER = """You are a master research analyst. Synthesize the collected text into a single, comprehensive, well-structured report based on the user's original query: "{query}". Current Date: {current_date}. Instructions: Synthesize a logical narrative organized by topic. If critical info is missing, you can suggest it, but generate the best possible report with the available info. Cite source URLs inline `(Source: http://...)` and conclude with a "## Sources" list. Collected Raw Text: --- {context_text} ---"""
|
| 26 |
+
|
| 27 |
+
# --- Core Logic with Better Error Handling ---
|
| 28 |
+
async def search_web_logic(query: str, serper_api_key: str) -> str:
    """Search the web via Serper for *query*, scrape the result pages, and
    return their extracted text joined by "---" separators.

    Any failure is reported as a string starting with "Error:" rather than
    an exception, so callers can branch on the prefix.
    """
    if not serper_api_key:
        return "Error: Serper API Key is missing."
    try:
        request_headers = {"X-API-KEY": serper_api_key, "Content-Type": "application/json"}
        async with httpx.AsyncClient(timeout=15) as search_client:
            resp = await search_client.post(
                "https://google.serper.dev/search",
                headers=request_headers,
                json={"q": query, "num": 7},
            )
        if resp.status_code == 401:
            return "Error: Invalid Serper API Key."
        if resp.status_code != 200:
            return f"Error: Serper API returned status {resp.status_code}."
        results = resp.json().get("organic", [])
        if not results:
            return f"Error: No web results found for query '{query}'."
        # Fetch all result pages concurrently; failed fetches come back as exceptions.
        async with httpx.AsyncClient(timeout=20, follow_redirects=True) as page_client:
            responses = await asyncio.gather(
                *(page_client.get(r["link"]) for r in results),
                return_exceptions=True,
            )
        texts = []
        for meta, response in zip(results, responses):
            if isinstance(response, Exception):
                continue
            body = trafilatura.extract(response.text)
            if body:
                texts.append(f"Source URL: {meta['link']}\nContent: {body.strip()}\n")
        if not texts:
            return "Error: Found web results, but could not extract content."
        return "\n---\n".join(texts)
    except Exception as e:
        return f"Error during web search: {str(e)}"
|
| 45 |
+
|
| 46 |
+
async def call_gemini(prompt: str, gemini_key: str, model_name: str, json_mode: bool = False) -> str:
    """Send *prompt* to the named Gemini model and return the text reply.

    Failures (missing key or API error) are returned as a JSON string with
    an "error" key instead of raising, matching the callers' conventions.
    """
    if not gemini_key:
        return json.dumps({"error": "Gemini API Key is missing."})
    try:
        genai.configure(api_key=gemini_key)
        model = genai.GenerativeModel(model_name)
        # Request raw JSON output when the caller will machine-parse the reply.
        config = {"response_mime_type": "application/json"} if json_mode else None
        reply = await model.generate_content_async(prompt, generation_config=config)
        return reply.text
    except Exception as e:
        return json.dumps({"error": f"Error calling Gemini: {str(e)}"})
|
| 55 |
+
|
| 56 |
+
# --- AI Agent Logic (Non-Streaming) ---
|
| 57 |
+
async def ultradeep_research_agent(query: str, serper_api_key: str, gemini_key: str, model_name: str) -> str:
    """Three-step plan/search/synthesize pipeline for 'ultradeep' mode.

    Returns the final markdown report, or a string starting with "Error:"
    describing which phase failed.
    """
    # Step 1: Plan — ask Gemini for a JSON object of sub-queries.
    plan_str = await call_gemini(
        PROMPT_ULTRADEEP_PLANNER.format(query=query), gemini_key, model_name, json_mode=True
    )
    try:
        json_blob = re.search(r'\{.*\}', plan_str, re.DOTALL)
        if not json_blob:
            raise ValueError("No JSON object found in Gemini's planner response.")
        plan = json.loads(json_blob.group(0))
        if "error" in plan:
            return f"Error during planning phase: {plan['error']}"
        search_queries = plan["queries"]
    except Exception as e:
        return f"Error: Could not create a valid research plan. Details: {str(e)}\nRaw Response: {plan_str}"

    # Step 2: Execute — run every planned search concurrently.
    search_results = await asyncio.gather(
        *(search_web_logic(sub_query, serper_api_key) for sub_query in search_queries)
    )
    # Drop failed searches; keep whatever content did come back.
    all_scraped_text = "\n".join(res for res in search_results if not res.startswith("Error:"))
    if not all_scraped_text:
        return "Error: Could not retrieve any web content for the planned queries. Check Serper key."

    # Step 3: Synthesize — merge all scraped text into one report.
    current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    synthesizer_prompt = PROMPT_ULTRADEEP_SYNTHESIZER.format(
        query=query, current_date=current_date, context_text=all_scraped_text
    )
    return await call_gemini(synthesizer_prompt, gemini_key, model_name)
|
| 80 |
+
|
| 81 |
+
# --- The Single, Unified FastAPI Endpoint ---
|
| 82 |
+
class ResearchRequest(BaseModel):
    """Request body for POST /api/research."""
    query: str  # the user's research question
    serper_api_key: str  # caller-supplied Serper (web search) API key
    gemini_api_key: str  # caller-supplied Google Gemini API key
    research_mode: str  # 'ultradeep', 'deep', or anything else for normal mode (see api_research)
    gemini_model: str = "gemini-1.5-flash-latest"  # Gemini model name; defaults to the fast tier
|
| 88 |
+
|
| 89 |
+
@app.post("/api/research")
async def api_research(request: ResearchRequest):
    """Unified research endpoint; always responds with {"result": <markdown or error string>}."""
    # 'ultradeep' delegates to the multi-step plan/search/synthesize agent.
    if request.research_mode == 'ultradeep':
        report = await ultradeep_research_agent(
            request.query, request.serper_api_key, request.gemini_api_key, request.gemini_model
        )
        return {"result": report}

    # Normal and Deep modes: a single search pass followed by one Gemini call.
    scraped_text = await search_web_logic(request.query, request.serper_api_key)
    if scraped_text.startswith("Error:"):
        return {"result": scraped_text}

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    template = PROMPT_DEEP if request.research_mode == "deep" else PROMPT_NORMAL
    summary = await call_gemini(
        template.format(query=request.query, context_text=scraped_text, current_date=today),
        request.gemini_api_key,
        request.gemini_model,
    )
    return {"result": summary}
|
| 106 |
|
| 107 |
# --- Server Startup ---
|
| 108 |
if __name__ == "__main__":
|