Spaces:

vikramvasudevan
/

deep-research

Sleeping

App Files Files Community

vikramvasudevan committed on Jul 13, 2025

Commit

968cc77

verified ·

1 Parent(s): ae70c09

Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

deep_research.py +0 -0
formatter_agent.py +9 -0
main.py +49 -63
planner_agent.py +16 -0
reporting_agent.py +26 -0
research_manager.py +32 -0
search_agent.py +11 -0
validator_demo.py +63 -0

deep_research.py ADDED Viewed

File without changes

formatter_agent.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from agents import Agent
+formatter_agent = Agent(
+    name="FormatterAgent",
+    instructions="You are a helpful research assistant. Given a list of web searches, format them into a markdown report. Title this as Search Plan. Output only the markdown for the list of searches, no other text.",
+    model="gpt-4o-mini",
+    output_type=str,
+)

main.py CHANGED Viewed

@@ -1,63 +1,49 @@
-from json import load
-from openai import OpenAI
-from dotenv import load_dotenv
-from pydantic import BaseModel
-import gradio as gr
-import json
-class SearchOutput(BaseModel):
-    query: str
-    result: str
-class ValidatorOutput(BaseModel):
-    searchOutput: SearchOutput
-    is_valid: bool
-def search_agent(query: str) -> SearchOutput | None:
-    client = OpenAI()
-    response = client.chat.completions.parse(
-        model="gpt-4o-mini",
-        messages=[{"role": "user", "content": query}],
-        response_format=SearchOutput,
-    )
-    return response.choices[0].message.parsed
-def validate_search_results(search_results: str | SearchOutput | None) -> ValidatorOutput | None:
-    client = OpenAI()
-    if search_results is None:
-        return None
-    response = client.chat.completions.parse(
-        model="gpt-4o-mini",
-        messages=[{"role": "user", "content": search_results.model_dump_json() if isinstance(search_results, SearchOutput) else search_results}],
-        response_format=ValidatorOutput,
-    )
-    return response.choices[0].message.parsed
-def render_gradio_interface():
-    with gr.Blocks() as demo:
-        gr.Markdown("# Deep Research")
-        query = gr.Textbox(label="Query", value="What is the capital of France?")
-        searchButton = gr.Button("Search")
-        searchResults = gr.Textbox(label="Search Results")
-        validateButton = gr.Button("Validate")
-        validateResults = gr.Textbox(label="Validate Results")
-        query.submit(fn=search_agent, inputs=query, outputs=searchResults)
-        searchResults.change(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
-        searchButton.click(fn=search_agent, inputs=query, outputs=searchResults)
-        validateButton.click(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
-        demo.launch()
-def main():
-    print("Hello from deep-research!")
-    load_dotenv(override=True)
-    render_gradio_interface()
-    # search_results = search_agent("What is the capital of France?")
-    # print(search_results)
-    # isValid = validate_search_results(search_results)
-    # print(isValid)
-if __name__ == "__main__":
-    main()

+from dotenv import load_dotenv
+import asyncio
+from research_manager import ResearchManager
+import gradio as gr
+from planner_agent import WebSearchPlan
+from agents.tracing import trace
+load_dotenv(override=True)
+async def run(query: str):
+    with trace("deep-research"):
+        yield "Planning searches..."
+        search_plan = await ResearchManager().plan_searches(query)
+        yield "Formatting search plan..."
+        search_plan_markdown = ""
+        async for chunk in ResearchManager().format_search_plan(search_plan):
+            search_plan_markdown += chunk
+            yield search_plan_markdown
+        yield "Executing search plan..."
+        search_results = await ResearchManager().execute_search_plan(search_plan)
+        yield "Writing report..."
+        report = await ResearchManager().write_report(query, search_results)
+        yield report.markdown_report
+async def execute_search_plan(search_plan_str: str):
+    search_plan = WebSearchPlan.model_validate_json(search_plan_str)
+    results = await ResearchManager().execute_search_plan(search_plan)
+    return "\n\n".join(results)
+with gr.Blocks() as ui:
+    gr.Markdown("# Deep Research")
+    query_textbox = gr.Textbox(
+        label="What topic would you like to research?",
+        value="modern musical instruments",
+    )
+    run_button = gr.Button("Run", variant="primary")
+    search_plan_markdown = gr.Markdown(label="Search Plan")
+    run_button.click(
+        lambda: gr.update(interactive=False), inputs=None, outputs=run_button
+    ).then(fn=run, inputs=query_textbox, outputs=search_plan_markdown).then(
+        lambda: gr.update(interactive=True), inputs=None, outputs=run_button
+    )
+    query_textbox.submit(fn=run, inputs=query_textbox, outputs=search_plan_markdown)
+    ui.launch(inbrowser=True)

planner_agent.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from pydantic import BaseModel, Field
+from agents import Agent
+HOW_MANY_SEARCHES = 3
+class WebSearchPlan(BaseModel):
+    searches: list[str] = Field(description="A list of web searches to perform to best answer the query.")
+planner_agent = Agent(
+    name="PlannerAgent",
+    instructions=f"You are a helpful research assistant. Given a query, come up with a set of web searches \
+to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. Give only the list as output, no other text.",
+    model="gpt-4o-mini",
+    output_type=WebSearchPlan,
+)

reporting_agent.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from agents import Agent
+from pydantic import BaseModel, Field
+# Prompt for the report writer: it is told it will receive the original query
+# and initial research, should outline first, then return a long markdown report.
+INSTRUCTIONS = (
+    "You are a senior researcher tasked with writing a cohesive report for a research query. "
+    "You will be provided with the original query, and some initial research done by a research assistant.\n"
+    "You should first come up with an outline for the report that describes the structure and "
+    "flow of the report. Then, generate the report and return that as your final output.\n"
+    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
+    "for 5-10 pages of content, at least 1000 words."
+)
+# Structured output type of the reporting agent (summary, report, follow-ups).
+class ReportData(BaseModel):
+    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")
+    markdown_report: str = Field(description="The final report")
+    follow_up_questions: list[str] = Field(description="Suggested topics to research further")
+# Agent configured with the prompt above; its output is parsed as ReportData.
+reporting_agent = Agent(
+    name="Reporting agent",
+    instructions=INSTRUCTIONS,
+    model="gpt-4o-mini",
+    output_type=ReportData
+)

research_manager.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from agents import Runner
+from planner_agent import planner_agent, WebSearchPlan
+from formatter_agent import formatter_agent
+from openai.types.responses import ResponseTextDeltaEvent
+from search_agent import search_agent
+from reporting_agent import reporting_agent, ReportData
+import asyncio
+class ResearchManager:
+    async def plan_searches(self, query:str):
+        result = await Runner.run(planner_agent, f"Query: {query}")
+        return result.final_output_as(WebSearchPlan)
+    async def format_search_plan(self, search_plan: WebSearchPlan):
+        result = Runner.run_streamed(formatter_agent, search_plan.model_dump_json())
+        async for chunk in result.stream_events():
+            if chunk.type == "raw_response_event" and isinstance(chunk.data, ResponseTextDeltaEvent):
+                print(chunk.data.delta, end="", flush=True)
+                yield chunk.data.delta
+    async def search(self, search_term: str):
+        result = await Runner.run(search_agent, search_term)
+        return result.final_output_as(str)
+    async def execute_search_plan(self, search_plan: WebSearchPlan):
+       tasks = [self.search(search_term) for search_term in search_plan.searches]
+       results = await asyncio.gather(*tasks)
+       return results
+    async def write_report(self, query:str, search_results:list[str]):
+        result = await Runner.run(reporting_agent, f"Query: {query}\n\nSearch Results: {search_results}")
+        return result.final_output_as(ReportData)

search_agent.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from agents import Agent, WebSearchTool, ModelSettings
+INSTRUCTIONS = "You are a research assistant. Given a search term, you search the web for that term and produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300 words. Capture the main points. Write succintly, no need to have complete sentences or good grammar. This will be consumed by someone synthesizing a report, so its vital you capture the essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
+search_agent = Agent(
+    name="Search agent",
+    instructions=INSTRUCTIONS,
+    tools=[WebSearchTool(search_context_size="low")],
+    model="gpt-4o-mini",
+    model_settings=ModelSettings(tool_choice="required"),
+)

validator_demo.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from json import load
+from openai import OpenAI
+from dotenv import load_dotenv
+from pydantic import BaseModel
+import gradio as gr
+import json
+# Response schema requested from the model by search_agent.
+class SearchOutput(BaseModel):
+    query: str
+    result: str
+# Response schema requested from the model by validate_search_results.
+class ValidatorOutput(BaseModel):
+    searchOutput: SearchOutput
+    is_valid: bool
+def search_agent(query: str) -> SearchOutput | None:
+    client = OpenAI()
+    response = client.chat.completions.parse(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": query}],
+        response_format=SearchOutput,
+    )
+    return response.choices[0].message.parsed
+def validate_search_results(search_results: str | SearchOutput | None) -> ValidatorOutput | None:
+    client = OpenAI()
+    if search_results is None:
+        return None
+    response = client.chat.completions.parse(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": search_results.model_dump_json() if isinstance(search_results, SearchOutput) else search_results}],
+        response_format=ValidatorOutput,
+    )
+    return response.choices[0].message.parsed
+def render_gradio_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Deep Research")
+        query = gr.Textbox(label="Query", value="What is the capital of France?")
+        searchButton = gr.Button("Search")
+        searchResults = gr.Textbox(label="Search Results")
+        validateButton = gr.Button("Validate")
+        validateResults = gr.Textbox(label="Validate Results")
+        query.submit(fn=search_agent, inputs=query, outputs=searchResults)
+        searchResults.change(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
+        searchButton.click(fn=search_agent, inputs=query, outputs=searchResults)
+        validateButton.click(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
+        demo.launch()
+def main():
+    """Print a greeting, load environment variables, then start the Gradio demo."""
+    print("Hello from deep-research!")
+    # override=True lets values from the .env file replace existing variables.
+    load_dotenv(override=True)
+    render_gradio_interface()
+    # Disabled manual check that calls the two functions without the UI:
+    # search_results = search_agent("What is the capital of France?")
+    # print(search_results)
+    # isValid = validate_search_results(search_results)
+    # print(isValid)
+# Only start the demo when this file is executed as a script.
+if __name__ == "__main__":
+    main()