vikramvasudevan committed on
Commit
968cc77
·
verified ·
1 Parent(s): ae70c09

Upload folder using huggingface_hub

Browse files
deep_research.py ADDED
File without changes
formatter_agent.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Agent
2
+
3
# Prompt for the formatter agent, split sentence by sentence for readability;
# the concatenated text is what the agent receives.
_FORMATTER_INSTRUCTIONS = (
    "You are a helpful research assistant. "
    "Given a list of web searches, format them into a markdown report. "
    "Title this as Search Plan. "
    "Output only the markdown for the list of searches, no other text."
)

# Agent that renders a list of planned web searches as a markdown "Search Plan".
formatter_agent = Agent(
    model="gpt-4o-mini",
    name="FormatterAgent",
    instructions=_FORMATTER_INSTRUCTIONS,
    output_type=str,
)
9
+
main.py CHANGED
@@ -1,63 +1,49 @@
1
- from json import load
2
- from openai import OpenAI
3
- from dotenv import load_dotenv
4
- from pydantic import BaseModel
5
- import gradio as gr
6
- import json
7
-
8
- class SearchOutput(BaseModel):
9
- query: str
10
- result: str
11
-
12
- class ValidatorOutput(BaseModel):
13
- searchOutput: SearchOutput
14
- is_valid: bool
15
-
16
- def search_agent(query: str) -> SearchOutput | None:
17
- client = OpenAI()
18
- response = client.chat.completions.parse(
19
- model="gpt-4o-mini",
20
- messages=[{"role": "user", "content": query}],
21
- response_format=SearchOutput,
22
- )
23
- return response.choices[0].message.parsed
24
-
25
- def validate_search_results(search_results: str | SearchOutput | None) -> ValidatorOutput | None:
26
- client = OpenAI()
27
- if search_results is None:
28
- return None
29
- response = client.chat.completions.parse(
30
- model="gpt-4o-mini",
31
- messages=[{"role": "user", "content": search_results.model_dump_json() if isinstance(search_results, SearchOutput) else search_results}],
32
- response_format=ValidatorOutput,
33
- )
34
- return response.choices[0].message.parsed
35
-
36
- def render_gradio_interface():
37
- with gr.Blocks() as demo:
38
- gr.Markdown("# Deep Research")
39
- query = gr.Textbox(label="Query", value="What is the capital of France?")
40
- searchButton = gr.Button("Search")
41
- searchResults = gr.Textbox(label="Search Results")
42
- validateButton = gr.Button("Validate")
43
- validateResults = gr.Textbox(label="Validate Results")
44
-
45
- query.submit(fn=search_agent, inputs=query, outputs=searchResults)
46
- searchResults.change(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
47
- searchButton.click(fn=search_agent, inputs=query, outputs=searchResults)
48
- validateButton.click(fn=validate_search_results, inputs=searchResults, outputs=validateResults)
49
-
50
- demo.launch()
51
-
52
- def main():
53
- print("Hello from deep-research!")
54
- load_dotenv(override=True)
55
- render_gradio_interface()
56
- # search_results = search_agent("What is the capital of France?")
57
- # print(search_results)
58
- # isValid = validate_search_results(search_results)
59
- # print(isValid)
60
-
61
-
62
- if __name__ == "__main__":
63
- main()
 
1
+ from dotenv import load_dotenv
2
+ import asyncio
3
+ from research_manager import ResearchManager
4
+ import gradio as gr
5
+ from planner_agent import WebSearchPlan
6
+ from agents.tracing import trace
7
+
8
+ load_dotenv(override=True)
9
+
10
+
11
async def run(query: str):
    """Drive the research pipeline, yielding UI updates as it progresses.

    This is an async generator: each yielded string is a new value for the
    output component it is bound to.

    Args:
        query: Topic to research, as typed by the user.

    Yields:
        Status messages, the progressively growing search-plan markdown, and
        finally the markdown of the finished report. For a blank query a
        single prompt asking for a topic is yielded instead.
    """
    if not query or not query.strip():
        # Nothing to research: do not spend model calls on a blank prompt.
        yield "Please enter a topic to research."
        return

    # ResearchManager keeps no state between calls, so one instance serves
    # every stage of the pipeline.
    manager = ResearchManager()

    with trace("deep-research"):
        yield "Planning searches..."
        search_plan = await manager.plan_searches(query)

        yield "Formatting search plan..."
        search_plan_markdown = ""
        async for chunk in manager.format_search_plan(search_plan):
            # Re-yield the accumulated text so the output shows the whole
            # plan so far rather than only the latest delta.
            search_plan_markdown += chunk
            yield search_plan_markdown

        yield "Executing search plan..."
        search_results = await manager.execute_search_plan(search_plan)

        yield "Writing report..."
        report = await manager.write_report(query, search_results)
        yield report.markdown_report
26
+
27
+
28
async def execute_search_plan(search_plan_str: str):
    """Run the searches of a JSON-encoded plan and return them as one string.

    Args:
        search_plan_str: JSON text that validates as a ``WebSearchPlan``.

    Returns:
        The individual search results joined by blank lines.
    """
    plan = WebSearchPlan.model_validate_json(search_plan_str)
    manager = ResearchManager()
    summaries = await manager.execute_search_plan(plan)
    blank_line = "\n\n"
    return blank_line.join(summaries)
32
+
33
+
34
# Gradio front end: a textbox for the topic, a Run button, and a markdown pane
# that `run` streams status messages, the search plan and the report into.
with gr.Blocks() as ui:
    gr.Markdown("# Deep Research")
    query_textbox = gr.Textbox(
        label="What topic would you like to research?",
        value="modern musical instruments",
    )
    run_button = gr.Button("Run", variant="primary")
    search_plan_markdown = gr.Markdown(label="Search Plan")

    # Clicking Run and pressing Enter in the textbox go through the same
    # chain: disable the button, run the pipeline, re-enable the button.
    # Previously only the click path disabled the button, so submitting with
    # Enter left it active and allowed overlapping runs.
    for trigger in (run_button.click, query_textbox.submit):
        trigger(
            lambda: gr.update(interactive=False), inputs=None, outputs=run_button
        ).then(fn=run, inputs=query_textbox, outputs=search_plan_markdown).then(
            lambda: gr.update(interactive=True), inputs=None, outputs=run_button
        )

ui.launch(inbrowser=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
planner_agent.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from agents import Agent
3
+
4
+
5
+ HOW_MANY_SEARCHES = 3
6
+
7
# Structured output of the planner agent: the searches to run for one query.
# Documented with comments rather than a docstring so the generated JSON
# schema stays exactly as before.
class WebSearchPlan(BaseModel):
    searches: list[str] = Field(
        description="A list of web searches to perform to best answer the query.",
    )
9
+
10
# Planner agent: turns a user query into HOW_MANY_SEARCHES search terms,
# returned as a structured WebSearchPlan.
planner_agent = Agent(
    name="PlannerAgent",
    # Adjacent literals are concatenated at compile time. A backslash
    # continuation inside a single literal would splice the next source line's
    # leading whitespace into the prompt, so it is avoided here.
    instructions=(
        "You are a helpful research assistant. Given a query, come up with a set of web searches "
        f"to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for. "
        "Give only the list as output, no other text."
    ),
    model="gpt-4o-mini",
    output_type=WebSearchPlan,
)
reporting_agent.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Agent
2
+ from pydantic import BaseModel, Field
3
+
4
# System prompt for the report-writing agent. Each tuple entry is one line of
# the prompt; the entries are joined with newlines.
INSTRUCTIONS = "\n".join(
    (
        "You are a senior researcher tasked with writing a cohesive report for a research query. "
        "You will be provided with the original query, and some initial research done by a research assistant.",
        "You should first come up with an outline for the report that describes the structure and "
        "flow of the report. Then, generate the report and return that as your final output.",
        "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
        "for 5-10 pages of content, at least 1000 words.",
    )
)
12
+
13
# Structured output of the reporting agent. Documented with comments rather
# than a docstring so the generated JSON schema stays exactly as before.
class ReportData(BaseModel):
    # Brief recap of the findings.
    short_summary: str = Field(
        description="A short 2-3 sentence summary of the findings.",
    )

    # The full report body.
    markdown_report: str = Field(
        description="The final report",
    )

    # Ideas for subsequent research.
    follow_up_questions: list[str] = Field(
        description="Suggested topics to research further",
    )
19
+
20
+
21
# Agent that writes the final report; its output is parsed into ReportData.
reporting_agent = Agent(
    model="gpt-4o-mini",
    name="Reporting agent",
    output_type=ReportData,
    instructions=INSTRUCTIONS,
)
research_manager.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Runner
2
+ from planner_agent import planner_agent, WebSearchPlan
3
+ from formatter_agent import formatter_agent
4
+ from openai.types.responses import ResponseTextDeltaEvent
5
+ from search_agent import search_agent
6
+ from reporting_agent import reporting_agent, ReportData
7
+ import asyncio
8
+
9
class ResearchManager:
    """Coordinates the planner, formatter, search and reporting agents.

    The class keeps no state of its own; every method is a thin asynchronous
    wrapper around a ``Runner`` call for one agent.
    """

    async def plan_searches(self, query: str) -> WebSearchPlan:
        """Ask the planner agent which web searches to run for ``query``."""
        result = await Runner.run(planner_agent, f"Query: {query}")
        return result.final_output_as(WebSearchPlan)

    async def format_search_plan(self, search_plan: WebSearchPlan):
        """Stream the formatter agent's rendering of ``search_plan``.

        Yields:
            Text deltas in arrival order; the caller concatenates them.
        """
        result = Runner.run_streamed(formatter_agent, search_plan.model_dump_json())
        async for event in result.stream_events():
            # Only raw text-delta events are forwarded; all others are skipped.
            if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent):
                # Echo the delta to the console as it arrives.
                print(event.data.delta, end="", flush=True)
                yield event.data.delta

    async def search(self, search_term: str) -> str:
        """Run the search agent for one term and return its final output."""
        result = await Runner.run(search_agent, search_term)
        return result.final_output_as(str)

    async def execute_search_plan(self, search_plan: WebSearchPlan) -> list[str]:
        """Run every planned search concurrently.

        A search that raises no longer discards the results of the others:
        failed searches are reported on the console and left out.

        Returns:
            The results of the searches that succeeded, in plan order.

        Raises:
            BaseException: The first search error, when the plan was not
                empty and every search in it failed.
        """
        terms = list(search_plan.searches)
        outcomes = await asyncio.gather(
            *(self.search(term) for term in terms),
            return_exceptions=True,
        )

        summaries: list[str] = []
        failures: list[BaseException] = []
        for term, outcome in zip(terms, outcomes):
            if isinstance(outcome, BaseException):
                print(f"Search failed for {term!r}: {outcome}")
                failures.append(outcome)
            else:
                summaries.append(outcome)

        if failures and not summaries:
            # Nothing usable came back; surface the error instead of letting
            # a report be written from no research at all.
            raise failures[0]
        return summaries

    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Have the reporting agent write a report from the search results."""
        result = await Runner.run(reporting_agent, f"Query: {query}\n\nSearch Results: {search_results}")
        return result.final_output_as(ReportData)
search_agent.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agents import Agent, WebSearchTool, ModelSettings
2
+
3
# System prompt for the web-search agent. Grammar and spelling are corrected
# ("must be 2-3 paragraphs", "succinctly", "it's vital") so the length
# constraint reads unambiguously.
INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than "
    "300 words. Capture the main points. Write succinctly, no need to have complete sentences or "
    "good grammar. This will be consumed by someone synthesizing a report, so it's vital you "
    "capture the essence and ignore any fluff. Do not include any additional commentary other "
    "than the summary itself."
)
4
+
5
# Web-search agent: it is given a low-context web search tool and, through
# tool_choice="required", must call a tool before answering.
_web_search_tool = WebSearchTool(search_context_size="low")
_search_settings = ModelSettings(tool_choice="required")

search_agent = Agent(
    name="Search agent",
    model="gpt-4o-mini",
    instructions=INSTRUCTIONS,
    tools=[_web_search_tool],
    model_settings=_search_settings,
)
validator_demo.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from json import load
2
+ from openai import OpenAI
3
+ from dotenv import load_dotenv
4
+ from pydantic import BaseModel
5
+ import gradio as gr
6
+ import json
7
+
8
# Structured response that search_agent asks the model to produce.
# Documented with comments rather than a docstring so the generated JSON
# schema stays exactly as before.
class SearchOutput(BaseModel):
    query: str
    result: str
11
+
12
# Structured response that validate_search_results asks the model to produce:
# a SearchOutput together with a validity flag.
class ValidatorOutput(BaseModel):
    searchOutput: SearchOutput
    is_valid: bool
15
+
16
def search_agent(query: str) -> SearchOutput | None:
    """Send ``query`` to gpt-4o-mini and return its structured answer.

    The query is passed as a single user message and the reply is parsed
    into a ``SearchOutput``.

    Args:
        query: The question to put to the model.

    Returns:
        The parsed message of the first choice.
    """
    user_message = {"role": "user", "content": query}
    completion = OpenAI().chat.completions.parse(
        model="gpt-4o-mini",
        messages=[user_message],
        response_format=SearchOutput,
    )
    first_choice = completion.choices[0]
    return first_choice.message.parsed
24
+
25
def validate_search_results(search_results: "str | SearchOutput | None") -> "ValidatorOutput | None":
    """Ask gpt-4o-mini to judge a search result.

    Args:
        search_results: A ``SearchOutput`` (sent as its JSON dump), raw text
            (sent as is), or ``None``.

    Returns:
        The parsed ``ValidatorOutput`` of the first choice, or ``None`` when
        there is nothing to validate (``None`` or a blank string, which is
        what a cleared textbox delivers).
    """
    if search_results is None:
        return None
    if isinstance(search_results, str) and not search_results.strip():
        # A blank message would only waste an API call.
        return None

    if isinstance(search_results, SearchOutput):
        content = search_results.model_dump_json()
    else:
        content = search_results

    # The client is created only once a request is certain to be made.
    client = OpenAI()
    response = client.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": content}],
        response_format=ValidatorOutput,
    )
    return response.choices[0].message.parsed
35
+
36
def render_gradio_interface():
    """Build and launch the search/validate demo UI.

    Searching is triggered by submitting the query box or clicking "Search".
    Validation is triggered by any change to the results box or by clicking
    "Validate".
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Deep Research")
        query_box = gr.Textbox(label="Query", value="What is the capital of France?")
        search_button = gr.Button("Search")
        results_box = gr.Textbox(label="Search Results")
        validate_button = gr.Button("Validate")
        validation_box = gr.Textbox(label="Validate Results")

        # Each pair of events shares one wiring, so it is defined once.
        search_wiring = dict(fn=search_agent, inputs=query_box, outputs=results_box)
        validate_wiring = dict(fn=validate_search_results, inputs=results_box, outputs=validation_box)

        query_box.submit(**search_wiring)
        results_box.change(**validate_wiring)
        search_button.click(**search_wiring)
        validate_button.click(**validate_wiring)

    demo.launch()
51
+
52
def main():
    """Entry point: print a greeting, load the dotenv file, start the UI."""
    print("Hello from deep-research!")
    # override=True lets dotenv values replace variables that are already set.
    load_dotenv(override=True)
    render_gradio_interface()
60
+
61
+
62
+ if __name__ == "__main__":
63
+ main()