Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .env.exampe +3 -0
- README.md +27 -12
- deep_research.py +45 -0
- email_agent.py +30 -0
- manager_agent.py +37 -0
- planner_agent.py +23 -0
- questions_generator_agent.py +10 -0
- requirements.txt +0 -0
- research_tools.py +102 -0
- search_agent.py +17 -0
- writer_agent.py +28 -0
.env.exampe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
OPENAI_API_KEY=YOUR_OPENAI_API_KEY
|
| 2 |
+
SENDGRID_API_KEY=YOUR_SENDGRID_API_KEY
|
| 3 |
+
SENDGRID_SENDER_EMAIL=YOUR_SENDGRID_SENDER_EMAIL
|
README.md
CHANGED
|
@@ -1,12 +1,27 @@
|
|
| 1 |
-
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: deep_research
|
| 3 |
+
app_file: deep_research.py
|
| 4 |
+
sdk: gradio
|
| 5 |
+
sdk_version: 5.34.2
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
# 🧠 Deep Research Agent (Modular)
|
| 9 |
+
|
| 10 |
+
This project is a modular and extended version of the deep research agent. Instead of running the entire research process in a single step, this system breaks it down into reusable **tool-like stages**, orchestrated by a central **Research Manager Agent**. This creates a more natural and interactive experience, similar to tools like ChatGPT.
|
| 11 |
+
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
## 🚀 What's different
|
| 15 |
+
|
| 16 |
+
- Each research stage is implemented as a standalone tool.
|
| 17 |
+
- The agent generates clarifying questions before starting the research.
|
| 18 |
+
- A **manager agent** controls the flow and selects tools dynamically.
|
| 19 |
+
- Enables a **more conversational** research experience.
|
| 20 |
+
- Rather than hardcoding the user's email address in the script, the agent dynamically prompts the user for their address and uses SendGrid to send the report to that address.
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## 🛠️ Usage
|
| 25 |
+
|
| 26 |
+
- Just make sure you've defined the environment variables listed in the `.env.example` file.
|
| 27 |
+
- If you want to send emails to any address, you need to have a verified domain in SendGrid and use an email address from that domain in the SENDGRID_SENDER_EMAIL variable. Otherwise, you can use your verified single sender email address, but you may encounter issues when sending emails to recipients other than the sender address.
|
deep_research.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from agents import Runner
|
| 4 |
+
from manager_agent import manager_agent
|
| 5 |
+
|
| 6 |
+
load_dotenv(override=True)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
async def run_chat(user_message: str, chat_history: list):
    """Stream one chat turn as two successive ``(chat_history, "")`` updates.

    The user's message and a temporary "Pensando..." assistant entry are added
    to ``chat_history`` and yielded immediately. The manager agent is then run
    on the conversation (without the temporary entry), the temporary entry is
    replaced by the agent's final output, and the history is yielded again.
    """
    chat_history.extend(
        (
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": "Pensando..."},
        )
    )
    yield chat_history, ""

    # Everything except the trailing placeholder is forwarded to the agent.
    *conversation, _placeholder = chat_history
    agent_input = [
        {"role": turn["role"], "content": turn["content"]} for turn in conversation
    ]
    outcome = await Runner.run(manager_agent, agent_input)

    chat_history[-1] = {"role": "assistant", "content": outcome.final_output}
    yield chat_history, ""
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
with gr.Blocks() as ui:
    # Transcript display plus the per-session message list handed to run_chat.
    chat = gr.Chatbot(type="messages", label="Agente de investigación profunda")
    chat_history = gr.State([])

    txt = gr.Textbox(placeholder="Escribe aquí…", show_label=False)
    btn = gr.Button("Enviar")

    # Clicking the button and submitting the textbox trigger the same handler;
    # its second output is written back to the textbox.
    for register in (btn.click, txt.submit):
        register(
            fn=run_chat,
            inputs=[txt, chat_history],
            outputs=[chat, txt],
        )

ui.launch(inbrowser=True)
|
| 45 |
+
|
email_agent.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
import sendgrid
|
| 5 |
+
from sendgrid.helpers.mail import Email, Mail, Content, To
|
| 6 |
+
from agents import Agent, function_tool
|
| 7 |
+
|
| 8 |
+
@function_tool
def send_email(subject: str, html_body: str, email_address: str) -> Dict[str, str]:
    """Send an email with the given subject and HTML body to the given address.

    Args:
        subject: Subject line of the email.
        html_body: Body of the email, as HTML.
        email_address: Address of the recipient.

    Returns:
        ``{"status": "success"}`` when SendGrid answers with a 2xx status code;
        otherwise a dict with ``"status": "error"`` and a ``"message"`` entry
        describing what went wrong.
    """
    api_key = os.environ.get("SENDGRID_API_KEY")
    sender = os.environ.get("SENDGRID_SENDER_EMAIL")
    if not api_key or not sender:
        # Without both settings the request cannot succeed; report it instead of trying.
        return {
            "status": "error",
            "message": "SENDGRID_API_KEY and SENDGRID_SENDER_EMAIL must be set",
        }

    sg = sendgrid.SendGridAPIClient(api_key=api_key)
    mail = Mail(
        Email(sender),
        To(email_address),
        subject,
        Content("text/html", html_body),
    ).get()
    print(f"Sending email from {sender} to {email_address}")
    response = sg.client.mail.send.post(request_body=mail)
    print("Email response", response.status_code)

    # Only claim success when the API actually accepted the request.
    if not 200 <= response.status_code < 300:
        return {
            "status": "error",
            "message": f"SendGrid responded with status {response.status_code}",
        }
    return {"status": "success"}
|
| 20 |
+
|
| 21 |
+
# System prompt for the email agent. The send_email tool takes the recipient's
# address as an argument, so the prompt asks the model to send to the address
# it is given along with the report.
INSTRUCTIONS = (
    "You are able to send a nicely formatted HTML email based on a detailed report.\n"
    "You will be provided with a detailed report and the email address of the recipient. "
    "You should use your tool to send one email to that address, providing the\n"
    "report converted into clean, well presented HTML with an appropriate subject line."
)
|
| 24 |
+
|
| 25 |
+
# Agent configured with the email prompt and the send_email tool.
email_agent = Agent(
    model="gpt-4o-mini",
    name="Email agent",
    tools=[send_email],
    instructions=INSTRUCTIONS,
)
|
manager_agent.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from research_tools import plan_searches, perform_searches, write_report, send_email, generate_questions
|
| 2 |
+
from agents import Agent
|
| 3 |
+
|
| 4 |
+
# System prompt for the manager agent: an introduction, three numbered steps
# (clarify, research, deliver/offer email) and a reminder of the tool names.
INSTRUCTIONS = "".join(
    [
        # Role and overall goal.
        "You are a dedicated **Research Manager Agent**, designed to conduct in-depth research for users. ",
        "Your primary goal is to provide comprehensive and accurate reports based on their queries. ",
        "Follow these steps to manage the research process effectively:\n\n",
        # Step 1: clarification questions.
        "1. **Clarify the Query:** When you receive a new query, your first step is to ensure full understanding. ",
        " **Generate precisely 5 specific clarification questions** to help refine the user's request. ",
        " Politely ask the user to answer these questions so you can perform the best possible search.\n\n",
        # Step 2: plan, search, write.
        "2. **Conduct Research:** Once the user has provided answers to your questions, proceed with the core research. ",
        " **Plan the necessary web searches, then execute them, and finally, synthesize your findings into a comprehensive research report.**\n\n",
        # Step 3: present the report and offer to email it.
        "3. **Deliver and Offer Email:** After generating the report, present it to the user. ",
        " **Crucially, ask the user if they would like to receive this report via email.** ",
        " If they agree, politely request their email address and then send the report to that address. ",
        " If they decline the email, conclude the interaction gracefully without further action regarding email.\n",
        # Tool reminder.
        "**Remember:** You are equipped with the following tools to accomplish these tasks: `generate_questions`, `plan_searches`, `perform_searches`, `write_report`, and `send_email`.",
    ]
)
|
| 23 |
+
|
| 24 |
+
# Tools exposed to the manager agent, in the order the prompt walks through them.
manager_tools = [generate_questions, plan_searches, perform_searches, write_report, send_email]

# Orchestrating agent that drives the research flow through the tools above.
manager_agent = Agent(
    model="gpt-4o-mini",
    name="ManagerAgent",
    tools=manager_tools,
    instructions=INSTRUCTIONS,
)
|
planner_agent.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from agents import Agent
|
| 3 |
+
|
| 4 |
+
# Number of search terms the planner is asked to produce.
HOW_MANY_SEARCHES = 5

# System prompt for the planner agent; the requested number of searches is interpolated.
INSTRUCTIONS = (
    "You are a helpful research assistant. Given a query, come up with a set of web searches "
    f"to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."
)
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class WebSearchItem(BaseModel):
    # One planned web search: the term to look up plus the reason for it.
    reason: str = Field(
        description="Your reasoning for why this search is important to the query.",
    )
    query: str = Field(
        description="The search term to use for the web search.",
    )
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class WebSearchPlan(BaseModel):
    # Structured planner output: the list of searches to run.
    searches: list[WebSearchItem] = Field(
        description="A list of web searches to perform to best answer the query.",
    )
|
| 17 |
+
|
| 18 |
+
# Agent whose output is a structured WebSearchPlan.
planner_agent = Agent(
    model="gpt-4o-mini",
    name="PlannerAgent",
    output_type=WebSearchPlan,
    instructions=INSTRUCTIONS,
)
|
questions_generator_agent.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Agent
|
| 2 |
+
|
| 3 |
+
# System prompt for the question-generating agent. A plain (non-f) string is used
# because nothing is interpolated, and adjacent literals replace the in-string
# backslash continuation so source indentation can never leak into the prompt.
INSTRUCTIONS = (
    "You are a helpful research assistant. Given a query, come up with a set of questions "
    "that can help you understand the query better and plan your research. Output a list of 5 questions."
)
|
| 5 |
+
|
| 6 |
+
# Agent that proposes clarifying questions for a research query.
questions_generator_agent = Agent(
    model="gpt-4o-mini",
    name="QuestionsGeneratorAgent",
    instructions=INSTRUCTIONS,
)
|
requirements.txt
ADDED
|
Binary file (144 Bytes). View file
|
|
|
research_tools.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Runner, trace, gen_trace_id
|
| 2 |
+
from search_agent import search_agent
|
| 3 |
+
from planner_agent import planner_agent, WebSearchItem, WebSearchPlan
|
| 4 |
+
from questions_generator_agent import questions_generator_agent
|
| 5 |
+
from writer_agent import writer_agent, ReportData
|
| 6 |
+
from email_agent import email_agent
|
| 7 |
+
import asyncio
|
| 8 |
+
from agents import function_tool
|
| 9 |
+
|
| 10 |
+
"""
|
| 11 |
+
async def run(self, query: str):
|
| 12 |
+
" Run the deep research process, yielding the status updates and the final report"
|
| 13 |
+
trace_id = gen_trace_id()
|
| 14 |
+
with trace("Research trace", trace_id=trace_id):
|
| 15 |
+
print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
|
| 16 |
+
yield f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
|
| 17 |
+
print("Starting research...")
|
| 18 |
+
search_plan_result = await self.plan_searches(query)
|
| 19 |
+
search_plan = search_plan_result.searches
|
| 20 |
+
user_email = search_plan_result.user_email
|
| 21 |
+
yield "Searches planned, starting to search..."
|
| 22 |
+
search_results = await self.perform_searches(search_plan)
|
| 23 |
+
yield "Searches complete, writing report..."
|
| 24 |
+
report = await self.write_report(query, search_results)
|
| 25 |
+
yield "Report written, sending email..."
|
| 26 |
+
await self.send_email(report, user_email)
|
| 27 |
+
yield "Email sent, research complete"
|
| 28 |
+
yield report.markdown_report
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
@function_tool
async def generate_questions(query: str) -> str:
    """ Generate 5 questions to answer for the query """
    # Progress is printed before and after the questions agent runs.
    print("Generating questions...")
    outcome = await Runner.run(questions_generator_agent, f"Query: {query}")
    print("Generated 5 questions...")
    return outcome.final_output
|
| 41 |
+
|
| 42 |
+
@function_tool
async def plan_searches(query: str) -> WebSearchPlan:
    """ Plan the searches to perform for the query """
    print("Planning searches...")
    outcome = await Runner.run(planner_agent, f"Query: {query}")
    # Report how many searches the planner produced before handing the plan back.
    planned = outcome.final_output.searches
    print(f"Will perform {len(planned)} searches")
    return outcome.final_output_as(WebSearchPlan)
|
| 52 |
+
|
| 53 |
+
@function_tool
async def perform_searches(search_plan: WebSearchPlan) -> list[str]:
    """ Perform the searches to perform for the query """
    print("Searching...")
    # Start every planned search at once, then collect results as each one finishes.
    pending = [asyncio.create_task(search(planned)) for planned in search_plan.searches]
    total = len(pending)
    summaries = []
    for finished, next_done in enumerate(asyncio.as_completed(pending), start=1):
        summary = await next_done
        # search() returns None when its agent run raised; those results are dropped.
        if summary is not None:
            summaries.append(summary)
        print(f"Searching... {finished}/{total} completed")
    print("Finished searching")
    return summaries
|
| 68 |
+
|
| 69 |
+
async def search(item: WebSearchItem) -> str | None:
    """Run the search agent for one planned search.

    Args:
        item: Planned search whose ``query`` and ``reason`` are passed to the agent.

    Returns:
        The agent's final output converted to a string, or ``None`` if the run
        raised an exception. The failure is printed rather than silently dropped.
    """
    # Named ``prompt`` rather than ``input`` so the builtin is not shadowed.
    prompt = f"Search term: {item.query}\nReason for searching: {item.reason}"
    try:
        result = await Runner.run(search_agent, prompt)
        return str(result.final_output)
    except Exception as exc:
        # Best-effort by design: report the failure and let the caller skip this search.
        print(f"Search failed for {item.query!r}: {exc}")
        return None
|
| 80 |
+
|
| 81 |
+
@function_tool
async def write_report(query: str, search_results: list[str]) -> ReportData:
    """ Write the report for the query """
    print("Thinking about report...")
    # The list of summaries is interpolated as-is into the writer's prompt.
    writer_prompt = f"Original query: {query}\nSummarized search results: {search_results}"
    outcome = await Runner.run(writer_agent, writer_prompt)

    print("Finished writing report")
    return outcome.final_output_as(ReportData)
|
| 93 |
+
|
| 94 |
+
@function_tool
async def send_email(report: ReportData, user_email: str) -> ReportData:
    """Email the report to the user by running the email agent.

    Args:
        report: The report whose markdown text is handed to the email agent.
        user_email: Address the report should be sent to.

    Returns:
        The same report that was passed in.
    """
    print("Writing email...")
    # The agent's own output is not needed here; only the run itself matters.
    await Runner.run(
        email_agent,
        f"Report: {report.markdown_report}\nUser email: {user_email}",
    )
    print("Email sent")
    return report
|
search_agent.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agents import Agent, WebSearchTool, ModelSettings
|
| 2 |
+
|
| 3 |
+
# System prompt for the search agent: search the web for a term and return a
# short summary. Wording fixes only: "must be", "succinctly", "it's".
INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
    "words. Capture the main points. Write succinctly, no need to have complete sentences or good "
    "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
    "essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
)
|
| 10 |
+
|
| 11 |
+
# Agent that must call the web search tool (tool_choice="required") before answering.
search_agent = Agent(
    model="gpt-4o-mini",
    model_settings=ModelSettings(tool_choice="required"),
    name="Search agent",
    tools=[WebSearchTool(search_context_size="low")],
    instructions=INSTRUCTIONS,
)
|
writer_agent.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from agents import Agent
|
| 3 |
+
|
| 4 |
+
# System prompt for the writer agent. Every sentence group ends with an explicit
# separator; the original ran "...at least 1000 words." straight into
# "If the query..." because adjacent literals are joined with nothing between them.
INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words.\n"
    "If the query includes an email address, please ignore it."
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class ReportData(BaseModel):
    # Structured writer output: a brief summary, the full report, and follow-up topics.
    short_summary: str = Field(
        description="A short 2-3 sentence summary of the findings.",
    )
    markdown_report: str = Field(
        description="The final report",
    )
    follow_up_questions: list[str] = Field(
        description="Suggested topics to research further",
    )
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Agent whose output is a structured ReportData.
writer_agent = Agent(
    model="gpt-4o-mini",
    name="WriterAgent",
    output_type=ReportData,
    instructions=INSTRUCTIONS,
)
|