Shekarss committed on
Commit
aa9134d
·
verified ·
1 Parent(s): 2bd4b8d

Upload 10 files

Browse files
deep_research.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import asyncio
3
+ from research_manager import ResearchManager
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv(override=True)
7
+
8
+ manager = ResearchManager()
9
+
10
+ async def run(query: str):
11
+ async for chunk in ResearchManager().run(query):
12
+ yield chunk
13
+
14
+
15
+ with gr.Blocks(theme=gr.themes.Default(primary_hue="sky")) as ui:
16
+ gr.Markdown("# Deep Research")
17
+ query_textbox = gr.Textbox(label="What topic would you like to research?")
18
+ run_button = gr.Button("Run", variant="primary")
19
+ report = gr.Markdown(label="Report")
20
+
21
+ run_button.click(fn=run, inputs=query_textbox, outputs=report)
22
+ query_textbox.submit(fn=run, inputs=query_textbox, outputs=report)
23
+
24
+ ui.launch(inbrowser=True)
25
+
guardrail_agent.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from agents import Agent, output_guardrail, GuardrailFunctionOutput, Runner
from pydantic import BaseModel, Field
from model import model

class NoCode(BaseModel):
    # True when the inspected report contains (hidden) code.
    is_code: bool = Field(description='Checks for code in the report')

# Classifier agent that inspects a report for embedded code.
# (Fixed typo in the instructions: "whther" -> "whether".)
guard_agent = Agent(
    name='guard_rail',
    instructions='Checks whether the report has hidden code in it',
    model=model,
    output_type=NoCode
)

@output_guardrail
async def check_no_code(ctx, agent, report):
    """Output guardrail: trip the wire when the report contains embedded code.

    Args:
        ctx: Guardrail run context; its .context is forwarded to the sub-run.
        agent: The agent whose output is being checked (unused here).
        report: The report text to inspect.
    """
    result = await Runner.run(guard_agent, report, context=ctx.context)
    is_code = result.final_output.is_code
    return GuardrailFunctionOutput(output_info={'found code': 'An embedded code was found'}, tripwire_triggered=is_code)
model.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
import os
from agents import OpenAIChatCompletionsModel
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load .env so the API key is available, overriding any shell-set values.
load_dotenv(override=True)

# Groq's OpenAI-compatible endpoint, driven through the AsyncOpenAI client.
# NOTE(review): the env var is named 'grok_key' but the endpoint is Groq —
# presumably a naming slip; confirm the variable name used in .env matches.
client = AsyncOpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv('grok_key'))
# Shared chat-completions model used by every agent in this project.
model = OpenAIChatCompletionsModel(model='meta-llama/llama-4-scout-17b-16e-instruct', openai_client=client)
planner_agent.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from model import model
from agents import Agent
from pydantic import BaseModel, Field

# Number of search terms the planner is asked to produce per query.
HOW_MANY_SEARCHES = 3

INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."

class WebSearchItem(BaseModel):
    # Why this particular search helps answer the user's query.
    reason: str = Field(description="Your reasoning for why this search is important to the query.")

    # The literal term to feed to the web-search tool.
    query: str = Field(description="The search term to use for the web search.")


class WebSearchPlan(BaseModel):
    # Ordered collection of searches the planner proposes.
    searches: list[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query.")

# Agent that turns a (query + user answers) prompt into a WebSearchPlan.
planner_agent = Agent(
    name='Planner Agent',
    instructions=INSTRUCTIONS,
    model=model,
    output_type=WebSearchPlan
)
question_agent.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from agents import Agent, Runner, trace, output_guardrail, GuardrailFunctionOutput
from pydantic import BaseModel, Field
from typing import Dict, List
from model import model

INSTRUCTIONS = (
    "You are a helpful research assistant. "
    "Given a user query, generate a set of 3 insightful questions "
    "to ask the user in order to facilitate detailed and in-depth planning."
)

class QuestionItem(BaseModel):
    # 1-based position of the question in the generated set.
    number: int = Field(description='Question Number')
    # The question to put to the user.
    question: str = Field(description='The question text based on user query')
    # Filled in later by ResearchManager once the user responds.
    answer: str | None = None

class QuestionPlan(BaseModel):
    # The clarifying questions generated for the user's query.
    questions: list[QuestionItem] = Field(description='List of question')

# Agent that produces clarifying questions before planning begins.
question_agent = Agent(
    name='question_agent',
    instructions=INSTRUCTIONS,
    model=model,
    output_type=QuestionPlan
)
refiner_agent.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from model import model
from agents import Agent
from pydantic import BaseModel, Field

# Fixed: the original adjacent string literals concatenated without a
# separator, yielding "...'False' if notProvide proper feedback..." in the
# prompt the model actually saw.
instructions = (
    "Your role is to evaluate the research plan based on the user's query. "
    "If the existing plan is incomplete, unclear, or insufficient, "
    "you return is_valid as a bool value: 'True' if accepted, 'False' if not. "
    "Provide proper feedback and suggestions why it was not a valid plan."
)

class ValidPlan(BaseModel):
    # Verdict on the plan; drives the re-planning loop in ResearchManager.
    is_valid: bool = Field(..., description="Whether the plan is valid. Return True if valid, False otherwise.")
    # Human-readable explanation used in the "Refining..." status message.
    feedback: str = Field("", description="If invalid, describe why the plan is insufficient and suggest improvements.")

# Agent that accepts or rejects a proposed web-search plan.
refiner_agent = Agent(
    name='refiner_agent',
    instructions=instructions,
    model=model,
    output_type=ValidPlan
)
research_manager.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from agents import Runner, trace, gen_trace_id
from model import model
from question_agent import question_agent, QuestionItem, QuestionPlan
from search_agent import search_agent
from planner_agent import planner_agent, WebSearchItem, WebSearchPlan
from writer_agent import writer_agent, ReportData
from refiner_agent import refiner_agent, ValidPlan
import asyncio

class ResearchManager:
    """Orchestrates the deep-research pipeline:
    clarifying questions -> search plan -> plan validation -> web search -> report.
    """

    # Maximum number of re-planning rounds when the refiner rejects a plan.
    MAX_REPLANS = 4

    async def run(self, query: str, answers: dict | None = None):
        """Run the deep research process, yielding status updates and the final report.

        Args:
            query: The user's research topic.
            answers: Optional pre-collected answers, keyed by question number
                (int or str). When omitted, answers are gathered interactively
                via input(). NOTE: input() blocks the event loop, so UI callers
                (e.g. the Gradio app, or test_gradio.py) should pass answers.
        """
        trace_id = gen_trace_id()
        with trace('Research trace', trace_id=trace_id):
            yield "Starting research"

            question_plan: QuestionPlan = await self.ask_questions(query)

            user_answers = {}
            for q_item in question_plan.questions:
                if answers is not None:
                    # Accept either int or str keys for the question number.
                    answer = answers.get(q_item.number, answers.get(str(q_item.number), ""))
                else:
                    answer = input(f"{q_item.number}. {q_item.question}: ")
                user_answers[q_item.number] = answer
                q_item.answer = answer
            yield f"Collected answers for {len(user_answers)} questions."

            search_plan: WebSearchPlan = await self.plan_next(query, question_plan)
            yield "Initial web search plan generated."

            valid_plan: ValidPlan = await self.refine_plan(query, search_plan)
            plan_num = 0
            # Re-plan until accepted, capped at MAX_REPLANS extra rounds to
            # avoid looping forever on a stubborn refiner.
            while not valid_plan.is_valid:
                yield f"Plan not valid: {valid_plan.feedback}. Refining..."
                search_plan = await self.plan_next(query, question_plan)
                valid_plan = await self.refine_plan(query, search_plan)

                if plan_num >= self.MAX_REPLANS:
                    break
                plan_num += 1
            yield "Plan validated and refined."

            search_results = await self.web_search(search_plan)
            yield "Web searches completed."

            report: ReportData = await self.write_report(query, search_results)
            yield report.markdown_report

    async def ask_questions(self, query: str) -> QuestionPlan:
        """Generate 3 clarifying questions for the user's query."""
        result = await Runner.run(question_agent, query)
        return result.final_output_as(QuestionPlan)

    async def plan_next(self, query: str, que_depth: QuestionPlan) -> WebSearchPlan:
        """Produce a web-search plan from the query plus the user's answers."""
        # Guard against unanswered questions (answer defaults to None).
        answers_str = "; ".join([q.answer or "" for q in que_depth.questions])
        input_query = f'User query {query}. User answered questions {answers_str}'
        result = await Runner.run(planner_agent, input_query)
        return result.final_output_as(WebSearchPlan)

    async def refine_plan(self, query: str, ex_plan: WebSearchPlan) -> ValidPlan:
        """Evaluate the research plan based on the user's query."""
        all_plans = "; ".join([p.reason for p in ex_plan.searches])
        input_plan = f'Validate the plan and provide whether its valid or not along with feedback and a bool of whether its valid or not. The plan {all_plans}'
        result = await Runner.run(refiner_agent, input_plan)
        return result.final_output_as(ValidPlan)

    async def web_search(self, search_plan: WebSearchPlan) -> list[str]:
        """Run all planned searches concurrently, dropping failed ones."""
        num_completed = 0
        tasks = [asyncio.create_task(self.search(item)) for item in search_plan.searches]
        results = []
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            num_completed += 1
            print(f"Searching... {num_completed}/{len(tasks)} completed")
        print("Finished searching")
        return results

    async def search(self, item: WebSearchItem) -> str | None:
        """Run a single search; return its summary, or None on failure."""
        input = f"Search term: {item.query}\nReason for searching: {item.reason}"
        try:
            result = await Runner.run(search_agent, input)
            return str(result.final_output)
        except Exception:
            # Best-effort: a failed search is skipped, not fatal.
            return None

    async def write_report(self, query: str, search_results: list[str]) -> ReportData:
        """Synthesize the final markdown report from the search summaries."""
        print("Thinking about report...")
        input = f"Original query: {query}\nSummarized search results: {search_results}"
        result = await Runner.run(writer_agent, input)
        print("Finished writing report")
        return result.final_output_as(ReportData)
search_agent.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from agents import Agent, ModelSettings, function_tool
from duckduckgo_search import DDGS
from model import model
from writer_agent import writer_agent

INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must 2-3 paragraphs and less than 300 "
    "words. Capture the main points. Write succintly, no need to have complete sentences or good "
    "grammar. This will be consumed by someone synthesizing a report, so its vital you capture the "
    "essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
)

@function_tool
def duckduckgo_search(query: str, max_results: int = 5) -> list[dict]:
    """Search the web using DuckDuckGo and return results.

    Args:
        query: The search term.
        max_results: Maximum number of results to return (default 5).
            Fixed the annotation: the default is an int and DDGS expects one;
            the original annotated it as str.
    """
    # Cast defensively in case the model passes a numeric string anyway.
    max_results = int(max_results)
    with DDGS() as ddgs:
        return [r for r in ddgs.text(query, max_results=max_results)]

# Agent that must call the search tool, then hand off to the writer.
search_agent = Agent(
    name="Search agent",
    instructions=INSTRUCTIONS,
    tools=[duckduckgo_search],
    model=model,
    handoff_description='After searching, handoff to writer_agent to generate a report',
    handoffs=[writer_agent],
    model_settings=ModelSettings(tool_choice="required"),
)
test_gradio.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""
Test script to verify the Gradio interface works correctly
"""

import asyncio
from research_manager import ResearchManager

async def test_research_flow():
    """Test the research flow without Gradio to ensure it works"""
    manager = ResearchManager()

    # Test query
    query = "The impact of AI on healthcare"

    print("Testing research flow...")
    print(f"Query: {query}")
    print("-" * 50)

    # First run - should return questions
    # NOTE(review): this expects run() to yield a dict chunk of
    # {"type": "questions", "questions": [...]}, but ResearchManager.run
    # as written only yields status strings and the report — confirm the
    # intended streaming protocol.
    print("Step 1: Getting questions...")
    async for chunk in manager.run(query):
        if isinstance(chunk, dict) and chunk.get("type") == "questions":
            print(f"Generated {len(chunk['questions'])} questions:")
            for q in chunk['questions']:
                print(f"  {q.number}. {q.question}")
            break
        else:
            print(f"Status: {chunk}")

    print("\n" + "-" * 50)

    # Simulate user answers (keys are question numbers as strings)
    sample_answers = {
        "1": "I'm interested in diagnostic accuracy improvements",
        "2": "Looking at the last 5 years of developments",
        "3": "Focus on both benefits and challenges"
    }

    # NOTE(review): passes a second positional argument to run();
    # ResearchManager.run must accept an optional answers mapping for
    # this call to work — verify the signature.
    print("Step 2: Running research with sample answers...")
    async for chunk in manager.run(query, sample_answers):
        print(f"Status: {chunk}")

if __name__ == "__main__":
    asyncio.run(test_research_flow())
writer_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from model import model
from agents import Agent
from pydantic import BaseModel, Field
from guardrail_agent import check_no_code

INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)

class ReportData(BaseModel):
    # Quick abstract surfaced alongside the full report.
    short_summary: str = Field(description="A short 2-3 sentence summary of the findings.")

    # The full report body; streamed to the UI by ResearchManager.
    markdown_report: str = Field(description="The final report atleast 1000 words")

    # Suggested next research directions.
    follow_up_questions: list[str] = Field(description="Suggested topics to research further")

# Fixed: check_no_code was imported but never attached, so the no-code
# guardrail never ran. Wire it up as an output guardrail.
writer_agent = Agent(
    name="WriterAgent",
    instructions=INSTRUCTIONS,
    model=model,
    output_type=ReportData,
    output_guardrails=[check_no_code],
)