Youhorng commited on
Commit
45d075b
·
verified ·
1 Parent(s): 49e76ba

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ .DS_Store
177
+
178
+ # Ignore Crew engineering team output
179
+ 3_crew/engineering_team/output/
180
+
181
+ # Ignore Accounts database in capstone project
182
+ 6_mcp/accounts.db
183
+ 6_mcp/memory/*.db
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Deep Research Agent
3
- emoji: 🐨
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.47.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: deep_research_agent
3
+ app_file: src/deep_research.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.46.1
 
 
6
  ---
 
 
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def main() -> None:
    """Entry point: print a greeting identifying the project."""
    greeting = "Hello from deep-research-ai-agent!"
    print(greeting)


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "deep-research-ai-agent"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "anthropic>=0.68.0",
9
+ "gradio>=5.46.1",
10
+ "httpx>=0.28.1",
11
+ "huggingface-hub[cli]>=0.35.0",
12
+ "ipywidgets>=8.1.7",
13
+ "mailjet-rest>=1.5.1",
14
+ "openai>=1.108.1",
15
+ "openai-agents>=0.3.1",
16
+ "python-dotenv>=1.1.1",
17
+ "requests>=2.32.5",
18
+ ]
requirements.txt ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt
3
+ aiofiles==24.1.0
4
+ # via gradio
5
+ annotated-types==0.7.0
6
+ # via pydantic
7
+ anthropic==0.68.0
8
+ # via deep-research-ai-agent (pyproject.toml)
9
+ anyio==4.10.0
10
+ # via
11
+ # anthropic
12
+ # gradio
13
+ # httpx
14
+ # mcp
15
+ # openai
16
+ # sse-starlette
17
+ # starlette
18
+ asttokens==3.0.0
19
+ # via stack-data
20
+ attrs==25.3.0
21
+ # via
22
+ # jsonschema
23
+ # referencing
24
+ audioop-lts==0.2.2
25
+ # via gradio
26
+ brotli==1.1.0
27
+ # via gradio
28
+ certifi==2025.8.3
29
+ # via
30
+ # httpcore
31
+ # httpx
32
+ # requests
33
+ charset-normalizer==3.4.3
34
+ # via requests
35
+ click==8.3.0
36
+ # via
37
+ # typer
38
+ # uvicorn
39
+ colorama==0.4.6
40
+ # via griffe
41
+ comm==0.2.3
42
+ # via ipywidgets
43
+ decorator==5.2.1
44
+ # via ipython
45
+ distro==1.9.0
46
+ # via
47
+ # anthropic
48
+ # openai
49
+ docstring-parser==0.17.0
50
+ # via anthropic
51
+ executing==2.2.1
52
+ # via stack-data
53
+ fastapi==0.117.1
54
+ # via gradio
55
+ ffmpy==0.6.1
56
+ # via gradio
57
+ filelock==3.19.1
58
+ # via huggingface-hub
59
+ fsspec==2025.9.0
60
+ # via
61
+ # gradio-client
62
+ # huggingface-hub
63
+ gradio==5.46.1
64
+ # via deep-research-ai-agent (pyproject.toml)
65
+ gradio-client==1.13.1
66
+ # via gradio
67
+ griffe==1.14.0
68
+ # via openai-agents
69
+ groovy==0.1.2
70
+ # via gradio
71
+ h11==0.16.0
72
+ # via
73
+ # httpcore
74
+ # uvicorn
75
+ hf-xet==1.1.10
76
+ # via huggingface-hub
77
+ httpcore==1.0.9
78
+ # via httpx
79
+ httpx==0.28.1
80
+ # via
81
+ # deep-research-ai-agent (pyproject.toml)
82
+ # anthropic
83
+ # gradio
84
+ # gradio-client
85
+ # mcp
86
+ # openai
87
+ # safehttpx
88
+ httpx-sse==0.4.1
89
+ # via mcp
90
+ huggingface-hub==0.35.0
91
+ # via
92
+ # deep-research-ai-agent (pyproject.toml)
93
+ # gradio
94
+ # gradio-client
95
+ idna==3.10
96
+ # via
97
+ # anyio
98
+ # httpx
99
+ # requests
100
+ inquirerpy==0.3.4
101
+ # via huggingface-hub
102
+ ipython==9.5.0
103
+ # via ipywidgets
104
+ ipython-pygments-lexers==1.1.1
105
+ # via ipython
106
+ ipywidgets==8.1.7
107
+ # via deep-research-ai-agent (pyproject.toml)
108
+ jedi==0.19.2
109
+ # via ipython
110
+ jinja2==3.1.6
111
+ # via gradio
112
+ jiter==0.11.0
113
+ # via
114
+ # anthropic
115
+ # openai
116
+ jsonschema==4.25.1
117
+ # via mcp
118
+ jsonschema-specifications==2025.9.1
119
+ # via jsonschema
120
+ jupyterlab-widgets==3.0.15
121
+ # via ipywidgets
122
+ mailjet-rest==1.5.1
123
+ # via deep-research-ai-agent (pyproject.toml)
124
+ markdown-it-py==4.0.0
125
+ # via rich
126
+ markupsafe==3.0.2
127
+ # via
128
+ # gradio
129
+ # jinja2
130
+ matplotlib-inline==0.1.7
131
+ # via ipython
132
+ mcp==1.14.1
133
+ # via openai-agents
134
+ mdurl==0.1.2
135
+ # via markdown-it-py
136
+ numpy==2.3.3
137
+ # via
138
+ # gradio
139
+ # pandas
140
+ openai==1.108.1
141
+ # via
142
+ # deep-research-ai-agent (pyproject.toml)
143
+ # openai-agents
144
+ openai-agents==0.3.1
145
+ # via deep-research-ai-agent (pyproject.toml)
146
+ orjson==3.11.3
147
+ # via gradio
148
+ packaging==25.0
149
+ # via
150
+ # gradio
151
+ # gradio-client
152
+ # huggingface-hub
153
+ pandas==2.3.2
154
+ # via gradio
155
+ parso==0.8.5
156
+ # via jedi
157
+ pexpect==4.9.0
158
+ # via ipython
159
+ pfzy==0.3.4
160
+ # via inquirerpy
161
+ pillow==11.3.0
162
+ # via gradio
163
+ prompt-toolkit==3.0.52
164
+ # via
165
+ # inquirerpy
166
+ # ipython
167
+ ptyprocess==0.7.0
168
+ # via pexpect
169
+ pure-eval==0.2.3
170
+ # via stack-data
171
+ pydantic==2.11.9
172
+ # via
173
+ # anthropic
174
+ # fastapi
175
+ # gradio
176
+ # mcp
177
+ # openai
178
+ # openai-agents
179
+ # pydantic-settings
180
+ pydantic-core==2.33.2
181
+ # via pydantic
182
+ pydantic-settings==2.10.1
183
+ # via mcp
184
+ pydub==0.25.1
185
+ # via gradio
186
+ pygments==2.19.2
187
+ # via
188
+ # ipython
189
+ # ipython-pygments-lexers
190
+ # rich
191
+ python-dateutil==2.9.0.post0
192
+ # via pandas
193
+ python-dotenv==1.1.1
194
+ # via
195
+ # deep-research-ai-agent (pyproject.toml)
196
+ # pydantic-settings
197
+ python-multipart==0.0.20
198
+ # via
199
+ # gradio
200
+ # mcp
201
+ pytz==2025.2
202
+ # via pandas
203
+ pyyaml==6.0.2
204
+ # via
205
+ # gradio
206
+ # huggingface-hub
207
+ referencing==0.36.2
208
+ # via
209
+ # jsonschema
210
+ # jsonschema-specifications
211
+ requests==2.32.5
212
+ # via
213
+ # deep-research-ai-agent (pyproject.toml)
214
+ # huggingface-hub
215
+ # mailjet-rest
216
+ # openai-agents
217
+ rich==14.1.0
218
+ # via typer
219
+ rpds-py==0.27.1
220
+ # via
221
+ # jsonschema
222
+ # referencing
223
+ ruff==0.13.1
224
+ # via gradio
225
+ safehttpx==0.1.6
226
+ # via gradio
227
+ semantic-version==2.10.0
228
+ # via gradio
229
+ shellingham==1.5.4
230
+ # via typer
231
+ six==1.17.0
232
+ # via python-dateutil
233
+ sniffio==1.3.1
234
+ # via
235
+ # anthropic
236
+ # anyio
237
+ # openai
238
+ sse-starlette==3.0.2
239
+ # via mcp
240
+ stack-data==0.6.3
241
+ # via ipython
242
+ starlette==0.48.0
243
+ # via
244
+ # fastapi
245
+ # gradio
246
+ # mcp
247
+ tomlkit==0.13.3
248
+ # via gradio
249
+ tqdm==4.67.1
250
+ # via
251
+ # huggingface-hub
252
+ # openai
253
+ traitlets==5.14.3
254
+ # via
255
+ # ipython
256
+ # ipywidgets
257
+ # matplotlib-inline
258
+ typer==0.19.1
259
+ # via gradio
260
+ types-requests==2.32.4.20250913
261
+ # via openai-agents
262
+ typing-extensions==4.15.0
263
+ # via
264
+ # anthropic
265
+ # fastapi
266
+ # gradio
267
+ # gradio-client
268
+ # huggingface-hub
269
+ # openai
270
+ # openai-agents
271
+ # pydantic
272
+ # pydantic-core
273
+ # typer
274
+ # typing-inspection
275
+ typing-inspection==0.4.1
276
+ # via
277
+ # pydantic
278
+ # pydantic-settings
279
+ tzdata==2025.2
280
+ # via pandas
281
+ urllib3==2.5.0
282
+ # via
283
+ # requests
284
+ # types-requests
285
+ uvicorn==0.36.0
286
+ # via
287
+ # gradio
288
+ # mcp
289
+ wcwidth==0.2.13
290
+ # via prompt-toolkit
291
+ websockets==15.0.1
292
+ # via gradio-client
293
+ widgetsnbextension==4.0.14
294
+ # via ipywidgets
src/clarifier_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
from agents import Agent
from pydantic import BaseModel, Field
from typing import List

# Structured output schema the agent is forced to emit.
class ClassifyingQuestions(BaseModel):
    # Exactly the list of clarifying questions; the Field description is
    # surfaced to the model as part of the structured-output schema.
    questions: List[str] = Field(description="Three classifying questions to better understand the user's query.")

# Prompt for the clarifying-questions agent.
# NOTE(review): the text tells the model to "hand off control to the Research
# Coordinator", but this agent has no handoffs configured and its output is
# forced into ClassifyingQuestions — confirm whether that sentence is stale.
CLASSIFIER_INSTRUCTIONS = (
    "You are a research assistant. Your task is to ask 3 clarifying questions that help refine and understand "
    "a research query better. After the user answers them, hand off control to the Research Coordinator to perform the full research.\n\n"
    "Return your response in this exact format:\n"
    "Question 1: [your first question]\n"
    "Question 2: [your second question]\n"
    "Question 3: [your third question]\n\n"
    "Do not use any markdown formatting, bullet points, or numbering other than the format shown above. "
    "Keep each question concise and focused on clarifying the research scope, methodology, or specific aspects of the query."
)
# Create the clarifier agent (displayed as "Classifier Agent" in traces).
clarifier_agent = Agent(
    name="Classifier Agent",
    instructions=CLASSIFIER_INSTRUCTIONS,
    output_type=ClassifyingQuestions,
    model="gpt-4o-mini"
)
src/deep_research.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ import gradio as gr
3
+ import logging
4
+ import time
5
+ from datetime import datetime
6
+ from collections import defaultdict
7
+ from typing import Optional, List, Tuple
8
+ from dotenv import load_dotenv
9
+
10
+ from clarifier_agent import clarifier_agent
11
+ from research_manager import ResearchManager
12
+ from agents import Runner
13
+
14
+
15
+ # Load environment variables
16
+ load_dotenv(override=True)
17
+
18
+ # Setup logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
# In-memory, per-user rate limiter used to protect the hosted demo.
class RateLimiter:
    """Enforce a rolling one-minute request cap and a per-calendar-day cap.

    State is kept in process memory only and grows one entry per distinct
    user id. A successful ``check_limits`` call also *records* the request.
    """

    def __init__(self, requests_per_minute: int = 2, daily_limit: int = 4):
        self.requests_per_minute = requests_per_minute
        self.daily_limit = daily_limit

        # user_id -> list of request timestamps (epoch seconds) within the window
        self.request_time = defaultdict(list)
        # user_id -> {"date": "YYYY-MM-DD", "count": requests made that day}
        self.daily_counts = defaultdict(lambda: {"date": "", "count": 0})

    def get_today(self) -> str:
        """Return today's local date formatted as YYYY-MM-DD."""
        return datetime.now().strftime("%Y-%m-%d")

    def cleanup_old_requests(self, user_id: str) -> None:
        """Drop timestamps that have fallen out of the rolling 60s window."""
        cutoff = time.time() - 60
        recent = [ts for ts in self.request_time[user_id] if ts > cutoff]
        self.request_time[user_id] = recent

    def check_limits(self, user_id: str) -> Tuple[bool, str]:
        """Check whether *user_id* may make a request right now.

        Returns ``(True, "OK")`` and records the request when allowed,
        otherwise ``(False, reason)`` without recording anything.
        """
        self.cleanup_old_requests(user_id)

        # Rolling per-minute window is checked first.
        if len(self.request_time[user_id]) >= self.requests_per_minute:
            return False, f"Rate limit exceeded: Max {self.requests_per_minute} requests per minute."

        # Reset the daily counter when the calendar day has rolled over.
        today = self.get_today()
        user_data = self.daily_counts[user_id]
        if user_data["date"] != today:
            user_data["date"] = today
            user_data["count"] = 0

        if user_data["count"] >= self.daily_limit:
            return False, f"Daily limit exceeded: Max {self.daily_limit} requests per day."

        # Allowed: record this request against both limits.
        self.request_time[user_id].append(time.time())
        user_data["count"] += 1

        return True, "OK"
75
+
76
+
77
# Module-level singleton rate limiter shared by all Gradio handlers.
# NOTE: daily_limit=2 here deliberately overrides the class default of 4.
rate_limiter = RateLimiter(requests_per_minute=2, daily_limit=2)
79
+
80
+
81
# Resolve a best-effort client identifier, used as the rate-limiting key.
def get_user_id(request: Optional[gr.Request] = None) -> str:
    """Return an identifier for the caller.

    Preference order: first X-Forwarded-For hop, then the request host,
    falling back to "anonymous" (no request) or "unknown_user" (lookup failed).
    """
    if request is None:
        return "anonymous"

    try:
        # Behind a proxy, the first X-Forwarded-For entry is the original client.
        proxied = request.headers.get("X-Forwarded-For")
        if proxied:
            first_hop = proxied.split(",")[0]
            return first_hop.strip()

        # NOTE(review): assumes gr.Request exposes a `host` attribute — confirm.
        direct_host = getattr(request, 'host', None)
        if direct_host:
            return direct_host
    except Exception as e:
        logger.error(f"Error getting user ID: {str(e)}")

    return "unknown_user"
99
+
100
+
101
# Gradio handler: produce the three clarifying questions for a query.
async def generate_clarification_questions(query: str, request: gr.Request = None) -> List[str]:
    """Run the clarifier agent and return exactly three strings.

    The click handler maps the return value onto three output textboxes, so
    this function always returns a list of length 3; validation, rate-limit
    and error messages are placed in the first slot with the rest blank.
    """
    def _three(items: List[str]) -> List[str]:
        # Pad/truncate so the three Gradio outputs always receive one value each.
        values = list(items)[:3]
        values += [""] * (3 - len(values))
        return values

    # Input validation
    if not query or not query.strip():
        return _three(["Please enter a research query first."])

    # Rate limiting
    user_id = get_user_id(request)
    allowed, message = rate_limiter.check_limits(user_id)

    if not allowed:
        logger.info(f"Rate limit exceeded for user {user_id}: {message}")
        # BUG FIX: previously returned a bare string, which Gradio cannot
        # unpack across the three output components.
        return _three([message])

    try:
        result = await Runner.run(clarifier_agent, input=query.strip())
        questions = result.final_output.questions

        # Validate the results
        if not questions:
            return _three(["Could not generate questions. Please try again."])

        logger.info(f"Generated {len(questions)} questions for user {user_id}")
        return _three(questions)
    except Exception as e:
        logger.error(f"Error generating questions for user {user_id}: {str(e)}")
        return _three(["Error generating questions. Please try again."])
129
+
130
+
131
# Gradio handler: run the full research pipeline, streaming status updates.
async def run_deep_research_pipeline(query: str, q1: str, q2: str, q3: str,
                                     a1: str, a2: str, a3: str,
                                     send_email: bool, recipient_email: str,
                                     request: gr.Request = None):
    """Validate inputs, enforce rate limits, then stream ResearchManager output.

    Yields progress strings; on success the final yield is the markdown report.
    """
    # Input validation
    if not query or not query.strip():
        yield "❌ Please enter a research query first."
        return

    # Validate email
    if send_email and not recipient_email:
        yield "❌ Please enter a recipient email to send the report."
        return

    # Rate limiting
    user_id = get_user_id(request)
    allowed, message = rate_limiter.check_limits(user_id)

    if not allowed:
        yield f"❌ {message}"
        return

    # Collect questions and answers, keeping only pairs where both are non-empty.
    questions = [q1.strip(), q2.strip(), q3.strip()]
    answers = [a1.strip(), a2.strip(), a3.strip()]
    valid_pairs = [(q, a) for q, a in zip(questions, answers) if q and a]

    # Run the research manager agent
    research_manager = ResearchManager()

    try:
        valid_questions = [q for q, a in valid_pairs]
        valid_answers = [a for q, a in valid_pairs]

        logger.info(f"Starting research for user {user_id} with {len(valid_questions)} question-answer pairs")

        # BUG FIX: the original positional call passed send_email where
        # run_pipeline expects recipient_email (and vice versa), and passed the
        # raw question/answer lists instead of the filtered pairs it computed.
        async for step in research_manager.run_pipeline(
            query,
            valid_questions,
            valid_answers,
            recipient_email=recipient_email,
            send_email=send_email
        ):
            yield step
    except Exception as e:
        logger.error(f"Error during research for user {user_id}: {str(e)}")
        yield f"❌ Error during research: {str(e)}"
        return
183
+
184
+
185
# Build the Gradio Blocks interface for the three-step research workflow.
def create_ui() -> gr.Blocks:
    """Assemble the query → clarifying-questions → report UI and wire events."""
    with gr.Blocks(
        theme=gr.themes.Default(primary_hue="blue"),
        title="Deep Research Assistant"
    ) as interface:

        # Header
        gr.Markdown("# 🔍 Deep Research Agent")
        gr.Markdown("**Step 1:** Enter query → **Step 2:** Answer questions → **Step 3:** Get research report")

        # Input section
        with gr.Group():
            query_input = gr.Textbox(
                # FIX: corrected "reserach" typo in the user-visible label.
                label="What would you like to research?",
                placeholder="Enter your research question here...",
                lines=2
            )

            generate_btn = gr.Button(
                "Generate Clarifying Questions",
                variant="primary",
                size="lg"
            )

        # Question section: questions are read-only, answers editable.
        with gr.Group():
            gr.Markdown("### 📝 Clarifying Questions")

            question_1 = gr.Textbox(label="Question 1", interactive=False)
            answer_1 = gr.Textbox(label="Your Answer 1", placeholder="Enter your answer...")

            question_2 = gr.Textbox(label="Question 2", interactive=False)
            answer_2 = gr.Textbox(label="Your Answer 2", placeholder="Enter your answer...")

            question_3 = gr.Textbox(label="Question 3", interactive=False)
            answer_3 = gr.Textbox(label="Your Answer 3", placeholder="Enter your answer...")

        # Email options
        with gr.Group():
            gr.Markdown("### 📧 Email Options")

            send_email_checkbox = gr.Checkbox(label="Send report via email")
            email_input = gr.Textbox(
                label="Recipient Email",
                placeholder="recipient@example.com",
                visible=False  # revealed when the checkbox is ticked
            )

        # Action button
        research_btn = gr.Button(
            "🚀 Start Research",
            variant="secondary",
            size="lg"
        )

        # Results
        with gr.Group():
            gr.Markdown("### 📄 Results")
            results_output = gr.Markdown(
                value="Results will appear here...",
                height=400
            )

        # Event handlers
        generate_btn.click(
            fn=generate_clarification_questions,
            inputs=[query_input],
            outputs=[question_1, question_2, question_3]
        )

        send_email_checkbox.change(
            fn=lambda checked: gr.update(visible=checked),
            inputs=[send_email_checkbox],
            outputs=[email_input]
        )

        research_btn.click(
            fn=run_deep_research_pipeline,
            inputs=[
                query_input,
                question_1, question_2, question_3,
                answer_1, answer_2, answer_3,
                send_email_checkbox, email_input
            ],
            outputs=[results_output]
        )

    return interface
275
+
276
+
277
def main():
    """Main application entry point"""
    logger.info("Starting Deep Research Agent...")

    ui = create_ui()

    # Local-only launch on the default Gradio port; errors surface in the UI.
    launch_options = dict(
        server_name="127.0.0.1",  # Local access only (secure)
        server_port=7860,         # Standard Gradio port
        inbrowser=True,           # Open browser automatically
        share=False,              # Don't create public link (secure)
        show_error=True,          # Show detailed errors in UI
        quiet=False,              # Show startup logs
    )
    ui.launch(**launch_options)


if __name__ == "__main__":
    main()
src/email_agent.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
import os
import requests  # NOTE(review): unused in this module — confirm before removing
from mailjet_rest import Client
from agents import Agent, function_tool

# Tool exposed to the email agent: send one HTML email via the Mailjet v3.1 API.
@function_tool
def send_email(subject: str, html_body: str, to:str) -> dict:
    """Send *html_body* as an HTML email with *subject* to the *to* address.

    Credentials are read from the MJ_APIKEY_PUBLIC / MJ_APIKEY_PRIVATE
    environment variables; a missing variable raises KeyError. Returns the
    parsed JSON response from Mailjet.
    """
    api_key = os.environ['MJ_APIKEY_PUBLIC']
    api_secret = os.environ['MJ_APIKEY_PRIVATE']

    # Create the mailjet client
    mailjet = Client(auth=(api_key, api_secret), version='v3.1')

    # Define the payload (the sender address is hard-coded)
    data = {
        'Messages': [
            {
                "From": {
                    "Email": "youhorng.kean@gmail.com"
                },
                "To": [
                    {
                        "Email": to
                    }
                ],
                "Subject": subject,
                "HTMLPart": html_body
            }
        ]
    }

    # Send the email
    result = mailjet.send.create(data=data)

    return result.json()


# Prompt for the email agent: format the report as HTML and send exactly one email.
EMAIL_INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
You will be provided with a detailed report and a recipient email. Use your tool to send one email,
providing the report as HTML with an appropriate subject line."""

# Create the email_agent with the send_email tool attached.
email_agent = Agent(
    name="Email Agent",
    instructions=EMAIL_INSTRUCTIONS,
    tools=[send_email],
    model="gpt-4o-mini"
)
src/planner_agent.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
from pydantic import BaseModel, Field
from typing import List
from agents import Agent

# Number of web searches the planner should produce per query.
HOW_MANY_SEARCHES = 3

# Prompt for the planner agent; the search count is interpolated at import time.
PLANNER_INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."


# One planned search: the term to search plus the planner's rationale.
class WebSearchItem(BaseModel):
    reason: str = Field(description="Your reasoning for why this search is important to the query")
    query: str = Field(description="The search term to use for the web search")


# Structured output of the planner: the full list of planned searches.
class WebSearchPlan(BaseModel):
    # FIX: dropped a stray f-prefix on a string containing no placeholders.
    searches: List[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query")


# Create the planner_agent, forced to emit a WebSearchPlan.
planner_agent = Agent(
    name="Planner Agent",
    instructions=PLANNER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=WebSearchPlan
)
src/research_manager.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ from agents import Runner, trace, gen_trace_id
3
+ from serach_agent import search_agent
4
+ from writer_agent import writer_agent, ReportData
5
+ from email_agent import email_agent
6
+ from planner_agent import planner_agent, WebSearchItem, WebSearchPlan
7
+ import asyncio
8
+ from typing import Optional, List, Dict
9
+
10
+
11
# Orchestrates the research pipeline: plan → search → write → (optional) email.
class ResearchManager:
    """Coordinates the planner, search, writer and email agents.

    The public entry point (`run_pipeline`) is an async generator that yields
    human-readable progress strings; the final yield of a successful run is
    the markdown report itself.
    """

    def __init__(self):
        # NOTE(review): this counter is incremented once per pipeline run in
        # run_pipeline AND once per completed search in perform_searches, so
        # it mixes runs and searches — confirm the intended semantics.
        self.stats = {
            "total_searches": 0
        }

    # Entry point: validate inputs, then stream pipeline progress.
    async def run_pipeline(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Run the full pipeline for *query*, yielding progress strings.

        *questions*/*answers* are the clarifying Q&A pairs; when *send_email*
        is True the finished report is also emailed to *recipient_email*.
        """
        # Validate the input
        is_valid, error_message = self.validate_input(query, questions, answers)
        if not is_valid:
            yield f"❌ Input validation failed: {error_message}"
            return

        # Email validation
        if send_email and not recipient_email:
            yield "❌ Email sending requested but no recipient email provided."
            return

        self.stats["total_searches"] += 1

        # Execute the research pipeline
        try:
            async for step in self.execute_pipeline_research(query, questions, answers, recipient_email, send_email):
                yield step
        except Exception as e:
            yield f"❌ Research pipeline failed: {str(e)}"
            return

    # Wrap the run in an OpenAI trace so each pipeline execution is inspectable.
    async def execute_pipeline_research(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Yield a trace link, then forward every step of the agent pipeline."""
        trace_id = gen_trace_id()
        with trace("Research Pipeline", trace_id=trace_id):
            yield f"Trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            async for step in self.run_agents_step(query, questions, answers, recipient_email, send_email):
                yield step

    # Run the agents in sequence: plan, search, write, optionally email.
    async def run_agents_step(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Execute the individual pipeline steps, yielding a status per step."""
        # Step 1: Planning
        yield "Planning searches based on clarifications..."
        search_plan = await self.plan_searches(query, questions, answers)

        # Step 2: Searching
        yield f"Starting {len(search_plan.searches)} searches..."
        search_results = await self.perform_searches(search_plan)

        # Step 3: Writing Report
        yield "Analyzing search results and writing report..."
        report = await self.write_report(query, search_results)

        # Step 4: Sending Email (optional)
        if send_email and recipient_email:
            yield f"Sending report to {recipient_email}..."
            await self.send_report_email(report, recipient_email)
            yield f"Report sent to {recipient_email}."
        else:
            yield "Email sending skipped."

        # Final yield: the report body itself.
        yield report.markdown_report

    # Validate query/questions/answers before any agent work starts.
    def validate_input(self, query: str, questions: List[str], answers: List[str]) -> tuple[bool, str]:
        """Return (True, "") when inputs are usable, else (False, reason)."""
        if not query or not query.strip():
            return False, "Query cannot be empty"

        if len(questions) != len(answers):
            return False, f"Mismatch: {len(questions)} questions but {len(answers)} answers"

        # Reject blank questions or answers.
        for i, (q, a) in enumerate(zip(questions, answers)):
            if not q.strip():
                return False, f"Question {i+1} is empty"
            if not a.strip():
                return False, f"Answer {i+1} is empty"

        return True, ""

    # Ask the planner agent for a search plan informed by the clarifications.
    async def plan_searches(self, query: str, questions: List[str], answers: List[str]):
        """Return the planner's WebSearchPlan; raises when planning fails."""
        clarifying_context = "\n".join(f"Q: {q}\nA: {a}" for q, a in zip(questions, answers))
        final_prompt = f"Query: {query}\n\nClarifications:\n{clarifying_context}"

        try:
            result = await Runner.run(planner_agent, final_prompt)
            search_plan = result.final_output

            # Validate the result of search plan
            if not search_plan.searches:
                raise ValueError("Planner agent returned no searches")

            print(f"Planned Searches: {len(search_plan.searches)} searches")
            return search_plan
        except Exception as e:
            # FIX: chain the original exception for debuggability.
            raise Exception(f"Search Planner failed: {str(e)}") from e

    # Run every planned search concurrently; failed searches are dropped.
    async def perform_searches(self, search_plan: WebSearchPlan) -> List[str]:
        """Return the list of successful search summaries (order of completion)."""
        num_searches = len(search_plan.searches)

        # Create tasks for concurrent execution
        tasks = [asyncio.create_task(self.search_web(item)) for item in search_plan.searches]
        results = []
        completed = 0

        # Gather results as they complete
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            completed += 1
            # FIX: corrected "Seraching" typo in the progress message.
            print(f"Searching... {completed}/{num_searches} completed")
            self.stats["total_searches"] += 1

        print("Finished all searches.")

        return results

    # Run one web search; failures are logged and reported as None.
    async def search_web(self, item: WebSearchItem) -> Optional[str]:
        """Return the summary text for one WebSearchItem, or None on failure."""
        input_text = f"Search: {item.query}\nReason: {item.reason}"

        try:
            result = await Runner.run(search_agent, input_text)
            return str(result.final_output)
        except Exception as e:
            print(f"Search failed for '{item.query}': {str(e)}")
            return None

    # Have the writer agent synthesize the final report from the summaries.
    async def write_report(self, query: str, search_results: List[str]) -> ReportData:
        """Return the writer agent's ReportData; raises on incomplete output."""
        input_text = f"Original query: {query}\n\nSearch Results:\n" + "\n---\n".join(search_results)

        try:
            result = await Runner.run(writer_agent, input_text)
            report = result.final_output

            # Validate the result
            if not report.markdown_report or not report.short_summary:
                raise ValueError("Writer agent returned incomplete report")

            return report
        except Exception as e:
            raise Exception(f"Report Writing failed: {str(e)}") from e

    # Send the finished report to the recipient via the email agent.
    async def send_report_email(self, report: ReportData, recipient_email: str) -> None:
        """Instruct the email agent to deliver the report; raises on failure."""
        input_text = f"""
Send the following research report as an email:
To: {recipient_email}

Body (HTML):
{report.markdown_report}
"""

        try:
            await Runner.run(email_agent, input_text)
            print(f"✅ Email sent to {recipient_email}")
        except Exception as e:
            raise Exception(f"Email sending failed: {str(e)}") from e
src/serach_agent.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Import libraries
from agents import (
    Agent,
    WebSearchTool,
    ModelSettings
)

# Prompt for the search agent: produce a terse 2-3 paragraph, <300 word
# summary for a downstream report writer. (Fixed typos in the original
# prompt: "must 2-3" -> "must be 2-3", "succintly" -> "succinctly",
# "its vital" -> "it's vital".)
SEARCH_INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
    "words. Capture the main points. Write succinctly, no need to have complete sentences or good "
    "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
    "essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
)

# Create the search_agent; tool_choice="required" forces the model to
# actually invoke the web search tool rather than answer from memory.
search_agent = Agent(
    name="Web Searching Agent",
    instructions=SEARCH_INSTRUCTIONS,
    model="gpt-4o-mini",
    tools=[WebSearchTool(search_context_size="low")],
    model_settings=ModelSettings(tool_choice="required")
)
src/writer_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Import libraries
from pydantic import BaseModel, Field
from agents import Agent

# Define instructions for the writer agent: outline the report first,
# then return a lengthy markdown report as the final output.
WRITER_INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)

# Create the pydantic model to store the final report
class ReportData(BaseModel):
    """Structured output schema the writer agent must return."""

    short_summary: str = Field(description="A short 2-3 sentence summary of the findings")
    markdown_report: str = Field(description="The final report")
    # NOTE(review): typed as a single str although the description implies a
    # list of topics — confirm downstream consumers before changing the schema.
    follow_up_questions: str = Field(description="Suggested topics to research further")

# Create the writer_agent; output_type=ReportData makes the runner parse
# the model's answer into the structured schema above.
writer_agent = Agent(
    name="Report Writing Agent",
    instructions=WRITER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=ReportData,
)
uv.lock ADDED
The diff for this file is too large to render. See raw diff