Youhorng commited on
Commit
45d075b
·
verified ·
1 Parent(s): 49e76ba

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+
176
+ .DS_Store
177
+
178
+ # Ignore Crew engineering team output
179
+ 3_crew/engineering_team/output/
180
+
181
+ # Ignore Accounts database in capstone project
182
+ 6_mcp/accounts.db
183
+ 6_mcp/memory/*.db
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Deep Research Agent
3
- emoji: 🐨
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.47.1
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: deep_research_agent
3
+ app_file: src/deep_research.py
 
 
4
  sdk: gradio
5
+ sdk_version: 5.46.1
 
 
6
  ---
 
 
main.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def main() -> None:
    """Entry point: print a greeting identifying the project."""
    greeting = "Hello from deep-research-ai-agent!"
    print(greeting)


if __name__ == "__main__":
    main()
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "deep-research-ai-agent"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "anthropic>=0.68.0",
9
+ "gradio>=5.46.1",
10
+ "httpx>=0.28.1",
11
+ "huggingface-hub[cli]>=0.35.0",
12
+ "ipywidgets>=8.1.7",
13
+ "mailjet-rest>=1.5.1",
14
+ "openai>=1.108.1",
15
+ "openai-agents>=0.3.1",
16
+ "python-dotenv>=1.1.1",
17
+ "requests>=2.32.5",
18
+ ]
requirements.txt ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml -o requirements.txt
3
+ aiofiles==24.1.0
4
+ # via gradio
5
+ annotated-types==0.7.0
6
+ # via pydantic
7
+ anthropic==0.68.0
8
+ # via deep-research-ai-agent (pyproject.toml)
9
+ anyio==4.10.0
10
+ # via
11
+ # anthropic
12
+ # gradio
13
+ # httpx
14
+ # mcp
15
+ # openai
16
+ # sse-starlette
17
+ # starlette
18
+ asttokens==3.0.0
19
+ # via stack-data
20
+ attrs==25.3.0
21
+ # via
22
+ # jsonschema
23
+ # referencing
24
+ audioop-lts==0.2.2
25
+ # via gradio
26
+ brotli==1.1.0
27
+ # via gradio
28
+ certifi==2025.8.3
29
+ # via
30
+ # httpcore
31
+ # httpx
32
+ # requests
33
+ charset-normalizer==3.4.3
34
+ # via requests
35
+ click==8.3.0
36
+ # via
37
+ # typer
38
+ # uvicorn
39
+ colorama==0.4.6
40
+ # via griffe
41
+ comm==0.2.3
42
+ # via ipywidgets
43
+ decorator==5.2.1
44
+ # via ipython
45
+ distro==1.9.0
46
+ # via
47
+ # anthropic
48
+ # openai
49
+ docstring-parser==0.17.0
50
+ # via anthropic
51
+ executing==2.2.1
52
+ # via stack-data
53
+ fastapi==0.117.1
54
+ # via gradio
55
+ ffmpy==0.6.1
56
+ # via gradio
57
+ filelock==3.19.1
58
+ # via huggingface-hub
59
+ fsspec==2025.9.0
60
+ # via
61
+ # gradio-client
62
+ # huggingface-hub
63
+ gradio==5.46.1
64
+ # via deep-research-ai-agent (pyproject.toml)
65
+ gradio-client==1.13.1
66
+ # via gradio
67
+ griffe==1.14.0
68
+ # via openai-agents
69
+ groovy==0.1.2
70
+ # via gradio
71
+ h11==0.16.0
72
+ # via
73
+ # httpcore
74
+ # uvicorn
75
+ hf-xet==1.1.10
76
+ # via huggingface-hub
77
+ httpcore==1.0.9
78
+ # via httpx
79
+ httpx==0.28.1
80
+ # via
81
+ # deep-research-ai-agent (pyproject.toml)
82
+ # anthropic
83
+ # gradio
84
+ # gradio-client
85
+ # mcp
86
+ # openai
87
+ # safehttpx
88
+ httpx-sse==0.4.1
89
+ # via mcp
90
+ huggingface-hub==0.35.0
91
+ # via
92
+ # deep-research-ai-agent (pyproject.toml)
93
+ # gradio
94
+ # gradio-client
95
+ idna==3.10
96
+ # via
97
+ # anyio
98
+ # httpx
99
+ # requests
100
+ inquirerpy==0.3.4
101
+ # via huggingface-hub
102
+ ipython==9.5.0
103
+ # via ipywidgets
104
+ ipython-pygments-lexers==1.1.1
105
+ # via ipython
106
+ ipywidgets==8.1.7
107
+ # via deep-research-ai-agent (pyproject.toml)
108
+ jedi==0.19.2
109
+ # via ipython
110
+ jinja2==3.1.6
111
+ # via gradio
112
+ jiter==0.11.0
113
+ # via
114
+ # anthropic
115
+ # openai
116
+ jsonschema==4.25.1
117
+ # via mcp
118
+ jsonschema-specifications==2025.9.1
119
+ # via jsonschema
120
+ jupyterlab-widgets==3.0.15
121
+ # via ipywidgets
122
+ mailjet-rest==1.5.1
123
+ # via deep-research-ai-agent (pyproject.toml)
124
+ markdown-it-py==4.0.0
125
+ # via rich
126
+ markupsafe==3.0.2
127
+ # via
128
+ # gradio
129
+ # jinja2
130
+ matplotlib-inline==0.1.7
131
+ # via ipython
132
+ mcp==1.14.1
133
+ # via openai-agents
134
+ mdurl==0.1.2
135
+ # via markdown-it-py
136
+ numpy==2.3.3
137
+ # via
138
+ # gradio
139
+ # pandas
140
+ openai==1.108.1
141
+ # via
142
+ # deep-research-ai-agent (pyproject.toml)
143
+ # openai-agents
144
+ openai-agents==0.3.1
145
+ # via deep-research-ai-agent (pyproject.toml)
146
+ orjson==3.11.3
147
+ # via gradio
148
+ packaging==25.0
149
+ # via
150
+ # gradio
151
+ # gradio-client
152
+ # huggingface-hub
153
+ pandas==2.3.2
154
+ # via gradio
155
+ parso==0.8.5
156
+ # via jedi
157
+ pexpect==4.9.0
158
+ # via ipython
159
+ pfzy==0.3.4
160
+ # via inquirerpy
161
+ pillow==11.3.0
162
+ # via gradio
163
+ prompt-toolkit==3.0.52
164
+ # via
165
+ # inquirerpy
166
+ # ipython
167
+ ptyprocess==0.7.0
168
+ # via pexpect
169
+ pure-eval==0.2.3
170
+ # via stack-data
171
+ pydantic==2.11.9
172
+ # via
173
+ # anthropic
174
+ # fastapi
175
+ # gradio
176
+ # mcp
177
+ # openai
178
+ # openai-agents
179
+ # pydantic-settings
180
+ pydantic-core==2.33.2
181
+ # via pydantic
182
+ pydantic-settings==2.10.1
183
+ # via mcp
184
+ pydub==0.25.1
185
+ # via gradio
186
+ pygments==2.19.2
187
+ # via
188
+ # ipython
189
+ # ipython-pygments-lexers
190
+ # rich
191
+ python-dateutil==2.9.0.post0
192
+ # via pandas
193
+ python-dotenv==1.1.1
194
+ # via
195
+ # deep-research-ai-agent (pyproject.toml)
196
+ # pydantic-settings
197
+ python-multipart==0.0.20
198
+ # via
199
+ # gradio
200
+ # mcp
201
+ pytz==2025.2
202
+ # via pandas
203
+ pyyaml==6.0.2
204
+ # via
205
+ # gradio
206
+ # huggingface-hub
207
+ referencing==0.36.2
208
+ # via
209
+ # jsonschema
210
+ # jsonschema-specifications
211
+ requests==2.32.5
212
+ # via
213
+ # deep-research-ai-agent (pyproject.toml)
214
+ # huggingface-hub
215
+ # mailjet-rest
216
+ # openai-agents
217
+ rich==14.1.0
218
+ # via typer
219
+ rpds-py==0.27.1
220
+ # via
221
+ # jsonschema
222
+ # referencing
223
+ ruff==0.13.1
224
+ # via gradio
225
+ safehttpx==0.1.6
226
+ # via gradio
227
+ semantic-version==2.10.0
228
+ # via gradio
229
+ shellingham==1.5.4
230
+ # via typer
231
+ six==1.17.0
232
+ # via python-dateutil
233
+ sniffio==1.3.1
234
+ # via
235
+ # anthropic
236
+ # anyio
237
+ # openai
238
+ sse-starlette==3.0.2
239
+ # via mcp
240
+ stack-data==0.6.3
241
+ # via ipython
242
+ starlette==0.48.0
243
+ # via
244
+ # fastapi
245
+ # gradio
246
+ # mcp
247
+ tomlkit==0.13.3
248
+ # via gradio
249
+ tqdm==4.67.1
250
+ # via
251
+ # huggingface-hub
252
+ # openai
253
+ traitlets==5.14.3
254
+ # via
255
+ # ipython
256
+ # ipywidgets
257
+ # matplotlib-inline
258
+ typer==0.19.1
259
+ # via gradio
260
+ types-requests==2.32.4.20250913
261
+ # via openai-agents
262
+ typing-extensions==4.15.0
263
+ # via
264
+ # anthropic
265
+ # fastapi
266
+ # gradio
267
+ # gradio-client
268
+ # huggingface-hub
269
+ # openai
270
+ # openai-agents
271
+ # pydantic
272
+ # pydantic-core
273
+ # typer
274
+ # typing-inspection
275
+ typing-inspection==0.4.1
276
+ # via
277
+ # pydantic
278
+ # pydantic-settings
279
+ tzdata==2025.2
280
+ # via pandas
281
+ urllib3==2.5.0
282
+ # via
283
+ # requests
284
+ # types-requests
285
+ uvicorn==0.36.0
286
+ # via
287
+ # gradio
288
+ # mcp
289
+ wcwidth==0.2.13
290
+ # via prompt-toolkit
291
+ websockets==15.0.1
292
+ # via gradio-client
293
+ widgetsnbextension==4.0.14
294
+ # via ipywidgets
src/clarifier_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
from agents import Agent
from pydantic import BaseModel, Field
from typing import List

# Structured output schema the agent is forced to emit.
class ClassifyingQuestions(BaseModel):
    # Exactly the list of clarifying questions; the Field description is
    # surfaced to the model as part of the structured-output schema.
    questions: List[str] = Field(description="Three classifying questions to better understand the user's query.")

# Prompt for the clarifying-questions agent.
# NOTE(review): the text tells the model to "hand off control to the Research
# Coordinator", but this agent has no handoffs configured and its output is
# forced into ClassifyingQuestions — confirm whether that sentence is stale.
CLASSIFIER_INSTRUCTIONS = (
    "You are a research assistant. Your task is to ask 3 clarifying questions that help refine and understand "
    "a research query better. After the user answers them, hand off control to the Research Coordinator to perform the full research.\n\n"
    "Return your response in this exact format:\n"
    "Question 1: [your first question]\n"
    "Question 2: [your second question]\n"
    "Question 3: [your third question]\n\n"
    "Do not use any markdown formatting, bullet points, or numbering other than the format shown above. "
    "Keep each question concise and focused on clarifying the research scope, methodology, or specific aspects of the query."
)
# Create the clarifier agent (displayed as "Classifier Agent" in traces).
clarifier_agent = Agent(
    name="Classifier Agent",
    instructions=CLASSIFIER_INSTRUCTIONS,
    output_type=ClassifyingQuestions,
    model="gpt-4o-mini"
)
src/deep_research.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ import gradio as gr
3
+ import logging
4
+ import time
5
+ from datetime import datetime
6
+ from collections import defaultdict
7
+ from typing import Optional, List, Tuple
8
+ from dotenv import load_dotenv
9
+
10
+ from clarifier_agent import clarifier_agent
11
+ from research_manager import ResearchManager
12
+ from agents import Runner
13
+
14
+
15
+ # Load environment variables
16
+ load_dotenv(override=True)
17
+
18
+ # Setup logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
# In-memory, per-user rate limiter used to protect the hosted demo.
class RateLimiter:
    """Enforce a rolling one-minute request cap and a per-calendar-day cap.

    State is kept in process memory only and grows one entry per distinct
    user id. A successful ``check_limits`` call also *records* the request.
    """

    def __init__(self, requests_per_minute: int = 2, daily_limit: int = 4):
        self.requests_per_minute = requests_per_minute
        self.daily_limit = daily_limit

        # user_id -> list of request timestamps (epoch seconds) within the window
        self.request_time = defaultdict(list)
        # user_id -> {"date": "YYYY-MM-DD", "count": requests made that day}
        self.daily_counts = defaultdict(lambda: {"date": "", "count": 0})

    def get_today(self) -> str:
        """Return today's local date formatted as YYYY-MM-DD."""
        return datetime.now().strftime("%Y-%m-%d")

    def cleanup_old_requests(self, user_id: str) -> None:
        """Drop timestamps that have fallen out of the rolling 60s window."""
        cutoff = time.time() - 60
        recent = [ts for ts in self.request_time[user_id] if ts > cutoff]
        self.request_time[user_id] = recent

    def check_limits(self, user_id: str) -> Tuple[bool, str]:
        """Check whether *user_id* may make a request right now.

        Returns ``(True, "OK")`` and records the request when allowed,
        otherwise ``(False, reason)`` without recording anything.
        """
        self.cleanup_old_requests(user_id)

        # Rolling per-minute window is checked first.
        if len(self.request_time[user_id]) >= self.requests_per_minute:
            return False, f"Rate limit exceeded: Max {self.requests_per_minute} requests per minute."

        # Reset the daily counter when the calendar day has rolled over.
        today = self.get_today()
        user_data = self.daily_counts[user_id]
        if user_data["date"] != today:
            user_data["date"] = today
            user_data["count"] = 0

        if user_data["count"] >= self.daily_limit:
            return False, f"Daily limit exceeded: Max {self.daily_limit} requests per day."

        # Allowed: record this request against both limits.
        self.request_time[user_id].append(time.time())
        user_data["count"] += 1

        return True, "OK"
75
+
76
+
77
# Module-level singleton rate limiter shared by all Gradio handlers.
# NOTE: daily_limit=2 here deliberately overrides the class default of 4.
rate_limiter = RateLimiter(requests_per_minute=2, daily_limit=2)
79
+
80
+
81
# Resolve a best-effort client identifier, used as the rate-limiting key.
def get_user_id(request: Optional[gr.Request] = None) -> str:
    """Return an identifier for the caller.

    Preference order: first X-Forwarded-For hop, then the request host,
    falling back to "anonymous" (no request) or "unknown_user" (lookup failed).
    """
    if request is None:
        return "anonymous"

    try:
        # Behind a proxy, the first X-Forwarded-For entry is the original client.
        proxied = request.headers.get("X-Forwarded-For")
        if proxied:
            first_hop = proxied.split(",")[0]
            return first_hop.strip()

        # NOTE(review): assumes gr.Request exposes a `host` attribute — confirm.
        direct_host = getattr(request, 'host', None)
        if direct_host:
            return direct_host
    except Exception as e:
        logger.error(f"Error getting user ID: {str(e)}")

    return "unknown_user"
99
+
100
+
101
# Gradio handler: produce the three clarifying questions for a query.
async def generate_clarification_questions(query: str, request: gr.Request = None) -> List[str]:
    """Run the clarifier agent and return exactly three strings.

    The click handler maps the return value onto three output textboxes, so
    this function always returns a list of length 3; validation, rate-limit
    and error messages are placed in the first slot with the rest blank.
    """
    def _three(items: List[str]) -> List[str]:
        # Pad/truncate so the three Gradio outputs always receive one value each.
        values = list(items)[:3]
        values += [""] * (3 - len(values))
        return values

    # Input validation
    if not query or not query.strip():
        return _three(["Please enter a research query first."])

    # Rate limiting
    user_id = get_user_id(request)
    allowed, message = rate_limiter.check_limits(user_id)

    if not allowed:
        logger.info(f"Rate limit exceeded for user {user_id}: {message}")
        # BUG FIX: previously returned a bare string, which Gradio cannot
        # unpack across the three output components.
        return _three([message])

    try:
        result = await Runner.run(clarifier_agent, input=query.strip())
        questions = result.final_output.questions

        # Validate the results
        if not questions:
            return _three(["Could not generate questions. Please try again."])

        logger.info(f"Generated {len(questions)} questions for user {user_id}")
        return _three(questions)
    except Exception as e:
        logger.error(f"Error generating questions for user {user_id}: {str(e)}")
        return _three(["Error generating questions. Please try again."])
129
+
130
+
131
# Gradio handler: run the full research pipeline, streaming status updates.
async def run_deep_research_pipeline(query: str, q1: str, q2: str, q3: str,
                                     a1: str, a2: str, a3: str,
                                     send_email: bool, recipient_email: str,
                                     request: gr.Request = None):
    """Validate inputs, enforce rate limits, then stream ResearchManager output.

    Yields progress strings; on success the final yield is the markdown report.
    """
    # Input validation
    if not query or not query.strip():
        yield "❌ Please enter a research query first."
        return

    # Validate email
    if send_email and not recipient_email:
        yield "❌ Please enter a recipient email to send the report."
        return

    # Rate limiting
    user_id = get_user_id(request)
    allowed, message = rate_limiter.check_limits(user_id)

    if not allowed:
        yield f"❌ {message}"
        return

    # Collect questions and answers, keeping only pairs where both are non-empty.
    questions = [q1.strip(), q2.strip(), q3.strip()]
    answers = [a1.strip(), a2.strip(), a3.strip()]
    valid_pairs = [(q, a) for q, a in zip(questions, answers) if q and a]

    # Run the research manager agent
    research_manager = ResearchManager()

    try:
        valid_questions = [q for q, a in valid_pairs]
        valid_answers = [a for q, a in valid_pairs]

        logger.info(f"Starting research for user {user_id} with {len(valid_questions)} question-answer pairs")

        # BUG FIX: the original positional call passed send_email where
        # run_pipeline expects recipient_email (and vice versa), and passed the
        # raw question/answer lists instead of the filtered pairs it computed.
        async for step in research_manager.run_pipeline(
            query,
            valid_questions,
            valid_answers,
            recipient_email=recipient_email,
            send_email=send_email
        ):
            yield step
    except Exception as e:
        logger.error(f"Error during research for user {user_id}: {str(e)}")
        yield f"❌ Error during research: {str(e)}"
        return
183
+
184
+
185
# Build the Gradio Blocks interface for the three-step research workflow.
def create_ui() -> gr.Blocks:
    """Assemble the query → clarifying-questions → report UI and wire events."""
    with gr.Blocks(
        theme=gr.themes.Default(primary_hue="blue"),
        title="Deep Research Assistant"
    ) as interface:

        # Header
        gr.Markdown("# 🔍 Deep Research Agent")
        gr.Markdown("**Step 1:** Enter query → **Step 2:** Answer questions → **Step 3:** Get research report")

        # Input section
        with gr.Group():
            query_input = gr.Textbox(
                # FIX: corrected "reserach" typo in the user-visible label.
                label="What would you like to research?",
                placeholder="Enter your research question here...",
                lines=2
            )

            generate_btn = gr.Button(
                "Generate Clarifying Questions",
                variant="primary",
                size="lg"
            )

        # Question section: questions are read-only, answers editable.
        with gr.Group():
            gr.Markdown("### 📝 Clarifying Questions")

            question_1 = gr.Textbox(label="Question 1", interactive=False)
            answer_1 = gr.Textbox(label="Your Answer 1", placeholder="Enter your answer...")

            question_2 = gr.Textbox(label="Question 2", interactive=False)
            answer_2 = gr.Textbox(label="Your Answer 2", placeholder="Enter your answer...")

            question_3 = gr.Textbox(label="Question 3", interactive=False)
            answer_3 = gr.Textbox(label="Your Answer 3", placeholder="Enter your answer...")

        # Email options
        with gr.Group():
            gr.Markdown("### 📧 Email Options")

            send_email_checkbox = gr.Checkbox(label="Send report via email")
            email_input = gr.Textbox(
                label="Recipient Email",
                placeholder="recipient@example.com",
                visible=False  # revealed when the checkbox is ticked
            )

        # Action button
        research_btn = gr.Button(
            "🚀 Start Research",
            variant="secondary",
            size="lg"
        )

        # Results
        with gr.Group():
            gr.Markdown("### 📄 Results")
            results_output = gr.Markdown(
                value="Results will appear here...",
                height=400
            )

        # Event handlers
        generate_btn.click(
            fn=generate_clarification_questions,
            inputs=[query_input],
            outputs=[question_1, question_2, question_3]
        )

        send_email_checkbox.change(
            fn=lambda checked: gr.update(visible=checked),
            inputs=[send_email_checkbox],
            outputs=[email_input]
        )

        research_btn.click(
            fn=run_deep_research_pipeline,
            inputs=[
                query_input,
                question_1, question_2, question_3,
                answer_1, answer_2, answer_3,
                send_email_checkbox, email_input
            ],
            outputs=[results_output]
        )

    return interface
275
+
276
+
277
def main():
    """Main application entry point"""
    logger.info("Starting Deep Research Agent...")

    ui = create_ui()

    # Local-only launch on the default Gradio port; errors surface in the UI.
    launch_options = dict(
        server_name="127.0.0.1",  # Local access only (secure)
        server_port=7860,         # Standard Gradio port
        inbrowser=True,           # Open browser automatically
        share=False,              # Don't create public link (secure)
        show_error=True,          # Show detailed errors in UI
        quiet=False,              # Show startup logs
    )
    ui.launch(**launch_options)


if __name__ == "__main__":
    main()
src/email_agent.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
import os
import requests  # NOTE(review): unused in this module — confirm before removing
from mailjet_rest import Client
from agents import Agent, function_tool

# Tool exposed to the email agent: send one HTML email via the Mailjet v3.1 API.
@function_tool
def send_email(subject: str, html_body: str, to:str) -> dict:
    """Send *html_body* as an HTML email with *subject* to the *to* address.

    Credentials are read from the MJ_APIKEY_PUBLIC / MJ_APIKEY_PRIVATE
    environment variables; a missing variable raises KeyError. Returns the
    parsed JSON response from Mailjet.
    """
    api_key = os.environ['MJ_APIKEY_PUBLIC']
    api_secret = os.environ['MJ_APIKEY_PRIVATE']

    # Create the mailjet client
    mailjet = Client(auth=(api_key, api_secret), version='v3.1')

    # Define the payload (the sender address is hard-coded)
    data = {
        'Messages': [
            {
                "From": {
                    "Email": "youhorng.kean@gmail.com"
                },
                "To": [
                    {
                        "Email": to
                    }
                ],
                "Subject": subject,
                "HTMLPart": html_body
            }
        ]
    }

    # Send the email
    result = mailjet.send.create(data=data)

    return result.json()


# Prompt for the email agent: format the report as HTML and send exactly one email.
EMAIL_INSTRUCTIONS = """You are able to send a nicely formatted HTML email based on a detailed report.
You will be provided with a detailed report and a recipient email. Use your tool to send one email,
providing the report as HTML with an appropriate subject line."""

# Create the email_agent with the send_email tool attached.
email_agent = Agent(
    name="Email Agent",
    instructions=EMAIL_INSTRUCTIONS,
    tools=[send_email],
    model="gpt-4o-mini"
)
src/planner_agent.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import libraries
from pydantic import BaseModel, Field
from typing import List
from agents import Agent

# Number of web searches the planner should produce per query.
HOW_MANY_SEARCHES = 3

# Prompt for the planner agent; the search count is interpolated at import time.
PLANNER_INSTRUCTIONS = f"You are a helpful research assistant. Given a query, come up with a set of web searches \
to perform to best answer the query. Output {HOW_MANY_SEARCHES} terms to query for."


# One planned search: the term to search plus the planner's rationale.
class WebSearchItem(BaseModel):
    reason: str = Field(description="Your reasoning for why this search is important to the query")
    query: str = Field(description="The search term to use for the web search")


# Structured output of the planner: the full list of planned searches.
class WebSearchPlan(BaseModel):
    # FIX: dropped a stray f-prefix on a string containing no placeholders.
    searches: List[WebSearchItem] = Field(description="A list of web searches to perform to best answer the query")


# Create the planner_agent, forced to emit a WebSearchPlan.
planner_agent = Agent(
    name="Planner Agent",
    instructions=PLANNER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=WebSearchPlan
)
src/research_manager.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ from agents import Runner, trace, gen_trace_id
3
+ from serach_agent import search_agent
4
+ from writer_agent import writer_agent, ReportData
5
+ from email_agent import email_agent
6
+ from planner_agent import planner_agent, WebSearchItem, WebSearchPlan
7
+ import asyncio
8
+ from typing import Optional, List, Dict
9
+
10
+
11
# Orchestrates the research pipeline: plan → search → write → (optional) email.
class ResearchManager:
    """Coordinates the planner, search, writer and email agents.

    The public entry point (`run_pipeline`) is an async generator that yields
    human-readable progress strings; the final yield of a successful run is
    the markdown report itself.
    """

    def __init__(self):
        # NOTE(review): this counter is incremented once per pipeline run in
        # run_pipeline AND once per completed search in perform_searches, so
        # it mixes runs and searches — confirm the intended semantics.
        self.stats = {
            "total_searches": 0
        }

    # Entry point: validate inputs, then stream pipeline progress.
    async def run_pipeline(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Run the full pipeline for *query*, yielding progress strings.

        *questions*/*answers* are the clarifying Q&A pairs; when *send_email*
        is True the finished report is also emailed to *recipient_email*.
        """
        # Validate the input
        is_valid, error_message = self.validate_input(query, questions, answers)
        if not is_valid:
            yield f"❌ Input validation failed: {error_message}"
            return

        # Email validation
        if send_email and not recipient_email:
            yield "❌ Email sending requested but no recipient email provided."
            return

        self.stats["total_searches"] += 1

        # Execute the research pipeline
        try:
            async for step in self.execute_pipeline_research(query, questions, answers, recipient_email, send_email):
                yield step
        except Exception as e:
            yield f"❌ Research pipeline failed: {str(e)}"
            return

    # Wrap the run in an OpenAI trace so each pipeline execution is inspectable.
    async def execute_pipeline_research(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Yield a trace link, then forward every step of the agent pipeline."""
        trace_id = gen_trace_id()
        with trace("Research Pipeline", trace_id=trace_id):
            yield f"Trace: https://platform.openai.com/traces/trace?trace_id={trace_id}"
            async for step in self.run_agents_step(query, questions, answers, recipient_email, send_email):
                yield step

    # Run the agents in sequence: plan, search, write, optionally email.
    async def run_agents_step(self, query: str, questions: List[str], answers: List[str], recipient_email: str, send_email: bool = False):
        """Execute the individual pipeline steps, yielding a status per step."""
        # Step 1: Planning
        yield "Planning searches based on clarifications..."
        search_plan = await self.plan_searches(query, questions, answers)

        # Step 2: Searching
        yield f"Starting {len(search_plan.searches)} searches..."
        search_results = await self.perform_searches(search_plan)

        # Step 3: Writing Report
        yield "Analyzing search results and writing report..."
        report = await self.write_report(query, search_results)

        # Step 4: Sending Email (optional)
        if send_email and recipient_email:
            yield f"Sending report to {recipient_email}..."
            await self.send_report_email(report, recipient_email)
            yield f"Report sent to {recipient_email}."
        else:
            yield "Email sending skipped."

        # Final yield: the report body itself.
        yield report.markdown_report

    # Validate query/questions/answers before any agent work starts.
    def validate_input(self, query: str, questions: List[str], answers: List[str]) -> tuple[bool, str]:
        """Return (True, "") when inputs are usable, else (False, reason)."""
        if not query or not query.strip():
            return False, "Query cannot be empty"

        if len(questions) != len(answers):
            return False, f"Mismatch: {len(questions)} questions but {len(answers)} answers"

        # Reject blank questions or answers.
        for i, (q, a) in enumerate(zip(questions, answers)):
            if not q.strip():
                return False, f"Question {i+1} is empty"
            if not a.strip():
                return False, f"Answer {i+1} is empty"

        return True, ""

    # Ask the planner agent for a search plan informed by the clarifications.
    async def plan_searches(self, query: str, questions: List[str], answers: List[str]):
        """Return the planner's WebSearchPlan; raises when planning fails."""
        clarifying_context = "\n".join(f"Q: {q}\nA: {a}" for q, a in zip(questions, answers))
        final_prompt = f"Query: {query}\n\nClarifications:\n{clarifying_context}"

        try:
            result = await Runner.run(planner_agent, final_prompt)
            search_plan = result.final_output

            # Validate the result of search plan
            if not search_plan.searches:
                raise ValueError("Planner agent returned no searches")

            print(f"Planned Searches: {len(search_plan.searches)} searches")
            return search_plan
        except Exception as e:
            # FIX: chain the original exception for debuggability.
            raise Exception(f"Search Planner failed: {str(e)}") from e

    # Run every planned search concurrently; failed searches are dropped.
    async def perform_searches(self, search_plan: WebSearchPlan) -> List[str]:
        """Return the list of successful search summaries (order of completion)."""
        num_searches = len(search_plan.searches)

        # Create tasks for concurrent execution
        tasks = [asyncio.create_task(self.search_web(item)) for item in search_plan.searches]
        results = []
        completed = 0

        # Gather results as they complete
        for task in asyncio.as_completed(tasks):
            result = await task
            if result is not None:
                results.append(result)
            completed += 1
            # FIX: corrected "Seraching" typo in the progress message.
            print(f"Searching... {completed}/{num_searches} completed")
            self.stats["total_searches"] += 1

        print("Finished all searches.")

        return results

    # Run one web search; failures are logged and reported as None.
    async def search_web(self, item: WebSearchItem) -> Optional[str]:
        """Return the summary text for one WebSearchItem, or None on failure."""
        input_text = f"Search: {item.query}\nReason: {item.reason}"

        try:
            result = await Runner.run(search_agent, input_text)
            return str(result.final_output)
        except Exception as e:
            print(f"Search failed for '{item.query}': {str(e)}")
            return None

    # Have the writer agent synthesize the final report from the summaries.
    async def write_report(self, query: str, search_results: List[str]) -> ReportData:
        """Return the writer agent's ReportData; raises on incomplete output."""
        input_text = f"Original query: {query}\n\nSearch Results:\n" + "\n---\n".join(search_results)

        try:
            result = await Runner.run(writer_agent, input_text)
            report = result.final_output

            # Validate the result
            if not report.markdown_report or not report.short_summary:
                raise ValueError("Writer agent returned incomplete report")

            return report
        except Exception as e:
            raise Exception(f"Report Writing failed: {str(e)}") from e

    # Send the finished report to the recipient via the email agent.
    async def send_report_email(self, report: ReportData, recipient_email: str) -> None:
        """Instruct the email agent to deliver the report; raises on failure."""
        input_text = f"""
Send the following research report as an email:
To: {recipient_email}

Body (HTML):
{report.markdown_report}
"""

        try:
            await Runner.run(email_agent, input_text)
            print(f"✅ Email sent to {recipient_email}")
        except Exception as e:
            raise Exception(f"Email sending failed: {str(e)}") from e
src/serach_agent.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Import libraries
from agents import (
    Agent,
    WebSearchTool,
    ModelSettings
)

# Prompt for the search agent: produce a terse 2-3 paragraph, <300 word
# summary for a downstream report writer. (Fixed typos in the original
# prompt: "must 2-3" -> "must be 2-3", "succintly" -> "succinctly",
# "its vital" -> "it's vital".)
SEARCH_INSTRUCTIONS = (
    "You are a research assistant. Given a search term, you search the web for that term and "
    "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
    "words. Capture the main points. Write succinctly, no need to have complete sentences or good "
    "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
    "essence and ignore any fluff. Do not include any additional commentary other than the summary itself."
)

# Create the search_agent; tool_choice="required" forces the model to
# actually invoke the web search tool rather than answer from memory.
search_agent = Agent(
    name="Web Searching Agent",
    instructions=SEARCH_INSTRUCTIONS,
    model="gpt-4o-mini",
    tools=[WebSearchTool(search_context_size="low")],
    model_settings=ModelSettings(tool_choice="required")
)
src/writer_agent.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Import libraries
from pydantic import BaseModel, Field
from agents import Agent

# Define instructions for the writer agent: outline the report first,
# then return a lengthy markdown report as the final output.
WRITER_INSTRUCTIONS = (
    "You are a senior researcher tasked with writing a cohesive report for a research query. "
    "You will be provided with the original query, and some initial research done by a research assistant.\n"
    "You should first come up with an outline for the report that describes the structure and "
    "flow of the report. Then, generate the report and return that as your final output.\n"
    "The final output should be in markdown format, and it should be lengthy and detailed. Aim "
    "for 5-10 pages of content, at least 1000 words."
)

# Create the pydantic model to store the final report
class ReportData(BaseModel):
    """Structured output schema the writer agent must return."""

    short_summary: str = Field(description="A short 2-3 sentence summary of the findings")
    markdown_report: str = Field(description="The final report")
    # NOTE(review): typed as a single str although the description implies a
    # list of topics — confirm downstream consumers before changing the schema.
    follow_up_questions: str = Field(description="Suggested topics to research further")

# Create the writer_agent; output_type=ReportData makes the runner parse
# the model's answer into the structured schema above.
writer_agent = Agent(
    name="Report Writing Agent",
    instructions=WRITER_INSTRUCTIONS,
    model="gpt-4o-mini",
    output_type=ReportData,
)
uv.lock ADDED
The diff for this file is too large to render. See raw diff