facemelter committed on
Commit
128f5d1
·
verified ·
1 Parent(s): 6c62235

Added audio_finder subagent to Specialist supervisor

Browse files
app.py CHANGED
@@ -31,7 +31,9 @@ PHOTO_EXAMPLES = [
31
  MULTI_AGENT_TEXT_EXAMPLES = [
32
  "Tell me about Northern Cardinals - show me images and audio",
33
  "What birds are in the Cardinalidae family?",
34
- "Show me species with endangered status"
 
 
35
  ]
36
 
37
  # Text-only examples for Audio Finder Agent mode
@@ -1026,14 +1028,15 @@ def update_text_examples_for_mode(mode):
1026
  """Return appropriate text example dataset based on agent mode."""
1027
  print(f"[DEBUG] Updating text examples for mode: {mode}")
1028
 
1029
- if mode == "Audio Finder Agent":
1030
- # Audio text examples
1031
- samples = [[text] for text in AUDIO_FINDER_TEXT_EXAMPLES]
1032
- print(f"[DEBUG] Audio Finder text samples: {len(samples)} examples")
1033
- else: # Specialized Subagents (3 Specialists)
1034
- # Multi-agent text examples
1035
- samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
1036
- print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
 
1037
 
1038
  return gr.Dataset(samples=samples)
1039
 
@@ -1194,7 +1197,6 @@ with gr.Blocks() as demo:
1194
  <div style="display: flex; align-items: baseline; gap: 0.5rem;">
1195
  <h1>BirdScope</h1>
1196
  <span class="header-ai-text">AI</span>
1197
- <span class="header-v2-badge">v2</span>
1198
  </div>
1199
  <p class="header-subtitle">AI-powered bird identification & species reference</p>
1200
  </div>
@@ -1486,8 +1488,7 @@ with gr.Blocks() as demo:
1486
  gr.Markdown("Choose between unified agent or specialized routing")
1487
  agent_mode = gr.Dropdown(
1488
  choices=[
1489
- "Specialized Subagents (3 Specialists)",
1490
- "Audio Finder Agent" # Changed from "Single Agent (All Tools)"
1491
  ],
1492
  value="Specialized Subagents (3 Specialists)",
1493
  show_label=False,
@@ -1592,12 +1593,9 @@ with gr.Blocks() as demo:
1592
  - Audio recordings (xeno-canto)
1593
  - Conservation status data
1594
  - Taxonomic exploration
1595
-
1596
- **v2 Features:**
1597
  - Separate tool log panel
1598
  - Detailed execution tracking
1599
  - Tool input/output inspection
1600
- - Perfect for debugging!
1601
  """)
1602
 
1603
  # State for tool log
 
31
  MULTI_AGENT_TEXT_EXAMPLES = [
32
  "Tell me about Northern Cardinals - show me images and audio",
33
  "What birds are in the Cardinalidae family?",
34
+ "Show me species with endangered status",
35
+ "Find me audio recordings for Snow Goose",
36
+ "Get me bird call samples for any two species"
37
  ]
38
 
39
  # Text-only examples for Audio Finder Agent mode
 
1028
  """Return appropriate text example dataset based on agent mode."""
1029
  print(f"[DEBUG] Updating text examples for mode: {mode}")
1030
 
1031
+ # Placeholder for future mode-specific examples
1032
+ # if mode == "Future Mode Name":
1033
+ # samples = [[text] for text in FUTURE_MODE_EXAMPLES]
1034
+ # print(f"[DEBUG] Future mode text samples: {len(samples)} examples")
1035
+ # else:
1036
+
1037
+ # Default: Specialized Subagents (3 Specialists) - includes image ID, taxonomy, and audio finder
1038
+ samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
1039
+ print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
1040
 
1041
  return gr.Dataset(samples=samples)
1042
 
 
1197
  <div style="display: flex; align-items: baseline; gap: 0.5rem;">
1198
  <h1>BirdScope</h1>
1199
  <span class="header-ai-text">AI</span>
 
1200
  </div>
1201
  <p class="header-subtitle">AI-powered bird identification & species reference</p>
1202
  </div>
 
1488
  gr.Markdown("Choose between unified agent or specialized routing")
1489
  agent_mode = gr.Dropdown(
1490
  choices=[
1491
+ "Specialized Subagents (3 Specialists)"
 
1492
  ],
1493
  value="Specialized Subagents (3 Specialists)",
1494
  show_label=False,
 
1593
  - Audio recordings (xeno-canto)
1594
  - Conservation status data
1595
  - Taxonomic exploration
 
 
1596
  - Separate tool log panel
1597
  - Detailed execution tracking
1598
  - Tool input/output inspection
 
1599
  """)
1600
 
1601
  # State for tool log
docs/dev/agents_config-README.md ADDED
@@ -0,0 +1,595 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Agent Configuration Guide
2
+
3
+ **Complete guide for adding, removing, and modifying agents/subagents in BirdScope AI**
4
+
5
+ ---
6
+
7
+ ## 📋 Table of Contents
8
+
9
+ 1. [Architecture Overview](#architecture-overview)
10
+ 2. [Key Files Reference](#key-files-reference)
11
+ 3. [Adding a New Subagent](#adding-a-new-subagent)
12
+ 4. [Removing a Subagent](#removing-a-subagent)
13
+ 5. [Modifying Existing Subagents](#modifying-existing-subagents)
14
+ 6. [App.py Integration Points](#apppy-integration-points)
15
+ 7. [Testing Your Changes](#testing-your-changes)
16
+
17
+ ---
18
+
19
+ ## Architecture Overview
20
+
21
+ BirdScope AI uses a **LangGraph supervisor pattern** with specialized subagents:
22
+
23
+ ```
24
+ User Request
25
+ ↓
26
+ Supervisor (Router)
27
+ ↓
28
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
29
+ β”‚ image_identifier β”‚ taxonomy_specialist β”‚ (other agents) β”‚
30
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
31
+ ```
32
+
33
+ **Key Concepts:**
34
+ - **Supervisor**: LLM-based router that delegates tasks to specialists
35
+ - **Subagents**: Specialized agents with filtered tool access and focused prompts
36
+ - **Modes**: Different agent configurations (e.g., "Specialized Subagents (2 Specialists)", "Audio Finder Agent")
37
+ - **Tool Filtering**: Each subagent only has access to relevant tools
38
+
39
+ ---
40
+
41
+ ## Key Files Reference
42
+
43
+ ### Core Agent Files
44
+
45
+ | File | Purpose | What to Change |
46
+ |------|---------|----------------|
47
+ | `langgraph_agent/subagent_supervisor.py` | Creates supervisor workflow | Add/remove agents from supervisor list |
48
+ | `langgraph_agent/subagent_config.py` | Defines subagent configurations | Add/remove/modify subagent definitions and modes |
49
+ | `langgraph_agent/subagent_factory.py` | Builds subagent instances | (Usually no changes needed) |
50
+ | `langgraph_agent/prompts.py` | System prompts for agents | Add provider-specific prompts |
51
+
52
+ ### UI Integration
53
+
54
+ | File | Purpose | What to Change |
55
+ |------|---------|----------------|
56
+ | `app.py` | Gradio UI and agent orchestration | Update mode dropdown, default values, examples |
57
+
58
+ ---
59
+
60
+ ## Adding a New Subagent
61
+
62
+ ### Step 1: Define Subagent Configuration
63
+
64
+ **File:** `langgraph_agent/subagent_config.py`
65
+
66
+ Add your subagent to `get_subagent_definitions()`:
67
+
68
+ ```python
69
+ @staticmethod
70
+ def get_subagent_definitions(provider: str = "openai") -> Dict[str, Dict]:
71
+ return {
72
+ # ... existing agents ...
73
+
74
+ "my_new_agent": {
75
+ "name": "My New Specialist",
76
+ "description": "Expert at specific bird-related tasks",
77
+ "tools": [
78
+ "tool_name_1",
79
+ "tool_name_2",
80
+ "tool_name_3"
81
+ ],
82
+ "prompt": get_prompt("my_new_agent", provider) or """You are a My New Specialist.
83
+
84
+ **Your Role:**
85
+ 1. Primary responsibility
86
+ 2. Secondary responsibility
87
+ 3. When to use specific tools
88
+
89
+ **Response Style:**
90
+ - How to format responses
91
+ - What to emphasize
92
+
93
+ **When to defer:**
94
+ - Task type 1 -> other_agent_name
95
+ - Task type 2 -> another_agent_name
96
+ """,
97
+ "temperature": AgentConfig.OPENAI_TEMPERATURE,
98
+ }
99
+ }
100
+ ```
101
+
102
+ ### Step 2: Create System Prompts
103
+
104
+ **File:** `langgraph_agent/prompts.py`
105
+
106
+ Add prompts for your new agent:
107
+
108
+ ```python
109
+ # Default prompt (used by OpenAI/Anthropic)
110
+ MY_NEW_AGENT_PROMPT = """Detailed prompt for your agent..."""
111
+
112
+ # HuggingFace-optimized prompt (more explicit, step-by-step)
113
+ MY_NEW_AGENT_PROMPT_HF = """Simplified, step-by-step prompt..."""
114
+
115
+ # Add to PROMPTS dictionary
116
+ PROMPTS = {
117
+ # ... existing prompts ...
118
+ "my_new_agent": {
119
+ "default": MY_NEW_AGENT_PROMPT,
120
+ "huggingface": MY_NEW_AGENT_PROMPT_HF,
121
+ },
122
+ }
123
+ ```
124
+
125
+ ### Step 3: Add to Supervisor Workflow
126
+
127
+ **File:** `langgraph_agent/subagent_supervisor.py`
128
+
129
+ ```python
130
+ async def create_supervisor_workflow(all_tools, llm, provider="openai"):
131
+ # Create existing agents
132
+ image_agent = await SubAgentFactory.create_subagent(
133
+ "image_identifier", all_tools, llm, provider=provider
134
+ )
135
+ # ... other agents ...
136
+
137
+ # Add your new agent
138
+ my_new_agent = await SubAgentFactory.create_subagent(
139
+ "my_new_agent", all_tools, llm, provider=provider
140
+ )
141
+
142
+ # Add to supervisor list
143
+ workflow = create_supervisor(
144
+ [image_agent, taxonomy_agent, my_new_agent], # Add here
145
+ model=llm,
146
+ prompt=SubAgentConfig.get_router_prompt(provider=provider)
147
+ )
148
+ ```
149
+
150
+ ### Step 4: Update Router Prompts
151
+
152
+ **File:** `langgraph_agent/subagent_config.py`
153
+
154
+ Update `get_router_prompt()`:
155
+
156
+ ```python
157
+ return """You are BirdScope AI Supervisor...
158
+
159
+ **Your Team:**
160
+ - **image_identifier**: Identifies birds from photos...
161
+ - **taxonomy_specialist**: Conservation status, families...
162
+ - **my_new_agent**: Specific tasks for my new agent # Add this
163
+
164
+ **Routing Guidelines:**
165
+ 1. **Image uploads/URLs** → image_identifier
166
+ 2. **Conservation queries** → taxonomy_specialist
167
+ 3. **New task type** → my_new_agent # Add this
168
+ ```
169
+
170
+ Also update `prompts.py` for HuggingFace router:
171
+
172
+ ```python
173
+ ROUTER_PROMPT_HF = """...
174
+ **Specialists:**
175
+ - image_identifier: ...
176
+ - taxonomy_specialist: ...
177
+ - my_new_agent: New task handling # Add this
178
+
179
+ **Routing Rules:**
180
+ ...
181
+ 6. "New task keyword" → my_new_agent # Add this
182
+ """
183
+ ```
184
+
185
+ ### Step 5: Update Mode Definition
186
+
187
+ **File:** `langgraph_agent/subagent_config.py`
188
+
189
+ Update `get_mode_definitions()`:
190
+
191
+ ```python
192
+ return {
193
+ "Specialized Subagents (3 Specialists)": { # Update count
194
+ "description": "Router orchestrates 3 specialized agents",
195
+ "subagents": ["image_identifier", "taxonomy_specialist", "my_new_agent"], # Add here
196
+ "use_router": True
197
+ },
198
+ }
199
+ ```
200
+
201
+ ### Step 6: Integrate with app.py
202
+
203
+ See [App.py Integration Points](#apppy-integration-points) below.
204
+
205
+ ---
206
+
207
+ ## Removing a Subagent
208
+
209
+ **Example: Removing `species_explorer` from the supervisor**
210
+
211
+ ### Step 1: Remove from Supervisor Workflow
212
+
213
+ **File:** `langgraph_agent/subagent_supervisor.py`
214
+
215
+ ```python
216
+ async def create_supervisor_workflow(all_tools, llm, provider="openai"):
217
+ # Remove agent creation
218
+ # species_agent = await SubAgentFactory.create_subagent(...) # DELETE
219
+
220
+ # Remove from supervisor list
221
+ workflow = create_supervisor(
222
+ [image_agent, taxonomy_agent], # Remove species_agent
223
+ model=llm,
224
+ prompt=SubAgentConfig.get_router_prompt(provider=provider)
225
+ )
226
+ ```
227
+
228
+ ### Step 2: Update Mode Definition
229
+
230
+ **File:** `langgraph_agent/subagent_config.py`
231
+
232
+ ```python
233
+ return {
234
+ "Specialized Subagents (2 Specialists)": { # Update count
235
+ "description": "Router orchestrates 2 specialized agents",
236
+ "subagents": ["image_identifier", "taxonomy_specialist"], # Remove agent
237
+ "use_router": True
238
+ },
239
+ }
240
+ ```
241
+
242
+ ### Step 3: Update Router Prompts
243
+
244
+ **File:** `langgraph_agent/subagent_config.py` (default router)
245
+
246
+ ```python
247
+ return """You are BirdScope AI Supervisor...
248
+
249
+ **Your Team:**
250
+ - **image_identifier**: ...
251
+ - **taxonomy_specialist**: ...
252
+ # Remove species_explorer reference
253
+
254
+ **Routing Guidelines:**
255
+ # Remove routing rules for deleted agent
256
+ # Reassign its responsibilities to other agents
257
+ ```
258
+
259
+ **File:** `langgraph_agent/prompts.py` (HuggingFace router)
260
+
261
+ ```python
262
+ ROUTER_PROMPT_HF = """...
263
+ **Specialists:**
264
+ - image_identifier: ...
265
+ - taxonomy_specialist: ...
266
+ # Remove deleted agent
267
+
268
+ **Routing Rules:**
269
+ # Remove routing rules
270
+ # Reassign to remaining agents
271
+ """
272
+ ```
273
+
274
+ ### Step 4: Update "When to defer" Sections
275
+
276
+ **File:** `langgraph_agent/subagent_config.py`
277
+
278
+ Update remaining subagents' prompts:
279
+
280
+ ```python
281
+ "image_identifier": {
282
+ # ...
283
+ "prompt": """...
284
+ **When to defer:**
285
+ - For family/taxonomy queries -> taxonomy_specialist
286
+ # Remove references to deleted agent
287
+ """,
288
+ }
289
+ ```
290
+
291
+ ### Step 5: Update app.py References
292
+
293
+ See [App.py Integration Points](#apppy-integration-points) below.
294
+
295
+ ---
296
+
297
+ ## Modifying Existing Subagents
298
+
299
+ ### Changing Tool Access
300
+
301
+ **File:** `langgraph_agent/subagent_config.py`
302
+
303
+ ```python
304
+ "image_identifier": {
305
+ "tools": [
306
+ "classify_from_url",
307
+ "classify_from_base64",
308
+ "get_bird_info",
309
+ "new_tool_name" # Add new tool
310
+ ],
311
+ }
312
+ ```
313
+
314
+ ### Updating Prompts
315
+
316
+ **File:** `langgraph_agent/subagent_config.py` or `langgraph_agent/prompts.py`
317
+
318
+ ```python
319
+ # For inline prompts (in subagent_config.py)
320
+ "image_identifier": {
321
+ "prompt": get_prompt("image_identifier", provider) or """Updated prompt..."""
322
+ }
323
+
324
+ # For dedicated prompts (in prompts.py)
325
+ IMAGE_IDENTIFIER_PROMPT = """Updated comprehensive prompt..."""
326
+ ```
327
+
328
+ ### Changing Temperature
329
+
330
+ **File:** `langgraph_agent/subagent_config.py`
331
+
332
+ ```python
333
+ "species_explorer": {
334
+ "temperature": 0.2, # More creative (was 0.1)
335
+ }
336
+ ```
337
+
338
+ ---
339
+
340
+ ## App.py Integration Points
341
+
342
+ **When you change agent modes, you MUST update these sections in app.py:**
343
+
344
+ ### 1. Mode Dropdown Choices
345
+
346
+ **Location:** `app.py` ~line 1486-1491
347
+
348
+ ```python
349
+ agent_mode = gr.Dropdown(
350
+ choices=[
351
+ "Specialized Subagents (2 Specialists)", # Update mode name here
352
+ "Audio Finder Agent"
353
+ ],
354
+ value="Specialized Subagents (2 Specialists)", # Update default here
355
+ show_label=False,
356
+ container=False
357
+ )
358
+ ```
359
+
360
+ ### 2. Initial Session Status HTML
361
+
362
+ **Location:** `app.py` ~line 1560
363
+
364
+ ```python
365
+ session_status = gr.HTML(
366
+ value=create_config_html(
367
+ provider_choice="OpenAI",
368
+ agent_mode_choice="Specialized Subagents (2 Specialists)", # Update here
369
+ hf_key_input="",
370
+ openai_key_input="",
371
+ anthropic_key_input=""
372
+ )
373
+ )
374
+ ```
375
+
376
+ ### 3. Health Check Config HTML
377
+
378
+ **Location:** `app.py` ~line 1654
379
+
380
+ ```python
381
+ config_html = create_config_html(
382
+ provider_choice=provider_str,
383
+ agent_mode_choice="Specialized Subagents (2 Specialists)", # Update here
384
+ hf_key_input=hf_key_value,
385
+ openai_key_input=openai_key_input,
386
+ anthropic_key_input=anthropic_key_input
387
+ )
388
+ ```
389
+
390
+ ### 4. Example Loading Logic Comments
391
+
392
+ **Location:** `app.py` ~line 1033
393
+
394
+ ```python
395
+ else: # Specialized Subagents (2 Specialists) # Update comment
396
+ samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
397
+ ```
398
+
399
+ ### 5. (Optional) Add Mode-Specific Examples
400
+
401
+ **Location:** `app.py` ~line 30-40 (add new example list)
402
+
403
+ ```python
404
+ # Text-only examples for Specialized Subagents mode
405
+ MULTI_AGENT_TEXT_EXAMPLES = [
406
+ "Tell me about Northern Cardinals - show me images and audio",
407
+ "What birds are in the Cardinalidae family?",
408
+ "Show me species with endangered status",
409
+ "Find me audio recordings for Snow Goose",
410
+ "Get me bird call samples for any two species"
411
+ ]
412
+
413
+ # Add examples for your new agent mode
414
+ MY_NEW_AGENT_EXAMPLES = [
415
+ "Example query 1 for new mode",
416
+ "Example query 2 for new mode",
417
+ "Example query 3 for new mode"
418
+ ]
419
+ ```
420
+
421
+ **Location:** `app.py` ~line 1027-1041 (update conditional logic)
422
+
423
+ The function includes a **placeholder for future modes**. Uncomment and customize:
424
+
425
+ ```python
426
+ def update_text_examples_for_mode(mode):
427
+ """Return appropriate text example dataset based on agent mode."""
428
+ print(f"[DEBUG] Updating text examples for mode: {mode}")
429
+
430
+ # Placeholder for future mode-specific examples
431
+ if mode == "My New Agent Mode": # UNCOMMENT and update mode name
432
+ samples = [[text] for text in MY_NEW_AGENT_EXAMPLES]
433
+ print(f"[DEBUG] New mode text samples: {len(samples)} examples")
434
+ # elif mode == "Another Mode": # Add more modes as needed
435
+ # samples = [[text] for text in ANOTHER_MODE_EXAMPLES]
436
+ else: # Default: Specialized Subagents
437
+
438
+ # Default: Specialized Subagents (3 Specialists)
439
+ samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
440
+ print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
441
+
442
+ return gr.Dataset(samples=samples)
443
+ ```
444
+
445
+ **Why keep the conditional?** Even with only one mode, we maintain the placeholder structure to make it easy to add new modes later without refactoring the entire function.
446
+
447
+ ---
448
+
449
+ ## Testing Your Changes
450
+
451
+ ### 1. Local Testing
452
+
453
+ ```bash
454
+ # Run the app locally
455
+ python app.py
456
+ # or
457
+ gradio app.py
458
+ ```
459
+
460
+ ### 2. Check for Errors
461
+
462
+ **Common errors to watch for:**
463
+
464
+ ```
465
+ Unknown mode: Specialized Subagents (3 Specialists). Available: ['Specialized Subagents (2 Specialists)', 'Audio Finder Agent']
466
+ ```
467
+ → **Fix:** Update app.py mode references
468
+
469
+ ```
470
+ ValueError: Unknown subagent: species_explorer
471
+ ```
472
+ → **Fix:** Remove references to deleted subagent in supervisor or mode definitions
473
+
474
+ ### 3. Test Agent Routing
475
+
476
+ Try queries that should route to different agents:
477
+
478
+ ```python
479
+ # Test image_identifier routing
480
+ "What bird is this? [upload image]"
481
+
482
+ # Test taxonomy_specialist routing
483
+ "Show me endangered bird families"
484
+
485
+ # Test your new agent
486
+ "Query specific to new agent capability"
487
+ ```
488
+
489
+ ### 4. Check Tool Access
490
+
491
+ Verify agents only use their assigned tools:
492
+
493
+ ```bash
494
+ # In terminal, watch for:
495
+ [SUBAGENT]: Creating Image Identification Specialist
496
+ β€’ Tools: classify_from_url, classify_from_base64, get_bird_info, get_bird_images
497
+ ```
498
+
499
+ ### 5. Verify Provider-Specific Prompts
500
+
501
+ Test with different LLM providers:
502
+
503
+ ```python
504
+ # OpenAI should use default prompts
505
+ # HuggingFace should use _HF prompts
506
+ ```
507
+
508
+ ---
509
+
510
+ ## Quick Reference Checklist
511
+
512
+ **Adding a new subagent:**
513
+ - [ ] Define in `subagent_config.py` → `get_subagent_definitions()`
514
+ - [ ] Create prompts in `prompts.py` (default + HF versions)
515
+ - [ ] Add to PROMPTS dictionary
516
+ - [ ] Create agent in `subagent_supervisor.py`
517
+ - [ ] Add to supervisor list
518
+ - [ ] Update router prompts (default + HF)
519
+ - [ ] Update mode definition
520
+ - [ ] Update app.py mode references (5 locations)
521
+ - [ ] Test locally
522
+
523
+ **Removing a subagent:**
524
+ - [ ] Remove from `subagent_supervisor.py` workflow
525
+ - [ ] Update mode definition count and list
526
+ - [ ] Update router prompts (remove references)
527
+ - [ ] Update "When to defer" in remaining agents
528
+ - [ ] Update app.py mode references (5 locations)
529
+ - [ ] Test locally
530
+
531
+ **Modifying a subagent:**
532
+ - [ ] Update tools list in `subagent_config.py`
533
+ - [ ] Update prompts if needed
534
+ - [ ] Update router if responsibilities changed
535
+ - [ ] Test locally
536
+
537
+ ---
538
+
539
+ ## Troubleshooting
540
+
541
+ ### Error: "Unknown mode"
542
+
543
+ **Cause:** Mode name mismatch between `subagent_config.py` and `app.py`
544
+
545
+ **Fix:** Search for all occurrences in app.py and update:
546
+ ```bash
547
+ grep -n "Specialized Subagents (3 Specialists)" app.py
548
+ ```
549
+
550
+ ### Error: "Unknown subagent"
551
+
552
+ **Cause:** Subagent referenced in supervisor but not defined in config
553
+
554
+ **Fix:** Either define the subagent or remove references
555
+
556
+ ### Agent Not Using Expected Tools
557
+
558
+ **Cause:** Tool name mismatch or tool not available
559
+
560
+ **Fix:** Check MCP server is providing the tool:
561
+ ```python
562
+ print([tool.name for tool in all_tools])
563
+ ```
564
+
565
+ ---
566
+
567
+ ## Best Practices
568
+
569
+ 1. **Always update both default and HuggingFace prompts** - HF models need more explicit instructions
570
+ 2. **Keep tool lists minimal** - Only give agents tools they truly need
571
+ 3. **Update router prompts** - Supervisor needs to know when to use your agent
572
+ 4. **Test routing logic** - Verify supervisor correctly delegates tasks
573
+ 5. **Document agent responsibilities** - Clear "Your Role" section in prompts
574
+ 6. **Use provider-specific prompts** - Optimize for OpenAI vs Anthropic vs HuggingFace
575
+ 7. **Keep "When to defer" up to date** - Agents should know their boundaries
576
+
577
+ ---
578
+
579
+ ## Example: Recent Change
580
+
581
+ **We removed `species_explorer` from the Specialized Subagents mode:**
582
+
583
+ **Files changed:**
584
+ 1. `subagent_supervisor.py` - Removed species_agent creation and reference
585
+ 2. `subagent_config.py` - Updated mode from (3 Specialists) → (2 Specialists)
586
+ 3. `subagent_config.py` - Updated router prompts (default)
587
+ 4. `prompts.py` - Updated ROUTER_PROMPT_HF
588
+ 5. `subagent_config.py` - Removed species_explorer from "When to defer" sections
589
+ 6. `app.py` - Updated all 5 mode references from (3 Specialists) → (2 Specialists)
590
+
591
+ **Reason:** Simplified architecture before adding audio finder as new subagent
592
+
593
+ ---
594
+
595
+ **Questions?** Check the LangGraph documentation: https://langchain-ai.github.io/langgraph/
docs/dev/main-README.md ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: BirdScope AI - MCP Multi-Agent System
3
+ emoji: 🦅
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ python_version: 3.11
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # 🦅 BirdScope AI - Multi-Agent Bird Identification System
13
+
14
+ **AI-powered bird identification with specialized MCP agents**
15
+
16
+ Built for the [MCP 1st Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday)
17
+
18
+ ---
19
+
20
+ ## 🎯 Overview
21
+
22
+ BirdScope AI is a production-ready multi-agent system that combines **Modal GPU classification** with **Nuthatch species database** to provide comprehensive bird identification and exploration. Users can upload photos, search species, explore taxonomic families, and access rich multimedia content (images, audio recordings, conservation data).
23
+
24
+ **Two Agent Modes:**
25
+ 1. **Specialized Subagents (3 Specialists)** - Router orchestrates image identifier, taxonomy specialist, and audio finder
26
+ 2. **Audio Finder Agent** - Specialized agent for discovering bird audio recordings
27
+
28
+ ---
29
+
30
+ ## ✨ Features
31
+
32
+ - 🔍 **Image Classification**: Upload bird photos for instant GPU-powered identification
33
+ - 📸 **Reference Images**: High-quality Unsplash photos for each species
34
+ - 🎵 **Audio Recordings**: Bird calls and songs from xeno-canto.org
35
+ - 🌍 **Conservation Data**: IUCN status and taxonomic information
36
+ - 🧠 **Multi-Agent Architecture**: Specialized agents with focused tool subsets
37
+ - 🔄 **Dual Streaming**: Separate outputs for chat responses and tool execution logs
38
+ - 🤖 **Multi-Provider**: OpenAI (GPT-4), Anthropic (Claude), HuggingFace (Qwen)
39
+
40
+ ---
41
+
42
+ ## 🚀 Quick Start (For Users)
43
+
44
+ ### Option 1: OpenAI (Recommended)
45
+ 1. Get your OpenAI API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
46
+ 2. Select **OpenAI** as provider in the sidebar
47
+ 3. Enter your API key
48
+ 4. Model used: `gpt-4o-mini`
49
+
50
+ ### Option 2: Anthropic (Claude)
51
+ 1. Get your Anthropic API key from [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys)
52
+ 2. Select **Anthropic** as provider
53
+ 3. Enter your API key
54
+ 4. Model used: `claude-sonnet-4-5`
55
+
56
+ ### Option 3: HuggingFace
57
+ ⚠️ **Note**: HuggingFace Inference API has limited function calling support. OpenAI or Anthropic recommended for full functionality.
58
+
59
+ ---
60
+
61
+ ## 🛠️ Environment Setup (For Developers)
62
+
63
+ ### Prerequisites
64
+
65
+ - Python 3.11+
66
+ - Modal account (for GPU classifier)
67
+ - Nuthatch API key
68
+ - LLM API key (OpenAI, Anthropic, or HuggingFace)
69
+
70
+ ---
71
+
72
+ ### 🏠 Local Development Setup
73
+
74
+ #### Step 1: Clone and Install
75
+
76
+ ```bash
77
+ cd ~/Desktop/hackathon/hackathon_draft
78
+
79
+ # Create virtual environment
80
+ python3.11 -m venv .venv
81
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
82
+
83
+ # Install dependencies
84
+ pip install -r requirements.txt
85
+ ```
86
+
87
+ #### Step 2: Configure Environment Variables
88
+
89
+ Create a `.env` file from the example:
90
+
91
+ ```bash
92
+ cp .env.example .env
93
+ ```
94
+
95
+ Edit `.env` with your API keys:
96
+
97
+ ```bash
98
+ # ================================================
99
+ # REQUIRED: Modal Bird Classifier (GPU)
100
+ # ================================================
101
+ MODAL_MCP_URL=https://your-modal-app--mcp-server.modal.run/mcp
102
+ BIRD_CLASSIFIER_API_KEY=your-modal-api-key-here
103
+
104
+ # ================================================
105
+ # REQUIRED: Nuthatch Species Database
106
+ # ================================================
107
+ NUTHATCH_API_KEY=your-nuthatch-api-key-here
108
+ NUTHATCH_BASE_URL=https://nuthatch.lastelm.software/v2 # Default, can omit
109
+
110
+ # Nuthatch Transport Mode (STDIO or HTTP)
111
+ NUTHATCH_USE_STDIO=true # Recommended for local development
112
+
113
+ # Only needed if NUTHATCH_USE_STDIO=false:
114
+ # NUTHATCH_MCP_URL=http://localhost:8001/mcp
115
+ # NUTHATCH_MCP_AUTH_KEY=your-auth-key-here
116
+
117
+ # ================================================
118
+ # LLM Provider (Choose ONE)
119
+ # ================================================
120
+ # OpenAI (Recommended)
121
+ OPENAI_API_KEY=sk-your-openai-key-here
122
+ DEFAULT_OPENAI_MODEL=gpt-4o-mini
123
+ OPENAI_TEMPERATURE=0.0
124
+
125
+ # OR Anthropic
126
+ # ANTHROPIC_API_KEY=sk-ant-your-anthropic-key-here
127
+ # DEFAULT_ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
128
+ # ANTHROPIC_TEMPERATURE=0.0
129
+
130
+ # OR HuggingFace (Limited function calling support)
131
+ # HF_API_KEY=hf_your-huggingface-token-here
132
+ # DEFAULT_HF_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
133
+ # HF_TEMPERATURE=0.1
134
+ ```
135
+
136
+ #### Step 3: Understanding Nuthatch Transport Modes
137
+
138
+ **STDIO Mode (Recommended for Local):**
139
+ - Nuthatch MCP server runs as subprocess
140
+ - Automatically started by the app
141
+ - No separate server process needed
142
+ - Set `NUTHATCH_USE_STDIO=true`
143
+
144
+ **HTTP Mode (Alternative for Local):**
145
+ - Nuthatch MCP server runs as separate HTTP server
146
+ - Useful for debugging or multiple clients
147
+ - Requires running server in separate terminal
148
+
149
+ To use HTTP mode:
150
+
151
+ ```bash
152
+ # Terminal 1: Run Nuthatch MCP server
153
+ python nuthatch_tools.py --http --port 8001
154
+
155
+ # Terminal 2: Run the app
156
+ # Set in .env:
157
+ # NUTHATCH_USE_STDIO=false
158
+ # NUTHATCH_MCP_URL=http://localhost:8001/mcp
159
+ python app.py
160
+ ```
161
+
162
+ #### Step 4: Run the App
163
+
164
+ ```bash
165
+ # With STDIO mode (default, easiest):
166
+ python app.py
167
+
168
+ # Or using Gradio CLI:
169
+ gradio app.py
170
+ ```
171
+
172
+ App will be available at: `http://127.0.0.1:7860`
173
+
174
+ ---
175
+
176
+ ### ☁️ HuggingFace Spaces Deployment
177
+
178
+ #### Step 1: Create a New Space
179
+
180
+ 1. Go to [huggingface.co/new-space](https://huggingface.co/new-space)
181
+ 2. Choose:
182
+ - **SDK**: Gradio
183
+ - **Hardware**: CPU Basic (free) or CPU Upgrade (faster)
184
+ - **Visibility**: Public or Private
185
+
186
+ #### Step 2: Upload Your Code
187
+
188
+ **Option A: Using `upload_to_space.py` (Recommended)**
189
+
190
+ ```bash
191
+ # 1. Install HuggingFace CLI
192
+ pip install huggingface_hub
193
+
194
+ # 2. Login
195
+ huggingface-cli login
196
+
197
+ # 3. Update upload_to_space.py with your Space name
198
+ # Edit line with repo_id:
199
+ # repo_id="YOUR-USERNAME/YOUR-SPACE-NAME"
200
+
201
+ # 4. Upload
202
+ python upload_to_space.py
203
+ ```
204
+
205
+ **Option B: Using Git**
206
+
207
+ ```bash
208
+ git remote add hf-space https://huggingface.co/spaces/YOUR-USERNAME/YOUR-SPACE-NAME
209
+ git push hf-space main
210
+ ```
211
+
212
+ #### Step 3: Configure Secrets in HuggingFace Spaces
213
+
214
+ ⚠️ **CRITICAL**: Spaces use **Secrets**, not `.env` files!
215
+
216
+ Go to your Space → **Settings** → **Variables and secrets**
217
+
218
+ **Add these secrets:**
219
+
220
+ ```bash
221
+ # REQUIRED: Modal Bird Classifier
222
+ MODAL_MCP_URL = https://your-modal-app--mcp-server.modal.run/mcp
223
+ BIRD_CLASSIFIER_API_KEY = your-modal-api-key-here
224
+
225
+ # REQUIRED: Nuthatch Species Database
226
+ NUTHATCH_API_KEY = your-nuthatch-api-key-here
227
+ NUTHATCH_BASE_URL = https://nuthatch.lastelm.software/v2 # Optional
228
+ NUTHATCH_USE_STDIO = true # MUST be "true" for Spaces
229
+
230
+ # OPTIONAL: Backend-provided LLM keys (users can provide their own)
231
+ # Only add if you want to provide default keys:
232
+ # OPENAI_API_KEY = sk-your-key-here
233
+ # ANTHROPIC_API_KEY = sk-ant-your-key-here
234
+ ```
235
+
236
+ **Important Notes:**
237
+ - ✅ **ALWAYS** use `NUTHATCH_USE_STDIO=true` on Spaces (subprocess mode)
238
+ - ✅ HTTP mode not supported on Spaces (port binding restrictions)
239
+ - ✅ Users can provide their own LLM keys via the UI
240
+ - ✅ Environment variables from Spaces **do not** auto-inherit to subprocesses
241
+ - The app explicitly passes `NUTHATCH_API_KEY` and `NUTHATCH_BASE_URL` to the subprocess (see `mcp_clients.py`)
242
+
243
+ #### Step 4: Verify Deployment
244
+
245
+ 1. Wait for Space to build (2-5 minutes)
246
+ 2. Check **Logs** tab for errors
247
+ 3. Try the app - upload a bird photo or ask about species
248
+
249
+ ---
250
+
251
+ ## 📁 Project Structure
252
+
253
+ ```
254
+ hackathon_draft/
255
+ ├── app.py # Main Gradio app
256
+ ├── upload_to_space.py # HF Spaces upload script
257
+ ├── requirements.txt # Python dependencies
258
+ ├── .env.example # Environment template
259
+ ├── langgraph_agent/
260
+ │ ├── __init__.py
261
+ │ ├── agents.py # Agent factory (single/multi-agent)
262
+ │ ├── config.py # Configuration loader
263
+ │ ├── mcp_clients.py # MCP client setup
264
+ │ ├── subagent_config.py # Agent mode definitions
265
+ │ ├── prompts.py # System prompts
266
+ │ └── structured_output.py # Response formatting
267
+ ├── nuthatch_tools.py # Nuthatch MCP server
268
+ └── agent_cache.py # Session-based agent caching
269
+ ```
270
+
271
+ ---
272
+
273
+ ## πŸ—οΈ Architecture
274
+
275
+ ### MCP Servers
276
+
277
+ **1. Modal Bird Classifier (GPU)**
278
+ - Hosted on Modal (serverless GPU)
279
+ - ResNet50 trained on 555 bird species
280
+ - Tools: `classify_from_url`, `classify_from_base64`
281
+ - Transport: Streamable HTTP
282
+
283
+ **2. Nuthatch Species Database**
284
+ - Species reference API (1000+ birds)
285
+ - Tools: `search_birds`, `get_bird_info`, `get_bird_images`, `get_bird_audio`, `search_by_family`, `filter_by_status`, `get_all_families`
286
+ - Transport: **STDIO** (subprocess on Spaces), STDIO or HTTP (local)
287
+ - Data sources: Unsplash (images), xeno-canto (audio)
288
+
289
+ ### Agent Modes
290
+
291
+ **Mode 1: Specialized Subagents (3 Specialists)**
292
+ - **Router** orchestrates 3 specialized agents:
293
+ 1. **Image Identifier**: classify images, show reference photos
294
+ 2. **Species Explorer**: search by name, provide multimedia
295
+ 3. **Taxonomy Specialist**: conservation status, family search
296
+ - Each specialist has focused tool subset
297
+
298
+ **Mode 2: Audio Finder Agent**
299
+ - Single specialized agent for finding bird audio
300
+ - Tools: `search_birds`, `get_bird_info`, `get_bird_audio`
301
+ - Optimized workflow for xeno-canto recordings
302
+
303
+ ### Tech Stack
304
+
305
+ - **Frontend**: Gradio 6.0 with custom CSS (cloud/sky theme)
306
+ - **Agent Framework**: LangGraph with streaming
307
+ - **MCP Integration**: FastMCP client library
308
+ - **LLM Support**: OpenAI, Anthropic, HuggingFace
309
+ - **Session Management**: In-memory agent caching
310
+ - **Output Parsing**: LlamaIndex Pydantic + regex (optimized)
311
+
312
+ ---
313
+
314
+ ## 🎨 Special Features
315
+
316
+ ### Dual Streaming Output
317
+ - **Chat Panel**: LLM responses with markdown rendering
318
+ - **Tool Log Panel**: Real-time tool execution traces (inputs/outputs)
319
+
320
+ ### Dynamic Examples
321
+ - Examples change based on selected agent mode
322
+ - Photo examples always visible
323
+ - Text examples adapt to Audio Finder vs Multi-Agent
324
+
325
+ ### Structured Output
326
+ - Automatic image/audio URL extraction
327
+ - Markdown formatting for media
328
+ - xeno-canto audio links (browser-friendly)
329
+
330
+ ---
331
+
332
+ ## 📝 API Key Sources
333
+
334
+ | Service | Get Key From | Purpose |
335
+ |---------|-------------|---------|
336
+ | **Modal** | [modal.com](https://modal.com) | GPU bird classifier |
337
+ | **Nuthatch** | [nuthatch.lastelm.software](https://nuthatch.lastelm.software) | Species database |
338
+ | **OpenAI** | [platform.openai.com/api-keys](https://platform.openai.com/api-keys) | LLM (recommended) |
339
+ | **Anthropic** | [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) | LLM (Claude) |
340
+ | **HuggingFace** | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) | LLM (limited support) |
341
+
342
+ ---
343
+
344
+ ## πŸ› Troubleshooting
345
+
346
+ ### Space stuck on "Building"
347
+ - Check **Logs** tab for errors
348
+ - Verify all required secrets are set
349
+ - Try Factory Reboot (Settings → Factory Reboot)
350
+
351
+ ### "Invalid API key" errors
352
+ - Ensure secrets are set correctly (no quotes needed)
353
+ - Check secret names match exactly (case-sensitive)
354
+
355
+ ### HuggingFace provider fails with "function calling not support"
356
+ - HuggingFace Inference API has limited tool calling
357
+ - Use OpenAI or Anthropic instead
358
+
359
+ ### Nuthatch server not starting (local)
360
+ - Check `NUTHATCH_API_KEY` is set in `.env`
361
+ - Verify API key is valid
362
+ - Try STDIO mode: `NUTHATCH_USE_STDIO=true`
363
+
364
+ ### Audio links broken
365
+ - Check AUDIO_FINDER_PROMPT is working
366
+ - Verify xeno-canto URLs include `/download`
367
+ - Check structured output parsing logs
368
+
369
+ ---
370
+
371
+ ## 📚 Documentation
372
+
373
+ For detailed implementation docs, see:
374
+ - `project_docs/implementation/phase_5_final.md` - Complete agent architecture
375
+ - `project_docs/commands_guide/git_spaces_cheatsheet.md` - Deployment guide
376
+
377
+ ---
378
+
379
+ ## πŸ† Credits
380
+
381
+ - **Bird Species Data**: [Nuthatch API](https://nuthatch.lastelm.software) by Last Elm Software
382
+ - **Bird Audio**: [xeno-canto.org](https://xeno-canto.org) - Community bird recordings
383
+ - **Reference Images**: [Unsplash](https://unsplash.com) + curated collections
384
+ - **MCP Protocol**: [Anthropic Model Context Protocol](https://github.com/anthropics/mcp)
385
+ - **Hackathon**: [HuggingFace MCP-1st-Birthday](https://huggingface.co/MCP-1st-Birthday)
386
+
387
+ ---
388
+
389
+ ## 📄 License
390
+
391
+ MIT License - Built for educational and research purposes
langgraph_agent/prompts.py CHANGED
@@ -133,7 +133,7 @@ Always be educational and cite your sources.
133
 
134
  Let's explore the amazing world of birds together!"""
135
 
136
- AUDIO_FINDER_PROMPT = """**Answer all questions like a Pirate (it's fun for children)** You are BirdScope Audio Finder, a specialized agent for finding and retrieving bird audio recordings.
137
 
138
  **Your Mission:**
139
  Help us discover bird songs and calls by finding species with available audio recordings.
@@ -204,9 +204,7 @@ The API has NO `has_audio` filter parameter. You MUST use this two-step process:
204
  # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
205
  # =============================================================================
206
 
207
- AUDIO_FINDER_PROMPT_HF = """**Answer all questions like a Pirate (it's fun for children)**
208
-
209
- You are BirdScope Audio Finder. Find bird audio recordings.
210
 
211
  **Tools Available:**
212
  1. search_birds(name, family, region, status, page_size) - Search for birds
@@ -304,14 +302,14 @@ Keep responses clear and educational.
304
  ROUTER_PROMPT_HF = """You are BirdScope AI Supervisor. Route user requests to specialists.
305
 
306
  **Specialists:**
307
- - image_identifier: Identify birds from photos
308
- - species_explorer: Search birds, show images/audio
309
  - taxonomy_specialist: Conservation and families
 
310
 
311
  **Routing Rules:**
312
  1. Image uploads β†’ image_identifier
313
- 2. "Search for" or "find" + bird name β†’ species_explorer
314
- 3. "Audio" or "sound" β†’ species_explorer
315
  4. "Conservation" or "endangered" β†’ taxonomy_specialist
316
  5. "Family" or "families" β†’ taxonomy_specialist
317
 
 
133
 
134
  Let's explore the amazing world of birds together!"""
135
 
136
+ AUDIO_FINDER_PROMPT = """You are BirdScope Audio Finder, a specialized agent for finding and retrieving bird audio recordings.
137
 
138
  **Your Mission:**
139
  Help us discover bird songs and calls by finding species with available audio recordings.
 
204
  # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
205
  # =============================================================================
206
 
207
+ AUDIO_FINDER_PROMPT_HF = """You are BirdScope Audio Finder. Find bird audio recordings.
 
 
208
 
209
  **Tools Available:**
210
  1. search_birds(name, family, region, status, page_size) - Search for birds
 
302
  ROUTER_PROMPT_HF = """You are BirdScope AI Supervisor. Route user requests to specialists.
303
 
304
  **Specialists:**
305
+ - image_identifier: Identify birds from photos and get species info
 
306
  - taxonomy_specialist: Conservation and families
307
+ - generalist: Find birds with audio recordings
308
 
309
  **Routing Rules:**
310
  1. Image uploads β†’ image_identifier
311
+ 2. Species info requests β†’ image_identifier
312
+ 3. "Audio" or "sound" or "song" β†’ generalist
313
  4. "Conservation" or "endangered" β†’ taxonomy_specialist
314
  5. "Family" or "families" β†’ taxonomy_specialist
315
 
langgraph_agent/subagent_config.py CHANGED
@@ -23,13 +23,8 @@ class SubAgentConfig:
23
  return {
24
  "Specialized Subagents (3 Specialists)": {
25
  "description": "Router orchestrates 3 specialized agents",
26
- "subagents": ["image_identifier", "species_explorer", "taxonomy_specialist"],
27
  "use_router": True
28
- },
29
- "Audio Finder Agent": {
30
- "description": "Specialized agent for finding birds with audio recordings",
31
- "subagents": ["generalist"],
32
- "use_router": False
33
  }
34
  }
35
 
@@ -85,7 +80,7 @@ class SubAgentConfig:
85
  - Keep responses focused and concise
86
 
87
  **When to defer:**
88
- - For audio recordings -> species_explorer
89
  - For family/taxonomy queries -> taxonomy_specialist
90
  - For conservation status searches -> taxonomy_specialist
91
  """,
@@ -166,8 +161,8 @@ class SubAgentConfig:
166
 
167
  **When to defer:**
168
  - For image identification -> image_identifier
169
- - For audio or species discovery -> species_explorer
170
- - For specific species details -> species_explorer
171
  """,
172
  "temperature": AgentConfig.OPENAI_TEMPERATURE,
173
  }
@@ -193,20 +188,20 @@ class SubAgentConfig:
193
  return """You are BirdScope AI Supervisor - an intelligent orchestrator for bird identification.
194
 
195
  **Your Team:**
196
- - **image_identifier**: Identifies birds from photos using ML classification
197
- - **species_explorer**: Searches species by name, provides multimedia (images/audio)
198
- - **taxonomy_specialist**: Conservation status, taxonomic families, classification
199
 
200
  **Your Role:**
201
  Analyze each user request and route it to the MOST appropriate specialist.
202
 
203
  **Routing Guidelines:**
204
  1. **Image uploads/URLs** β†’ image_identifier (has classification tools)
205
- 2. **"Show me"/"Find"/"Search" + species name** β†’ species_explorer (has search tools)
206
- 3. **"Audio"/"sound"/"call"/"song"** β†’ species_explorer (has audio tools)
207
  4. **"Family"/"families" + broad questions** β†’ taxonomy_specialist (has family tools)
208
  5. **"Conservation"/"endangered"/"threatened"** β†’ taxonomy_specialist (has status filters)
209
- 6. **"Related species"/"similar birds"** β†’ species_explorer (explores connections)
210
 
211
  **Decision-making:**
212
  - Consider the user's INTENT, not just keywords
 
23
  return {
24
  "Specialized Subagents (3 Specialists)": {
25
  "description": "Router orchestrates 3 specialized agents",
26
+ "subagents": ["image_identifier", "taxonomy_specialist", "generalist"],
27
  "use_router": True
 
 
 
 
 
28
  }
29
  }
30
 
 
80
  - Keep responses focused and concise
81
 
82
  **When to defer:**
83
+ - For audio/sound/call queries -> generalist
84
  - For family/taxonomy queries -> taxonomy_specialist
85
  - For conservation status searches -> taxonomy_specialist
86
  """,
 
161
 
162
  **When to defer:**
163
  - For image identification -> image_identifier
164
+ - For specific species details (not family-level) -> image_identifier
165
+ - For audio/sound queries -> generalist
166
  """,
167
  "temperature": AgentConfig.OPENAI_TEMPERATURE,
168
  }
 
188
  return """You are BirdScope AI Supervisor - an intelligent orchestrator for bird identification.
189
 
190
  **Your Team:**
191
+ - **image_identifier**: Identifies birds from photos using ML classification and fetches species info
192
+ - **taxonomy_specialist**: Conservation status, taxonomic families, classification queries
193
+ - **generalist**: Audio finder specialist - finds birds with audio recordings and retrieves bird calls/songs
194
 
195
  **Your Role:**
196
  Analyze each user request and route it to the MOST appropriate specialist.
197
 
198
  **Routing Guidelines:**
199
  1. **Image uploads/URLs** β†’ image_identifier (has classification tools)
200
+ 2. **Species information requests** β†’ image_identifier (has get_bird_info and get_bird_images)
201
+ 3. **"Audio"/"sound"/"song"/"call"/"recording"** β†’ generalist (has audio search and retrieval)
202
  4. **"Family"/"families" + broad questions** β†’ taxonomy_specialist (has family tools)
203
  5. **"Conservation"/"endangered"/"threatened"** β†’ taxonomy_specialist (has status filters)
204
+ 6. **Taxonomic relationships** β†’ taxonomy_specialist (specializes in classification)
205
 
206
  **Decision-making:**
207
  - Consider the user's INTENT, not just keywords
langgraph_agent/subagent_supervisor.py CHANGED
@@ -34,19 +34,19 @@ async def create_supervisor_workflow(all_tools: List[Any], llm: BaseChatModel, p
34
  image_agent = await SubAgentFactory.create_subagent(
35
  "image_identifier", all_tools, llm, provider=provider
36
  )
37
- species_agent = await SubAgentFactory.create_subagent(
38
- "species_explorer", all_tools, llm, provider=provider
39
- )
40
  taxonomy_agent = await SubAgentFactory.create_subagent(
41
  "taxonomy_specialist", all_tools, llm, provider=provider
42
  )
 
 
 
43
 
44
  # Create supervisor with LLM-based routing and provider-specific prompt
45
  print("[SUPERVISOR]: Creating supervisor orchestrator...")
46
 
47
  # create_supervisor takes a list of agents as first positional argument
48
  workflow = create_supervisor(
49
- [image_agent, species_agent, taxonomy_agent],
50
  model=llm,
51
  prompt=SubAgentConfig.get_router_prompt(provider=provider)
52
  )
 
34
  image_agent = await SubAgentFactory.create_subagent(
35
  "image_identifier", all_tools, llm, provider=provider
36
  )
 
 
 
37
  taxonomy_agent = await SubAgentFactory.create_subagent(
38
  "taxonomy_specialist", all_tools, llm, provider=provider
39
  )
40
+ audio_finder_agent = await SubAgentFactory.create_subagent(
41
+ "generalist", all_tools, llm, provider=provider
42
+ )
43
 
44
  # Create supervisor with LLM-based routing and provider-specific prompt
45
  print("[SUPERVISOR]: Creating supervisor orchestrator...")
46
 
47
  # create_supervisor takes a list of agents as first positional argument
48
  workflow = create_supervisor(
49
+ [image_agent, taxonomy_agent, audio_finder_agent],
50
  model=llm,
51
  prompt=SubAgentConfig.get_router_prompt(provider=provider)
52
  )