Spaces:

MCP-1st-Birthday
/

BirdScopeAI

Paused

App Files Files Community

facemelter committed on Nov 30, 2025

Commit

128f5d1

verified ·

1 Parent(s): 6c62235

Added audio_finder subagent to Specialist supervisor

Browse files

Files changed (6) hide show

app.py +13 -15
docs/dev/agents_config-README.md +595 -0
docs/dev/main-README.md +391 -0
langgraph_agent/prompts.py +6 -8
langgraph_agent/subagent_config.py +10 -15
langgraph_agent/subagent_supervisor.py +4 -4

app.py CHANGED Viewed

@@ -31,7 +31,9 @@ PHOTO_EXAMPLES = [
 MULTI_AGENT_TEXT_EXAMPLES = [
     "Tell me about Northern Cardinals - show me images and audio",
     "What birds are in the Cardinalidae family?",
-    "Show me species with endangered status"
 ]
 # Text-only examples for Audio Finder Agent mode
@@ -1026,14 +1028,15 @@ def update_text_examples_for_mode(mode):
     """Return appropriate text example dataset based on agent mode."""
     print(f"[DEBUG] Updating text examples for mode: {mode}")
-    if mode == "Audio Finder Agent":
-        # Audio text examples
-        samples = [[text] for text in AUDIO_FINDER_TEXT_EXAMPLES]
-        print(f"[DEBUG] Audio Finder text samples: {len(samples)} examples")
-    else:  # Specialized Subagents (3 Specialists)
-        # Multi-agent text examples
-        samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
-        print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
     return gr.Dataset(samples=samples)
@@ -1194,7 +1197,6 @@ with gr.Blocks() as demo:
                     <div style="display: flex; align-items: baseline; gap: 0.5rem;">
                         <h1>BirdScope</h1>
                         <span class="header-ai-text">AI</span>
-                        <span class="header-v2-badge">v2</span>
                     </div>
                     <p class="header-subtitle">AI-powered bird identification & species reference</p>
                 </div>
@@ -1486,8 +1488,7 @@ with gr.Blocks() as demo:
                     gr.Markdown("Choose between unified agent or specialized routing")
                     agent_mode = gr.Dropdown(
                         choices=[
-                            "Specialized Subagents (3 Specialists)",
-                            "Audio Finder Agent"  # Changed from "Single Agent (All Tools)"
                         ],
                         value="Specialized Subagents (3 Specialists)",
                         show_label=False,
@@ -1592,12 +1593,9 @@ with gr.Blocks() as demo:
                     - Audio recordings (xeno-canto)
                     - Conservation status data
                     - Taxonomic exploration
-                    **v2 Features:**
                     - Separate tool log panel
                     - Detailed execution tracking
                     - Tool input/output inspection
-                    - Perfect for debugging!
                     """)
     # State for tool log

 MULTI_AGENT_TEXT_EXAMPLES = [
     "Tell me about Northern Cardinals - show me images and audio",
     "What birds are in the Cardinalidae family?",
+    "Show me species with endangered status",
+    "Find me audio recordings for Snow Goose",
+    "Get me bird call samples for any two species"
 ]
 # Text-only examples for Audio Finder Agent mode
     """Return appropriate text example dataset based on agent mode."""
     print(f"[DEBUG] Updating text examples for mode: {mode}")
+    # Placeholder for future mode-specific examples
+    # if mode == "Future Mode Name":
+    #     samples = [[text] for text in FUTURE_MODE_EXAMPLES]
+    #     print(f"[DEBUG] Future mode text samples: {len(samples)} examples")
+    # else:
+    # Default: Specialized Subagents (3 Specialists) - includes image ID, taxonomy, and audio finder
+    samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
+    print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
     return gr.Dataset(samples=samples)
                     <div style="display: flex; align-items: baseline; gap: 0.5rem;">
                         <h1>BirdScope</h1>
                         <span class="header-ai-text">AI</span>
                     </div>
                     <p class="header-subtitle">AI-powered bird identification & species reference</p>
                 </div>
                     gr.Markdown("Choose between unified agent or specialized routing")
                     agent_mode = gr.Dropdown(
                         choices=[
+                            "Specialized Subagents (3 Specialists)"
                         ],
                         value="Specialized Subagents (3 Specialists)",
                         show_label=False,
                     - Audio recordings (xeno-canto)
                     - Conservation status data
                     - Taxonomic exploration
                     - Separate tool log panel
                     - Detailed execution tracking
                     - Tool input/output inspection
                     """)
     # State for tool log

docs/dev/agents_config-README.md ADDED Viewed

	@@ -0,0 +1,595 @@

+# Agent Configuration Guide
+**Complete guide for adding, removing, and modifying agents/subagents in BirdScope AI**
+---
+## 📋 Table of Contents
+1. [Architecture Overview](#architecture-overview)
+2. [Key Files Reference](#key-files-reference)
+3. [Adding a New Subagent](#adding-a-new-subagent)
+4. [Removing a Subagent](#removing-a-subagent)
+5. [Modifying Existing Subagents](#modifying-existing-subagents)
+6. [App.py Integration Points](#apppy-integration-points)
+7. [Testing Your Changes](#testing-your-changes)
+---
+## Architecture Overview
+BirdScope AI uses a **LangGraph supervisor pattern** with specialized subagents:
+```
+User Request
+    ↓
+Supervisor (Router)
+    ↓
+┌──────────────────┬─────────────────────┬────────────────┐
+│ image_identifier │ taxonomy_specialist │ (other agents) │
+└──────────────────┴─────────────────────┴────────────────┘
+```
+**Key Concepts:**
+- **Supervisor**: LLM-based router that delegates tasks to specialists
+- **Subagents**: Specialized agents with filtered tool access and focused prompts
+- **Modes**: Different agent configurations (e.g., "Specialized Subagents (2 Specialists)", "Audio Finder Agent")
+- **Tool Filtering**: Each subagent only has access to relevant tools
+---
+## Key Files Reference
+### Core Agent Files
+| File | Purpose | What to Change |
+|------|---------|----------------|
+| `langgraph_agent/subagent_supervisor.py` | Creates supervisor workflow | Add/remove agents from supervisor list |
+| `langgraph_agent/subagent_config.py` | Defines subagent configurations | Add/remove/modify subagent definitions and modes |
+| `langgraph_agent/subagent_factory.py` | Builds subagent instances | (Usually no changes needed) |
+| `langgraph_agent/prompts.py` | System prompts for agents | Add provider-specific prompts |
+### UI Integration
+| File | Purpose | What to Change |
+|------|---------|----------------|
+| `app.py` | Gradio UI and agent orchestration | Update mode dropdown, default values, examples |
+---
+## Adding a New Subagent
+### Step 1: Define Subagent Configuration
+**File:** `langgraph_agent/subagent_config.py`
+Add your subagent to `get_subagent_definitions()`:
+```python
+@staticmethod
+def get_subagent_definitions(provider: str = "openai") -> Dict[str, Dict]:
+    return {
+        # ... existing agents ...
+        "my_new_agent": {
+            "name": "My New Specialist",
+            "description": "Expert at specific bird-related tasks",
+            "tools": [
+                "tool_name_1",
+                "tool_name_2",
+                "tool_name_3"
+            ],
+            "prompt": get_prompt("my_new_agent", provider) or """You are a My New Specialist.
+**Your Role:**
+1. Primary responsibility
+2. Secondary responsibility
+3. When to use specific tools
+**Response Style:**
+- How to format responses
+- What to emphasize
+**When to defer:**
+- Task type 1 -> other_agent_name
+- Task type 2 -> another_agent_name
+""",
+            "temperature": AgentConfig.OPENAI_TEMPERATURE,
+        }
+    }
+```
+### Step 2: Create System Prompts
+**File:** `langgraph_agent/prompts.py`
+Add prompts for your new agent:
+```python
+# Default prompt (used by OpenAI/Anthropic)
+MY_NEW_AGENT_PROMPT = """Detailed prompt for your agent..."""
+# HuggingFace-optimized prompt (more explicit, step-by-step)
+MY_NEW_AGENT_PROMPT_HF = """Simplified, step-by-step prompt..."""
+# Add to PROMPTS dictionary
+PROMPTS = {
+    # ... existing prompts ...
+    "my_new_agent": {
+        "default": MY_NEW_AGENT_PROMPT,
+        "huggingface": MY_NEW_AGENT_PROMPT_HF,
+    },
+}
+```
+### Step 3: Add to Supervisor Workflow
+**File:** `langgraph_agent/subagent_supervisor.py`
+```python
+async def create_supervisor_workflow(all_tools, llm, provider="openai"):
+    # Create existing agents
+    image_agent = await SubAgentFactory.create_subagent(
+        "image_identifier", all_tools, llm, provider=provider
+    )
+    # ... other agents ...
+    # Add your new agent
+    my_new_agent = await SubAgentFactory.create_subagent(
+        "my_new_agent", all_tools, llm, provider=provider
+    )
+    # Add to supervisor list
+    workflow = create_supervisor(
+        [image_agent, taxonomy_agent, my_new_agent],  # Add here
+        model=llm,
+        prompt=SubAgentConfig.get_router_prompt(provider=provider)
+    )
+```
+### Step 4: Update Router Prompts
+**File:** `langgraph_agent/subagent_config.py`
+Update `get_router_prompt()`:
+```python
+return """You are BirdScope AI Supervisor...
+**Your Team:**
+- **image_identifier**: Identifies birds from photos...
+- **taxonomy_specialist**: Conservation status, families...
+- **my_new_agent**: Specific tasks for my new agent  # Add this
+**Routing Guidelines:**
+1. **Image uploads/URLs** → image_identifier
+2. **Conservation queries** → taxonomy_specialist
+3. **New task type** → my_new_agent  # Add this
+```
+Also update `prompts.py` for HuggingFace router:
+```python
+ROUTER_PROMPT_HF = """...
+**Specialists:**
+- image_identifier: ...
+- taxonomy_specialist: ...
+- my_new_agent: New task handling  # Add this
+**Routing Rules:**
+...
+6. "New task keyword" → my_new_agent  # Add this
+"""
+```
+### Step 5: Update Mode Definition
+**File:** `langgraph_agent/subagent_config.py`
+Update `get_mode_definitions()`:
+```python
+return {
+    "Specialized Subagents (3 Specialists)": {  # Update count
+        "description": "Router orchestrates 3 specialized agents",
+        "subagents": ["image_identifier", "taxonomy_specialist", "my_new_agent"],  # Add here
+        "use_router": True
+    },
+}
+```
+### Step 6: Integrate with app.py
+See [App.py Integration Points](#apppy-integration-points) below.
+---
+## Removing a Subagent
+**Example: Removing `species_explorer` from the supervisor**
+### Step 1: Remove from Supervisor Workflow
+**File:** `langgraph_agent/subagent_supervisor.py`
+```python
+async def create_supervisor_workflow(all_tools, llm, provider="openai"):
+    # Remove agent creation
+    # species_agent = await SubAgentFactory.create_subagent(...)  # DELETE
+    # Remove from supervisor list
+    workflow = create_supervisor(
+        [image_agent, taxonomy_agent],  # Remove species_agent
+        model=llm,
+        prompt=SubAgentConfig.get_router_prompt(provider=provider)
+    )
+```
+### Step 2: Update Mode Definition
+**File:** `langgraph_agent/subagent_config.py`
+```python
+return {
+    "Specialized Subagents (2 Specialists)": {  # Update count
+        "description": "Router orchestrates 2 specialized agents",
+        "subagents": ["image_identifier", "taxonomy_specialist"],  # Remove agent
+        "use_router": True
+    },
+}
+```
+### Step 3: Update Router Prompts
+**File:** `langgraph_agent/subagent_config.py` (default router)
+```python
+return """You are BirdScope AI Supervisor...
+**Your Team:**
+- **image_identifier**: ...
+- **taxonomy_specialist**: ...
+# Remove species_explorer reference
+**Routing Guidelines:**
+# Remove routing rules for deleted agent
+# Reassign its responsibilities to other agents
+```
+**File:** `langgraph_agent/prompts.py` (HuggingFace router)
+```python
+ROUTER_PROMPT_HF = """...
+**Specialists:**
+- image_identifier: ...
+- taxonomy_specialist: ...
+# Remove deleted agent
+**Routing Rules:**
+# Remove routing rules
+# Reassign to remaining agents
+"""
+```
+### Step 4: Update "When to defer" Sections
+**File:** `langgraph_agent/subagent_config.py`
+Update remaining subagents' prompts:
+```python
+"image_identifier": {
+    # ...
+    "prompt": """...
+    **When to defer:**
+    - For family/taxonomy queries -> taxonomy_specialist
+    # Remove references to deleted agent
+    """,
+}
+```
+### Step 5: Update app.py References
+See [App.py Integration Points](#apppy-integration-points) below.
+---
+## Modifying Existing Subagents
+### Changing Tool Access
+**File:** `langgraph_agent/subagent_config.py`
+```python
+"image_identifier": {
+    "tools": [
+        "classify_from_url",
+        "classify_from_base64",
+        "get_bird_info",
+        "new_tool_name"  # Add new tool
+    ],
+}
+```
+### Updating Prompts
+**File:** `langgraph_agent/subagent_config.py` or `langgraph_agent/prompts.py`
+```python
+# For inline prompts (in subagent_config.py)
+"image_identifier": {
+    "prompt": get_prompt("image_identifier", provider) or """Updated prompt..."""
+}
+# For dedicated prompts (in prompts.py)
+IMAGE_IDENTIFIER_PROMPT = """Updated comprehensive prompt..."""
+```
+### Changing Temperature
+**File:** `langgraph_agent/subagent_config.py`
+```python
+"species_explorer": {
+    "temperature": 0.2,  # More creative (was 0.1)
+}
+```
+---
+## App.py Integration Points
+**When you change agent modes, you MUST update these sections in app.py:**
+### 1. Mode Dropdown Choices
+**Location:** `app.py` ~line 1486-1491
+```python
+agent_mode = gr.Dropdown(
+    choices=[
+        "Specialized Subagents (2 Specialists)",  # Update mode name here
+        "Audio Finder Agent"
+    ],
+    value="Specialized Subagents (2 Specialists)",  # Update default here
+    show_label=False,
+    container=False
+)
+```
+### 2. Initial Session Status HTML
+**Location:** `app.py` ~line 1560
+```python
+session_status = gr.HTML(
+    value=create_config_html(
+        provider_choice="OpenAI",
+        agent_mode_choice="Specialized Subagents (2 Specialists)",  # Update here
+        hf_key_input="",
+        openai_key_input="",
+        anthropic_key_input=""
+    )
+)
+```
+### 3. Health Check Config HTML
+**Location:** `app.py` ~line 1654
+```python
+config_html = create_config_html(
+    provider_choice=provider_str,
+    agent_mode_choice="Specialized Subagents (2 Specialists)",  # Update here
+    hf_key_input=hf_key_value,
+    openai_key_input=openai_key_input,
+    anthropic_key_input=anthropic_key_input
+)
+```
+### 4. Example Loading Logic Comments
+**Location:** `app.py` ~line 1033
+```python
+else:  # Specialized Subagents (2 Specialists)  # Update comment
+    samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
+```
+### 5. (Optional) Add Mode-Specific Examples
+**Location:** `app.py` ~line 30-40 (add new example list)
+```python
+# Text-only examples for Specialized Subagents mode
+MULTI_AGENT_TEXT_EXAMPLES = [
+    "Tell me about Northern Cardinals - show me images and audio",
+    "What birds are in the Cardinalidae family?",
+    "Show me species with endangered status",
+    "Find me audio recordings for Snow Goose",
+    "Get me bird call samples for any two species"
+]
+# Add examples for your new agent mode
+MY_NEW_AGENT_EXAMPLES = [
+    "Example query 1 for new mode",
+    "Example query 2 for new mode",
+    "Example query 3 for new mode"
+]
+```
+**Location:** `app.py` ~line 1027-1041 (update conditional logic)
+The function includes a **placeholder for future modes**. Uncomment and customize:
+```python
+def update_text_examples_for_mode(mode):
+    """Return appropriate text example dataset based on agent mode."""
+    print(f"[DEBUG] Updating text examples for mode: {mode}")
+    # Placeholder for future mode-specific examples
+    if mode == "My New Agent Mode":  # UNCOMMENT and update mode name
+        samples = [[text] for text in MY_NEW_AGENT_EXAMPLES]
+        print(f"[DEBUG] New mode text samples: {len(samples)} examples")
+    # elif mode == "Another Mode":  # Add more modes as needed
+    #     samples = [[text] for text in ANOTHER_MODE_EXAMPLES]
+    else:
+        # Default: Specialized Subagents (3 Specialists)
+        samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
+        print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
+    return gr.Dataset(samples=samples)
+```
+**Why keep the conditional?** Even with only one mode, we maintain the placeholder structure to make it easy to add new modes later without refactoring the entire function.
+---
+## Testing Your Changes
+### 1. Local Testing
+```bash
+# Run the app locally
+python app.py
+# or
+gradio app.py
+```
+### 2. Check for Errors
+**Common errors to watch for:**
+```
+Unknown mode: Specialized Subagents (3 Specialists). Available: ['Specialized Subagents (2 Specialists)', 'Audio Finder Agent']
+```
+→ **Fix:** Update app.py mode references
+```
+ValueError: Unknown subagent: species_explorer
+```
+→ **Fix:** Remove references to deleted subagent in supervisor or mode definitions
+### 3. Test Agent Routing
+Try queries that should route to different agents:
+```python
+# Test image_identifier routing
+"What bird is this? [upload image]"
+# Test taxonomy_specialist routing
+"Show me endangered bird families"
+# Test your new agent
+"Query specific to new agent capability"
+```
+### 4. Check Tool Access
+Verify agents only use their assigned tools:
+```bash
+# In terminal, watch for:
+[SUBAGENT]: Creating Image Identification Specialist
+  • Tools: classify_from_url, classify_from_base64, get_bird_info, get_bird_images
+```
+### 5. Verify Provider-Specific Prompts
+Test with different LLM providers:
+```python
+# OpenAI should use default prompts
+# HuggingFace should use _HF prompts
+```
+---
+## Quick Reference Checklist
+**Adding a new subagent:**
+- [ ] Define in `subagent_config.py` → `get_subagent_definitions()`
+- [ ] Create prompts in `prompts.py` (default + HF versions)
+- [ ] Add to PROMPTS dictionary
+- [ ] Create agent in `subagent_supervisor.py`
+- [ ] Add to supervisor list
+- [ ] Update router prompts (default + HF)
+- [ ] Update mode definition
+- [ ] Update app.py mode references (5 locations)
+- [ ] Test locally
+**Removing a subagent:**
+- [ ] Remove from `subagent_supervisor.py` workflow
+- [ ] Update mode definition count and list
+- [ ] Update router prompts (remove references)
+- [ ] Update "When to defer" in remaining agents
+- [ ] Update app.py mode references (5 locations)
+- [ ] Test locally
+**Modifying a subagent:**
+- [ ] Update tools list in `subagent_config.py`
+- [ ] Update prompts if needed
+- [ ] Update router if responsibilities changed
+- [ ] Test locally
+---
+## Troubleshooting
+### Error: "Unknown mode"
+**Cause:** Mode name mismatch between `subagent_config.py` and `app.py`
+**Fix:** Search for all occurrences in app.py and update:
+```bash
+grep -n "Specialized Subagents (3 Specialists)" app.py
+```
+### Error: "Unknown subagent"
+**Cause:** Subagent referenced in supervisor but not defined in config
+**Fix:** Either define the subagent or remove references
+### Agent Not Using Expected Tools
+**Cause:** Tool name mismatch or tool not available
+**Fix:** Check MCP server is providing the tool:
+```python
+print([tool.name for tool in all_tools])
+```
+---
+## Best Practices
+1. **Always update both default and HuggingFace prompts** - HF models need more explicit instructions
+2. **Keep tool lists minimal** - Only give agents tools they truly need
+3. **Update router prompts** - Supervisor needs to know when to use your agent
+4. **Test routing logic** - Verify supervisor correctly delegates tasks
+5. **Document agent responsibilities** - Clear "Your Role" section in prompts
+6. **Use provider-specific prompts** - Optimize for OpenAI vs Anthropic vs HuggingFace
+7. **Keep "When to defer" up to date** - Agents should know their boundaries
+---
+## Example: Recent Change
+**We removed `species_explorer` from the Specialized Subagents mode:**
+**Files changed:**
+1. `subagent_supervisor.py` - Removed species_agent creation and reference
+2. `subagent_config.py` - Updated mode from (3 Specialists) → (2 Specialists)
+3. `subagent_config.py` - Updated router prompts (default)
+4. `prompts.py` - Updated ROUTER_PROMPT_HF
+5. `subagent_config.py` - Removed species_explorer from "When to defer" sections
+6. `app.py` - Updated all 5 mode references from (3 Specialists) → (2 Specialists)
+**Reason:** Simplified architecture before adding audio finder as new subagent
+---
+**Questions?** Check the LangGraph documentation: https://langchain-ai.github.io/langgraph/

docs/dev/main-README.md ADDED Viewed

	@@ -0,0 +1,391 @@

+---
+title: BirdScope AI - MCP Multi-Agent System
+emoji: 🦅
+colorFrom: green
+colorTo: blue
+sdk: gradio
+python_version: 3.11
+app_file: app.py
+pinned: false
+---
+# 🦅 BirdScope AI - Multi-Agent Bird Identification System
+**AI-powered bird identification with specialized MCP agents**
+Built for the [MCP 1st Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday)
+---
+## 🎯 Overview
+BirdScope AI is a production-ready multi-agent system that combines **Modal GPU classification** with **Nuthatch species database** to provide comprehensive bird identification and exploration. Users can upload photos, search species, explore taxonomic families, and access rich multimedia content (images, audio recordings, conservation data).
+**Two Agent Modes:**
+1. **Specialized Subagents (3 Specialists)** - Router orchestrates image identifier, taxonomy specialist, and audio finder
+2. **Audio Finder Agent** - Specialized agent for discovering bird audio recordings
+---
+## ✨ Features
+- 🔍 **Image Classification**: Upload bird photos for instant GPU-powered identification
+- 📸 **Reference Images**: High-quality Unsplash photos for each species
+- 🎵 **Audio Recordings**: Bird calls and songs from xeno-canto.org
+- 🌍 **Conservation Data**: IUCN status and taxonomic information
+- 🧠 **Multi-Agent Architecture**: Specialized agents with focused tool subsets
+- 🔄 **Dual Streaming**: Separate outputs for chat responses and tool execution logs
+- 🤖 **Multi-Provider**: OpenAI (GPT-4), Anthropic (Claude), HuggingFace (Qwen)
+---
+## 🚀 Quick Start (For Users)
+### Option 1: OpenAI (Recommended)
+1. Get your OpenAI API key from [platform.openai.com/api-keys](https://platform.openai.com/api-keys)
+2. Select **OpenAI** as provider in the sidebar
+3. Enter your API key
+4. Model used: `gpt-4o-mini`
+### Option 2: Anthropic (Claude)
+1. Get your Anthropic API key from [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys)
+2. Select **Anthropic** as provider
+3. Enter your API key
+4. Model used: `claude-sonnet-4-5`
+### Option 3: HuggingFace
+⚠️ **Note**: HuggingFace Inference API has limited function calling support. OpenAI or Anthropic recommended for full functionality.
+---
+## 🛠️ Environment Setup (For Developers)
+### Prerequisites
+- Python 3.11+
+- Modal account (for GPU classifier)
+- Nuthatch API key
+- LLM API key (OpenAI, Anthropic, or HuggingFace)
+---
+### 🏠 Local Development Setup
+#### Step 1: Clone and Install
+```bash
+# Change into your local checkout of the project (adjust the path to wherever you cloned it)
+cd ~/Desktop/hackathon/hackathon_draft
+# Create virtual environment
+python3.11 -m venv .venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+# Install dependencies
+pip install -r requirements.txt
+```
+#### Step 2: Configure Environment Variables
+Create a `.env` file from the example:
+```bash
+cp .env.example .env
+```
+Edit `.env` with your API keys:
+```bash
+# ================================================
+# REQUIRED: Modal Bird Classifier (GPU)
+# ================================================
+MODAL_MCP_URL=https://your-modal-app--mcp-server.modal.run/mcp
+BIRD_CLASSIFIER_API_KEY=your-modal-api-key-here
+# ================================================
+# REQUIRED: Nuthatch Species Database
+# ================================================
+NUTHATCH_API_KEY=your-nuthatch-api-key-here
+NUTHATCH_BASE_URL=https://nuthatch.lastelm.software/v2  # Default, can omit
+# Nuthatch Transport Mode (STDIO or HTTP)
+NUTHATCH_USE_STDIO=true  # Recommended for local development
+# Only needed if NUTHATCH_USE_STDIO=false:
+# NUTHATCH_MCP_URL=http://localhost:8001/mcp
+# NUTHATCH_MCP_AUTH_KEY=your-auth-key-here
+# ================================================
+# LLM Provider (Choose ONE)
+# ================================================
+# OpenAI (Recommended)
+OPENAI_API_KEY=sk-your-openai-key-here
+DEFAULT_OPENAI_MODEL=gpt-4o-mini
+OPENAI_TEMPERATURE=0.0
+# OR Anthropic
+# ANTHROPIC_API_KEY=sk-ant-your-anthropic-key-here
+# DEFAULT_ANTHROPIC_MODEL=claude-sonnet-4-5-20250929
+# ANTHROPIC_TEMPERATURE=0.0
+# OR HuggingFace (Limited function calling support)
+# HF_API_KEY=hf_your-huggingface-token-here
+# DEFAULT_HF_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+# HF_TEMPERATURE=0.1
+```
+#### Step 3: Understanding Nuthatch Transport Modes
+**STDIO Mode (Recommended for Local):**
+- Nuthatch MCP server runs as subprocess
+- Automatically started by the app
+- No separate server process needed
+- Set `NUTHATCH_USE_STDIO=true`
+**HTTP Mode (Alternative for Local):**
+- Nuthatch MCP server runs as separate HTTP server
+- Useful for debugging or multiple clients
+- Requires running server in separate terminal
+To use HTTP mode:
+```bash
+# Terminal 1: Run Nuthatch MCP server
+python nuthatch_tools.py --http --port 8001
+# Terminal 2: Run the app
+# Set in .env:
+# NUTHATCH_USE_STDIO=false
+# NUTHATCH_MCP_URL=http://localhost:8001/mcp
+python app.py
+```
+#### Step 4: Run the App
+```bash
+# With STDIO mode (default, easiest):
+python app.py
+# Or using Gradio CLI:
+gradio app.py
+```
+App will be available at: `http://127.0.0.1:7860`
+---
+### ☁️ HuggingFace Spaces Deployment
+#### Step 1: Create a New Space
+1. Go to [huggingface.co/new-space](https://huggingface.co/new-space)
+2. Choose:
+   - **SDK**: Gradio
+   - **Hardware**: CPU Basic (free) or CPU Upgrade (faster)
+   - **Visibility**: Public or Private
+#### Step 2: Upload Your Code
+**Option A: Using `upload_to_space.py` (Recommended)**
+```bash
+# 1. Install HuggingFace CLI
+pip install huggingface_hub
+# 2. Login
+huggingface-cli login
+# 3. Update upload_to_space.py with your Space name
+# Edit line with repo_id:
+# repo_id="YOUR-USERNAME/YOUR-SPACE-NAME"
+# 4. Upload
+python upload_to_space.py
+```
+**Option B: Using Git**
+```bash
+git remote add hf-space https://huggingface.co/spaces/YOUR-USERNAME/YOUR-SPACE-NAME
+git push hf-space main
+```
+#### Step 3: Configure Secrets in HuggingFace Spaces
+⚠️ **CRITICAL**: Spaces use **Secrets**, not `.env` files!
+Go to your Space → **Settings** → **Variables and secrets**
+**Add these secrets:**
+```bash
+# REQUIRED: Modal Bird Classifier
+MODAL_MCP_URL = https://your-modal-app--mcp-server.modal.run/mcp
+BIRD_CLASSIFIER_API_KEY = your-modal-api-key-here
+# REQUIRED: Nuthatch Species Database
+NUTHATCH_API_KEY = your-nuthatch-api-key-here
+NUTHATCH_BASE_URL = https://nuthatch.lastelm.software/v2  # Optional
+NUTHATCH_USE_STDIO = true  # MUST be "true" for Spaces
+# OPTIONAL: Backend-provided LLM keys (users can provide their own)
+# Only add if you want to provide default keys:
+# OPENAI_API_KEY = sk-your-key-here
+# ANTHROPIC_API_KEY = sk-ant-your-key-here
+```
+**Important Notes:**
+- ✅ **ALWAYS** use `NUTHATCH_USE_STDIO=true` on Spaces (subprocess mode)
+- ✅ HTTP mode not supported on Spaces (port binding restrictions)
+- ✅ Users can provide their own LLM keys via the UI
+- ✅ Environment variables from Spaces **do not** auto-inherit to subprocesses
+  - The app explicitly passes `NUTHATCH_API_KEY` and `NUTHATCH_BASE_URL` to the subprocess (see `mcp_clients.py`)
+#### Step 4: Verify Deployment
+1. Wait for Space to build (2-5 minutes)
+2. Check **Logs** tab for errors
+3. Try the app - upload a bird photo or ask about species
+---
+## 📁 Project Structure
+```
+hackathon_draft/
+├── app.py                      # Main Gradio app
+├── upload_to_space.py          # HF Spaces upload script
+├── requirements.txt            # Python dependencies
+├── .env.example                # Environment template
+├── langgraph_agent/
+│   ├── __init__.py
+│   ├── agents.py               # Agent factory (single/multi-agent)
+│   ├── config.py               # Configuration loader
+│   ├── mcp_clients.py          # MCP client setup
+│   ├── subagent_config.py      # Agent mode definitions
+│   ├── prompts.py              # System prompts
+│   └── structured_output.py    # Response formatting
+├── nuthatch_tools.py           # Nuthatch MCP server
+└── agent_cache.py              # Session-based agent caching
+```
+---
+## 🏗️ Architecture
+### MCP Servers
+**1. Modal Bird Classifier (GPU)**
+- Hosted on Modal (serverless GPU)
+- ResNet50 trained on 555 bird species
+- Tools: `classify_from_url`, `classify_from_base64`
+- Transport: Streamable HTTP
+**2. Nuthatch Species Database**
+- Species reference API (1000+ birds)
+- Tools: `search_birds`, `get_bird_info`, `get_bird_images`, `get_bird_audio`, `search_by_family`, `filter_by_status`, `get_all_families`
+- Transport: **STDIO** (subprocess on Spaces), STDIO or HTTP (local)
+- Data sources: Unsplash (images), xeno-canto (audio)
+### Agent Modes
+**Mode 1: Specialized Subagents (3 Specialists)**
+- **Router** orchestrates 3 specialized agents:
+  1. **Image Identifier**: classify images, show reference photos
+  2. **Audio Finder**: find bird song and call recordings (xeno-canto)
+  3. **Taxonomy Specialist**: conservation status, family search
+- Each specialist has focused tool subset
+**Mode 2: Audio Finder Agent**
+- Single specialized agent for finding bird audio
+- Tools: `search_birds`, `get_bird_info`, `get_bird_audio`
+- Optimized workflow for xeno-canto recordings
+### Tech Stack
+- **Frontend**: Gradio 6.0 with custom CSS (cloud/sky theme)
+- **Agent Framework**: LangGraph with streaming
+- **MCP Integration**: FastMCP client library
+- **LLM Support**: OpenAI, Anthropic, HuggingFace
+- **Session Management**: In-memory agent caching
+- **Output Parsing**: LlamaIndex Pydantic + regex (optimized)
+---
+## 🎨 Special Features
+### Dual Streaming Output
+- **Chat Panel**: LLM responses with markdown rendering
+- **Tool Log Panel**: Real-time tool execution traces (inputs/outputs)
+### Dynamic Examples
+- Examples change based on selected agent mode
+- Photo examples always visible
+- Text examples adapt to Audio Finder vs Multi-Agent
+### Structured Output
+- Automatic image/audio URL extraction
+- Markdown formatting for media
+- xeno-canto audio links (browser-friendly)
+---
+## 📝 API Key Sources
+| Service | Get Key From | Purpose |
+|---------|-------------|---------|
+| **Modal** | [modal.com](https://modal.com) | GPU bird classifier |
+| **Nuthatch** | [nuthatch.lastelm.software](https://nuthatch.lastelm.software) | Species database |
+| **OpenAI** | [platform.openai.com/api-keys](https://platform.openai.com/api-keys) | LLM (recommended) |
+| **Anthropic** | [console.anthropic.com/settings/keys](https://console.anthropic.com/settings/keys) | LLM (Claude) |
+| **HuggingFace** | [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) | LLM (limited support) |
+---
+## 🐛 Troubleshooting
+### Space stuck on "Building"
+- Check **Logs** tab for errors
+- Verify all required secrets are set
+- Try Factory Reboot (Settings → Factory Reboot)
+### "Invalid API key" errors
+- Ensure secrets are set correctly (no quotes needed)
+- Check secret names match exactly (case-sensitive)
+### HuggingFace provider fails with "function calling not support"
+- HuggingFace Inference API has limited tool calling
+- Use OpenAI or Anthropic instead
+### Nuthatch server not starting (local)
+- Check `NUTHATCH_API_KEY` is set in `.env`
+- Verify API key is valid
+- Try STDIO mode: `NUTHATCH_USE_STDIO=true`
+### Audio links broken
+- Check AUDIO_FINDER_PROMPT is working
+- Verify xeno-canto URLs include `/download`
+- Check structured output parsing logs
+---
+## 📚 Documentation
+For detailed implementation docs, see:
+- `project_docs/implementation/phase_5_final.md` - Complete agent architecture
+- `project_docs/commands_guide/git_spaces_cheatsheet.md` - Deployment guide
+---
+## 🏆 Credits
+- **Bird Species Data**: [Nuthatch API](https://nuthatch.lastelm.software) by Last Elm Software
+- **Bird Audio**: [xeno-canto.org](https://xeno-canto.org) - Community bird recordings
+- **Reference Images**: [Unsplash](https://unsplash.com) + curated collections
+- **MCP Protocol**: [Model Context Protocol](https://github.com/modelcontextprotocol) (introduced by Anthropic)
+- **Hackathon**: [HuggingFace MCP-1st-Birthday](https://huggingface.co/MCP-1st-Birthday)
+---
+## 📄 License
+MIT License - Built for educational and research purposes

langgraph_agent/prompts.py CHANGED Viewed

@@ -133,7 +133,7 @@ Always be educational and cite your sources.
 Let's explore the amazing world of birds together!"""
-AUDIO_FINDER_PROMPT = """**Answer all questions like a Pirate (it's fun for children)** You are BirdScope Audio Finder, a specialized agent for finding and retrieving bird audio recordings.
 **Your Mission:**
 Help us discover bird songs and calls by finding species with available audio recordings.
@@ -204,9 +204,7 @@ The API has NO `has_audio` filter parameter. You MUST use this two-step process:
 # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
 # =============================================================================
-AUDIO_FINDER_PROMPT_HF = """**Answer all questions like a Pirate (it's fun for children)**
-You are BirdScope Audio Finder. Find bird audio recordings.
 **Tools Available:**
 1. search_birds(name, family, region, status, page_size) - Search for birds
@@ -304,14 +302,14 @@ Keep responses clear and educational.
 ROUTER_PROMPT_HF = """You are BirdScope AI Supervisor. Route user requests to specialists.
 **Specialists:**
-- image_identifier: Identify birds from photos
-- species_explorer: Search birds, show images/audio
 - taxonomy_specialist: Conservation and families
 **Routing Rules:**
 1. Image uploads → image_identifier
-2. "Search for" or "find" + bird name → species_explorer
-3. "Audio" or "sound" → species_explorer
 4. "Conservation" or "endangered" → taxonomy_specialist
 5. "Family" or "families" → taxonomy_specialist

 Let's explore the amazing world of birds together!"""
+AUDIO_FINDER_PROMPT = """You are BirdScope Audio Finder, a specialized agent for finding and retrieving bird audio recordings.
 **Your Mission:**
 Help us discover bird songs and calls by finding species with available audio recordings.
 # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
 # =============================================================================
+AUDIO_FINDER_PROMPT_HF = """You are BirdScope Audio Finder. Find bird audio recordings.
 **Tools Available:**
 1. search_birds(name, family, region, status, page_size) - Search for birds
 ROUTER_PROMPT_HF = """You are BirdScope AI Supervisor. Route user requests to specialists.
 **Specialists:**
+- image_identifier: Identify birds from photos and get species info
 - taxonomy_specialist: Conservation and families
+- generalist: Find birds with audio recordings
 **Routing Rules:**
 1. Image uploads → image_identifier
+2. Species info requests → image_identifier
+3. "Audio" or "sound" or "song" → generalist
 4. "Conservation" or "endangered" → taxonomy_specialist
 5. "Family" or "families" → taxonomy_specialist

langgraph_agent/subagent_config.py CHANGED Viewed

@@ -23,13 +23,8 @@ class SubAgentConfig:
         return {
             "Specialized Subagents (3 Specialists)": {
                 "description": "Router orchestrates 3 specialized agents",
-                "subagents": ["image_identifier", "species_explorer", "taxonomy_specialist"],
                 "use_router": True
-            },
-            "Audio Finder Agent": {
-                "description": "Specialized agent for finding birds with audio recordings",
-                "subagents": ["generalist"],
-                "use_router": False
             }
         }
@@ -85,7 +80,7 @@ class SubAgentConfig:
 - Keep responses focused and concise
 **When to defer:**
-- For audio recordings -> species_explorer
 - For family/taxonomy queries -> taxonomy_specialist
 - For conservation status searches -> taxonomy_specialist
 """,
@@ -166,8 +161,8 @@ class SubAgentConfig:
 **When to defer:**
 - For image identification -> image_identifier
-- For audio or species discovery -> species_explorer
-- For specific species details -> species_explorer
 """,
                 "temperature": AgentConfig.OPENAI_TEMPERATURE,
             }
@@ -193,20 +188,20 @@ class SubAgentConfig:
         return """You are BirdScope AI Supervisor - an intelligent orchestrator for bird identification.
 **Your Team:**
-- **image_identifier**: Identifies birds from photos using ML classification
-- **species_explorer**: Searches species by name, provides multimedia (images/audio)
-- **taxonomy_specialist**: Conservation status, taxonomic families, classification
 **Your Role:**
 Analyze each user request and route it to the MOST appropriate specialist.
 **Routing Guidelines:**
 1. **Image uploads/URLs** → image_identifier (has classification tools)
-2. **"Show me"/"Find"/"Search" + species name** → species_explorer (has search tools)
-3. **"Audio"/"sound"/"call"/"song"** → species_explorer (has audio tools)
 4. **"Family"/"families" + broad questions** → taxonomy_specialist (has family tools)
 5. **"Conservation"/"endangered"/"threatened"** → taxonomy_specialist (has status filters)
-6. **"Related species"/"similar birds"** → species_explorer (explores connections)
 **Decision-making:**
 - Consider the user's INTENT, not just keywords

         return {
             "Specialized Subagents (3 Specialists)": {
                 "description": "Router orchestrates 3 specialized agents",
+                "subagents": ["image_identifier", "taxonomy_specialist", "generalist"],
                 "use_router": True
             }
         }
 - Keep responses focused and concise
 **When to defer:**
+- For audio/sound/call queries -> generalist
 - For family/taxonomy queries -> taxonomy_specialist
 - For conservation status searches -> taxonomy_specialist
 """,
 **When to defer:**
 - For image identification -> image_identifier
+- For specific species details (not family-level) -> image_identifier
+- For audio/sound queries -> generalist
 """,
                 "temperature": AgentConfig.OPENAI_TEMPERATURE,
             }
         return """You are BirdScope AI Supervisor - an intelligent orchestrator for bird identification.
 **Your Team:**
+- **image_identifier**: Identifies birds from photos using ML classification and fetches species info
+- **taxonomy_specialist**: Conservation status, taxonomic families, classification queries
+- **generalist**: Audio finder specialist - finds birds with audio recordings and retrieves bird calls/songs
 **Your Role:**
 Analyze each user request and route it to the MOST appropriate specialist.
 **Routing Guidelines:**
 1. **Image uploads/URLs** → image_identifier (has classification tools)
+2. **Species information requests** → image_identifier (has get_bird_info and get_bird_images)
+3. **"Audio"/"sound"/"song"/"call"/"recording"** → generalist (has audio search and retrieval)
 4. **"Family"/"families" + broad questions** → taxonomy_specialist (has family tools)
 5. **"Conservation"/"endangered"/"threatened"** → taxonomy_specialist (has status filters)
+6. **Taxonomic relationships** → taxonomy_specialist (specializes in classification)
 **Decision-making:**
 - Consider the user's INTENT, not just keywords

langgraph_agent/subagent_supervisor.py CHANGED Viewed

@@ -34,19 +34,19 @@ async def create_supervisor_workflow(all_tools: List[Any], llm: BaseChatModel, p
     image_agent = await SubAgentFactory.create_subagent(
         "image_identifier", all_tools, llm, provider=provider
     )
-    species_agent = await SubAgentFactory.create_subagent(
-        "species_explorer", all_tools, llm, provider=provider
-    )
     taxonomy_agent = await SubAgentFactory.create_subagent(
         "taxonomy_specialist", all_tools, llm, provider=provider
     )
     # Create supervisor with LLM-based routing and provider-specific prompt
     print("[SUPERVISOR]: Creating supervisor orchestrator...")
     # create_supervisor takes a list of agents as first positional argument
     workflow = create_supervisor(
-        [image_agent, species_agent, taxonomy_agent],
         model=llm,
         prompt=SubAgentConfig.get_router_prompt(provider=provider)
     )

     image_agent = await SubAgentFactory.create_subagent(
         "image_identifier", all_tools, llm, provider=provider
     )
     taxonomy_agent = await SubAgentFactory.create_subagent(
         "taxonomy_specialist", all_tools, llm, provider=provider
     )
+    audio_finder_agent = await SubAgentFactory.create_subagent(
+        "generalist", all_tools, llm, provider=provider
+    )
     # Create supervisor with LLM-based routing and provider-specific prompt
     print("[SUPERVISOR]: Creating supervisor orchestrator...")
     # create_supervisor takes a list of agents as first positional argument
     workflow = create_supervisor(
+        [image_agent, taxonomy_agent, audio_finder_agent],
         model=llm,
         prompt=SubAgentConfig.get_router_prompt(provider=provider)
     )