import gradio as gr
import asyncio
import base64
import json
import os
import re
from langgraph_agent import AgentFactory
from langgraph_agent.config import AgentConfig
from fastmcp.client import Client
from fastmcp.client.transports import StreamableHttpTransport
from agent_cache import get_or_create_agent
# NOTE: parse_agent_response is imported lazily inside chat_with_tool_visibility
# so its ImportError fallback can trigger when LlamaIndex is not installed.
# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()
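# Optional environment-variable fallbacks, read below via os.getenv() when the
# corresponding sidebar field is left blank: HF_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY.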
# ============================================================================
# EXAMPLE SETS FOR DIFFERENT AGENT MODES
# ============================================================================
# Shared photo examples - always shown for both modes
PHOTO_EXAMPLES = [
{"text": "What bird is this?", "files": ["examples/bird_example_1.jpg"]},
{"text": "Can you identify this bird?", "files": ["examples/bird_example_2.jpg"]},
{"text": "Identify this bird and show me similar species", "files": ["examples/bird_example_5.jpg"]},
{"text": "", "files": ["examples/bird_example_6.jpg"]}
]
# Text-only examples for Specialized Subagents mode
MULTI_AGENT_TEXT_EXAMPLES = [
"Tell me about Northern Cardinals - show me images and audio",
"What birds are in the Cardinalidae family?",
"Find me audio recordings for Snow Goose",
"Get me bird call samples for any two species"
]
# Text-only examples reserved for a future Audio Finder Agent mode (not yet exposed in the mode dropdown)
AUDIO_FINDER_TEXT_EXAMPLES = [
"Find me audio for any bird",
"Get audio recordings for Snow Goose",
"Find me any two audio samples of bird calls",
"Show me audio recordings of Common Goldeneye"
]
# ============================================================================
# CUSTOM CSS WITH CLOUD/SKY AESTHETIC
# ============================================================================
custom_css = """
/* ========================================================================
GLOBAL STYLES - SKY/CLOUD AESTHETIC
======================================================================== */
/* Unified cloud/sky background across entire page */
body, html {
background: linear-gradient(180deg, #E0F4FF 0%, #B0E2FF 40%, #87CEEB 100%) !important;
min-height: 100vh !important;
}
.gradio-container {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif !important;
background:
/* Cloud formations - concentrated at TOP, fading down */
radial-gradient(ellipse 1200px 400px at 20% 0%, rgba(255, 255, 255, 0.6), transparent 70%),
radial-gradient(ellipse 1000px 350px at 80% 3%, rgba(255, 255, 255, 0.5), transparent 70%),
radial-gradient(ellipse 900px 300px at 50% 5%, rgba(255, 255, 255, 0.55), transparent 70%),
radial-gradient(ellipse 800px 250px at 10% 8%, rgba(255, 255, 255, 0.45), transparent 70%),
radial-gradient(ellipse 700px 200px at 90% 10%, rgba(255, 255, 255, 0.4), transparent 70%),
radial-gradient(ellipse 600px 180px at 40% 12%, rgba(255, 255, 255, 0.35), transparent 70%),
radial-gradient(ellipse 500px 150px at 60% 15%, rgba(255, 255, 255, 0.3), transparent 70%),
/* Base sky gradient - REVERSED: lighter at top, deeper blue at bottom */
linear-gradient(180deg, #E0F4FF 0%, #B0E2FF 40%, #87CEEB 100%) !important;
}
/* ========================================================================
SIDEBAR STYLING - DARK THEME
======================================================================== */
.sidebar {
background: #1f2937 !important;
padding: 24px 20px !important;
border-radius: 12px !important;
border: 1px solid #374151 !important;
}
/* Hide Gradio's default loading indicator in sidebar (we use badge for loading state) */
.sidebar .loading,
.sidebar .wrap.pending,
.sidebar .progress-bar,
.sidebar [class*="loading"],
.sidebar [class*="progress"] {
display: none !important;
}
/* Also hide the loading indicator that appears as a child of the sidebar */
.gradio-container .sidebar ~ * .loading,
.gradio-container .sidebar ~ * .progress-bar {
display: none !important;
}
/* Hide Gradio's global top progress bar (the blue horizontal line) */
.app > div > div > .progress-level-inner,
body > gradio-app > div > div > div.progress-level-inner,
[class*="progress-level"],
.progress-level-inner {
display: none !important;
visibility: hidden !important;
}
/* Make all sidebar text light for dark background */
.sidebar h1,
.sidebar h2,
.sidebar h3,
.sidebar h4,
.sidebar h5,
.sidebar h6 {
color: #f9fafb !important;
}
.sidebar p,
.sidebar span,
.sidebar label {
color: #d1d5db !important;
}
/* Keep links distinguishable */
.sidebar a {
color: #818cf8 !important;
text-decoration: underline !important;
}
.sidebar a:hover {
color: #a5b4fc !important;
}
/* API Key sections */
.hf-section, .openai-section, .anthropic-section {
margin-top: 12px !important;
}
/* Dark theme input styling */
.sidebar input[type="password"],
.sidebar input[type="text"],
.sidebar textarea {
border: 1px solid #374151 !important;
border-radius: 8px !important;
padding: 10px 14px !important;
font-size: 14px !important;
font-family: 'SF Mono', 'Monaco', 'Inconsolata', monospace !important;
background: #111827 !important;
color: #f9fafb !important;
transition: all 0.2s ease !important;
}
.sidebar input[type="password"]::placeholder,
.sidebar input[type="text"]::placeholder,
.sidebar textarea::placeholder {
color: #6b7280 !important;
}
.sidebar input[type="password"]:focus,
.sidebar input[type="text"]:focus,
.sidebar textarea:focus {
border-color: #818cf8 !important;
box-shadow: 0 0 0 2px rgba(129, 140, 248, 0.2) !important;
outline: none !important;
background: #1f2937 !important;
}
/* ========================================================================
CHATBOT & TOOL LOG PANELS
======================================================================== */
.chatbot-container {
border-radius: 12px !important;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.08) !important;
border: 1px solid #e5e7eb !important;
}
/* Force icon SVG elements to use light colors for visibility on dark background */
.chatbot-container svg,
.chatbot-container svg path,
.chatbot-container svg circle,
.chatbot-container svg rect {
fill: #d1d5db !important;
stroke: #d1d5db !important;
}
.tool-log-panel textarea {
background: #1f2937 !important;
border-radius: 12px !important;
padding: 20px !important;
border: 1px solid #374151 !important;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.08) !important;
font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Consolas', monospace !important;
font-size: 13px !important;
line-height: 1.6 !important;
color: #d1d5db !important;
resize: none !important;
height: 500px !important;
min-height: 500px !important;
max-height: 500px !important;
overflow-y: auto !important;
}
/* Ensure tool log panel container aligns perfectly */
.tool-log-panel {
margin: 0 !important;
padding: 0 !important;
}
.tool-log-panel textarea::-webkit-scrollbar {
width: 8px !important;
}
.tool-log-panel textarea::-webkit-scrollbar-track {
background: #111827 !important;
border-radius: 4px !important;
}
.tool-log-panel textarea::-webkit-scrollbar-thumb {
background: #4b5563 !important;
border-radius: 4px !important;
}
.tool-log-panel textarea::-webkit-scrollbar-thumb:hover {
background: #6b7280 !important;
}
hr {
border: none !important;
border-top: 1px solid #374151 !important;
margin: 20px 0 !important;
}
.sidebar hr {
border-top-color: #374151 !important;
}
/* ========================================================================
TEXT ON LIGHT BACKGROUND - MAKE DARK FOR READABILITY
======================================================================== */
/* All text elements outside dark panels should be dark for readability */
.gradio-container label:not(.sidebar label):not(.tool-log-panel label):not(.chatbot-container label),
.gradio-container span:not(.sidebar span):not(.tool-log-panel span):not(.chatbot-container span):not(.birdscope-header span),
.gradio-container p:not(.sidebar p):not(.tool-log-panel p):not(.chatbot-container p):not(.birdscope-header p),
.gradio-container div:not(.sidebar div):not(.tool-log-panel div):not(.chatbot-container div):not(.birdscope-header div) {
color: #1a1a1a !important;
}
/* Markdown text outside dark panels */
.gradio-container .markdown:not(.sidebar .markdown):not(.tool-log-panel .markdown) {
color: #1a1a1a !important;
}
/* Markdown headings - ensure all are black on light background (except sidebar) */
.gradio-container .markdown:not(.sidebar .markdown) h1,
.gradio-container .markdown:not(.sidebar .markdown) h2,
.gradio-container .markdown:not(.sidebar .markdown) h3,
.gradio-container .markdown:not(.sidebar .markdown) h4,
.gradio-container .markdown:not(.sidebar .markdown) h5,
.gradio-container .markdown:not(.sidebar .markdown) h6 {
color: #1a1a1a !important;
}
/* Regular buttons (not primary) should have dark text */
button:not([variant="primary"]) {
color: #1a1a1a !important;
}
/* BUT sidebar buttons should have light text (override above) */
.sidebar button:not([variant="primary"]),
.sidebar button:not([variant="primary"]) span,
.sidebar button:not([variant="primary"]) * {
color: #f9fafb !important;
}
/* Modal check button with logo */
.modal-check-btn {
background: rgba(59, 130, 246, 0.1) !important;
border: 1px solid rgba(59, 130, 246, 0.3) !important;
border-radius: 9999px !important;
transition: all 0.2s ease !important;
cursor: pointer !important;
}
.modal-check-btn:hover {
background: rgba(59, 130, 246, 0.2) !important;
border-color: rgba(59, 130, 246, 0.5) !important;
transform: translateY(-1px);
box-shadow: 0 2px 8px rgba(59, 130, 246, 0.3) !important;
}
.modal-check-btn:active {
transform: translateY(0);
}
.modal-check-btn::before {
content: "";
display: inline-block;
width: 18px;
height: 18px;
margin-right: 8px;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100' fill='none'%3E%3C!-- Left ribbon --%3E%3Cpath d='M0 30 L25 15 L50 30 L50 70 L25 85 L0 70 Z' fill='%2335D07F'/%3E%3Cpath d='M25 15 L50 30 L25 45 L0 30 Z' fill='%2388E5A8'/%3E%3Cpath d='M25 45 L50 30 L50 70 L25 85 Z' fill='%2315B866'/%3E%3C!-- Right ribbon --%3E%3Cpath d='M50 30 L75 15 L100 30 L100 70 L75 85 L50 70 Z' fill='%2335D07F'/%3E%3Cpath d='M75 15 L100 30 L75 45 L50 30 Z' fill='%2388E5A8'/%3E%3Cpath d='M75 45 L100 30 L100 70 L75 85 Z' fill='%2315B866'/%3E%3C/svg%3E");
background-size: contain;
background-repeat: no-repeat;
background-position: center;
vertical-align: middle;
}
/* ========================================================================
EXAMPLES - BLACK TEXT FOR READABILITY
======================================================================== */
/* Examples label - force black text with very high specificity */
label.svelte-1gfkn6j,
.label,
span.svelte-1gfkn6j {
color: #1a1a1a !important;
}
/* Target example buttons specifically, excluding footer */
.gradio-container button:not([variant="primary"]):not(.sidebar button):not(footer button):not([class*="footer"] button) {
color: #1a1a1a !important;
}
/* Footer text should be black on light background */
footer,
footer *,
footer a,
[class*="footer"],
[class*="footer"] *,
[class*="footer"] a {
color: #1a1a1a !important;
}
/* ========================================================================
ENHANCED HEADER - BIRDSCOPE BRANDING
======================================================================== */
/* Quicksand is imported at the top of this stylesheet (@import must precede all other rules) */
.birdscope-header {
position: relative;
overflow: hidden;
padding: 2rem 1.5rem;
}
/* Decorative cloud elements */
.cloud-decor-1 {
position: absolute;
top: -2.5rem;
right: 2.5rem;
width: 10rem;
height: 10rem;
background: rgba(255, 255, 255, 0.4);
border-radius: 50%;
filter: blur(60px);
pointer-events: none;
}
.cloud-decor-2 {
position: absolute;
top: 0;
right: 33%;
width: 8rem;
height: 8rem;
background: rgba(224, 242, 254, 0.5);
border-radius: 50%;
filter: blur(40px);
pointer-events: none;
}
.cloud-decor-3 {
position: absolute;
top: -1.25rem;
left: 5rem;
width: 6rem;
height: 6rem;
background: rgba(255, 255, 255, 0.3);
border-radius: 50%;
filter: blur(40px);
pointer-events: none;
}
/* Flying birds animation */
@keyframes drift {
0%, 100% { transform: translateX(0) translateY(0); }
50% { transform: translateX(10px) translateY(-5px); }
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(5px); }
to { opacity: 1; transform: translateY(0); }
}
.bird-silhouette {
position: absolute;
animation: drift 8s ease-in-out infinite;
}
.bird-1 { top: 1.5rem; right: 8rem; width: 1.25rem; height: 1.25rem; color: rgba(148, 163, 184, 0.3); }
.bird-2 { top: 2.5rem; right: 12rem; width: 1rem; height: 1rem; color: rgba(148, 163, 184, 0.2); animation-delay: 1s; }
.bird-3 { top: 1rem; right: 16rem; width: 0.75rem; height: 0.75rem; color: rgba(148, 163, 184, 0.15); animation-delay: 2s; }
/* Logo container */
.bird-logo-wrapper {
position: relative;
display: inline-block;
}
.bird-logo-glow {
position: absolute;
inset: 0;
background: linear-gradient(135deg, #38bdf8 0%, #3b82f6 100%);
border-radius: 1rem;
filter: blur(8px);
opacity: 0.3;
transition: opacity 0.3s;
}
.bird-logo-wrapper:hover .bird-logo-glow {
opacity: 0.5;
}
.bird-logo {
position: relative;
background: linear-gradient(135deg, #38bdf8 0%, #3b82f6 100%);
padding: 0.75rem;
border-radius: 1rem;
box-shadow: 0 10px 25px rgba(56, 189, 248, 0.2);
}
/* Header content */
.header-content {
position: relative;
z-index: 10;
max-width: 72rem;
margin: 0 auto;
}
.header-top {
display: flex;
align-items: center;
gap: 1rem;
}
.header-title-group h1 {
font-family: 'Quicksand', 'Nunito', sans-serif !important;
font-size: 1.875rem !important;
font-weight: 700 !important;
color: #1e293b !important;
letter-spacing: -0.025em !important;
margin: 0 !important;
display: inline !important;
}
.header-ai-text {
font-size: 1.5rem;
font-weight: 300;
color: #0ea5e9;
margin-left: 0.5rem;
}
.header-v2-badge {
display: inline-block;
padding: 0.125rem 0.5rem;
font-size: 0.75rem;
font-weight: 600;
background: linear-gradient(to right, #fbbf24, #f97316);
color: white;
border-radius: 9999px;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.1);
margin-left: 0.5rem;
}
.header-subtitle {
color: #64748b !important;
font-size: 0.875rem !important;
margin-top: 0.125rem !important;
}
.mcp-badge {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0.75rem;
background: rgba(255, 255, 255, 0.6);
backdrop-filter: blur(8px);
border: 1px solid #e2e8f0;
border-radius: 6px;
font-size: 0.75rem;
color: #1a1a1a !important;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
margin-left: auto;
user-select: none;
}
.mcp-badge span {
color: #1a1a1a !important;
}
.mcp-badge.checking {
animation: badgePulse 1.5s ease-in-out infinite !important;
background: rgba(251, 191, 36, 0.15) !important;
border-color: #fbbf24 !important;
}
/* White text while checking */
.mcp-badge.checking span {
color: #ffffff !important;
}
/* Disable hover effects while checking */
.mcp-badge.checking:hover {
transform: none !important;
animation: badgePulse 1.5s ease-in-out infinite !important;
}
.mcp-badge.checking .mcp-pulse {
background: #fbbf24;
animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
}
.mcp-badge.offline .mcp-pulse {
background: #ef4444;
animation: none;
}
.mcp-badge.online .mcp-pulse {
background: #34d399;
}
.mcp-pulse {
width: 0.5rem;
height: 0.5rem;
background: #34d399;
border-radius: 50%;
animation: pulse 2s cubic-bezier(0.4, 0, 0.6, 1) infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
@keyframes badgePulse {
0%, 100% {
opacity: 1;
transform: scale(1);
}
50% {
opacity: 0.8;
transform: scale(1.08);
}
}
/* Feature tags */
.feature-tags {
margin-top: 1.25rem;
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
}
.feature-tag {
display: inline-flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0.75rem;
background: rgba(255, 255, 255, 0.7);
backdrop-filter: blur(8px);
border: 1px solid rgba(226, 232, 240, 0.8);
border-radius: 9999px;
font-size: 0.875rem;
color: #1a1a1a !important;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
cursor: default;
animation: fadeIn 0.4s ease-out forwards;
opacity: 0;
}
.feature-tag span {
color: #1a1a1a !important;
}
.feature-tag:nth-child(1) { animation-delay: 0ms; }
.feature-tag:nth-child(2) { animation-delay: 80ms; }
.feature-tag:nth-child(3) { animation-delay: 160ms; }
.feature-tag:nth-child(4) { animation-delay: 240ms; }
/* Bottom border */
.header-border {
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 1px;
background: linear-gradient(to right, transparent, #e2e8f0, transparent);
}
/* Mobile responsive */
@media (max-width: 640px) {
.mcp-badge {
display: none;
}
.header-top {
flex-direction: column;
align-items: flex-start;
}
}
/* ========================================================================
ONBOARDING FLOW STYLING
======================================================================== */
/* Center and constrain onboarding pages */
.onboarding-page {
max-width: 500px !important;
margin: 2rem auto !important;
padding: 32px !important;
}
/* Ensure welcome text is visible on dark background */
.welcome-text h1, .api-key-text h1 {
color: #f9fafb !important;
}
/* Scroll animation for step transitions */
.onboarding-page {
animation: fadeInStep 0.3s ease-out;
}
@keyframes fadeInStep {
from {
opacity: 0.5;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* ========================================================================
README TAB STYLING - BLACK TEXT ON WHITE BACKGROUND
======================================================================== */
.readme-tab-container {
background-color: #ffffff !important;
padding: 2rem !important;
border-radius: 12px !important;
max-width: 1200px !important;
margin: 1rem auto !important;
box-shadow: 0 4px 20px rgba(0, 0, 0, 0.08) !important;
}
.readme-markdown,
.readme-markdown *,
.readme-markdown h1,
.readme-markdown h2,
.readme-markdown h3,
.readme-markdown h4,
.readme-markdown h5,
.readme-markdown h6,
.readme-markdown p,
.readme-markdown li,
.readme-markdown span,
.readme-markdown div,
.readme-markdown strong,
.readme-markdown em,
.readme-markdown code {
color: #000000 !important;
background-color: transparent !important;
}
.readme-markdown a {
color: #2563eb !important;
text-decoration: underline !important;
}
.readme-markdown a:hover {
color: #1d4ed8 !important;
}
.readme-markdown code {
background-color: #f3f4f6 !important;
padding: 2px 6px !important;
border-radius: 4px !important;
font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Consolas', monospace !important;
}
.readme-markdown pre {
background-color: #f3f4f6 !important;
padding: 1rem !important;
border-radius: 8px !important;
overflow-x: auto !important;
}
.readme-markdown pre code {
background-color: transparent !important;
padding: 0 !important;
}
.readme-markdown blockquote {
border-left: 4px solid #e5e7eb !important;
padding-left: 1rem !important;
color: #4b5563 !important;
}
.readme-markdown hr {
border-top: 1px solid #e5e7eb !important;
}
.readme-markdown table {
border-collapse: collapse !important;
width: 100% !important;
}
.readme-markdown table th,
.readme-markdown table td {
border: 1px solid #e5e7eb !important;
padding: 0.5rem !important;
}
.readme-markdown table th {
background-color: #f9fafb !important;
font-weight: 600 !important;
}
"""
# ============================================================================
# CHAT FUNCTIONS - DUAL OUTPUT (CHAT + TOOL LOG)
# ============================================================================
def format_tool_output_for_chat(tool_output):
"""
Parse tool output and format images/content for display in chatbot.
Detects image URLs and converts them to markdown image syntax.
Handles both JSON-formatted MCP responses and plain text.
"""
# Extract content from ToolMessage objects (LangGraph wraps outputs in ToolMessage)
if hasattr(tool_output, 'content'):
output_str = tool_output.content
print("[FORMAT_TOOL_OUTPUT] Extracted content from ToolMessage")
elif isinstance(tool_output, dict) and 'content' in tool_output:
output_str = tool_output['content']
print("[FORMAT_TOOL_OUTPUT] Extracted content from dict")
else:
output_str = str(tool_output)
print("[FORMAT_TOOL_OUTPUT] Using str() fallback")
image_urls = []
# Try to parse as JSON first (MCP tools often return JSON)
try:
parsed = json.loads(output_str)
print("[FORMAT_TOOL_OUTPUT] Successfully parsed JSON")
# Extract URLs from common JSON structures
if isinstance(parsed, dict):
# Check for "data" field (Nuthatch MCP format)
data = parsed.get("data", [])
if isinstance(data, list):
# data is a list of URLs
for item in data:
if isinstance(item, str) and item.startswith("http"):
image_urls.append(item)
elif isinstance(data, str) and data.startswith("http"):
image_urls.append(data)
# Also check for images in nested structures
for key, value in parsed.items():
if isinstance(value, list):
for item in value:
if isinstance(item, str) and item.startswith("http") and any(ext in item.lower() for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg']):
image_urls.append(item)
except (json.JSONDecodeError, ValueError):
# Not JSON, fallback to regex extraction
pass
# Fallback: regex extraction for non-JSON or additional URLs
if not image_urls:
# Updated pattern: more permissive to catch URLs even with surrounding JSON characters
# Match URLs ending in image extensions, allowing any characters before the extension
image_pattern = r'https?://[^\s]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s"]*)?'
found_urls = re.findall(image_pattern, output_str, re.IGNORECASE)
image_urls.extend(found_urls)
# Remove duplicates while preserving order
seen = set()
unique_urls = []
for url in image_urls:
# Clean URL (remove trailing quotes, brackets, etc.)
clean_url = url.rstrip('",}]')
if clean_url not in seen:
seen.add(clean_url)
unique_urls.append(clean_url)
if unique_urls:
# Format images as markdown
formatted_output = ""
for url in unique_urls[:3]: # Limit to first 3 images to avoid clutter
formatted_output += f"![Image]({url})\n\n"
print(f"[FORMAT_TOOL_OUTPUT] ✅ Formatted {len(unique_urls[:3])} images as markdown")
return formatted_output
# If no images, return truncated text
if len(output_str) > 200:
return output_str[:200] + "...\n\n"
return output_str + "\n\n" if output_str else ""
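# Worked example (a sketch; the payload below is hypothetical, not a captured
# MCP response):
#   format_tool_output_for_chat('{"data": ["https://example.com/a.jpg"]}')
#   -> '![Image](https://example.com/a.jpg)\n\n'
# Plain text with no image URLs is returned as-is, truncated to 200 characters.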
async def chat_with_tool_visibility(
message,
history,
provider,
hf_key,
openai_key,
anthropic_key,
agent_mode,
request: gr.Request,
progress=gr.Progress()
):
"""
Dual-output streaming: chat response + tool execution log
Yields: tuple(chat_response_text, tool_log_markdown)
"""
# -------------------------------------------------------------------------
# 1. VALIDATE CREDENTIALS & SELECT PROVIDER
# -------------------------------------------------------------------------
if provider == "HuggingFace":
api_key = (hf_key.strip() if hf_key and hf_key.strip()
else os.getenv("HF_API_KEY", ""))
if not api_key:
yield "**API Key Required**\n\nPlease enter your HuggingFace API key in the sidebar.", "*Waiting for API key...*"
return
provider_key = "huggingface"
model = AgentConfig.DEFAULT_HF_MODEL
elif provider == "Anthropic":
api_key = (anthropic_key.strip() if anthropic_key and anthropic_key.strip()
else os.getenv("ANTHROPIC_API_KEY", ""))
if not api_key:
yield "**API Key Required**\n\nPlease enter your Anthropic API key in the sidebar.", "*Waiting for API key...*"
return
provider_key = "anthropic"
model = AgentConfig.DEFAULT_ANTHROPIC_MODEL
else: # OpenAI
api_key = (openai_key.strip() if openai_key and openai_key.strip()
else os.getenv("OPENAI_API_KEY", ""))
if not api_key:
yield "**API Key Required**\n\nPlease enter your OpenAI API key in the sidebar.", "*Waiting for API key...*"
return
provider_key = "openai"
model = AgentConfig.DEFAULT_OPENAI_MODEL
# -------------------------------------------------------------------------
# 2. GET OR CREATE AGENT
# -------------------------------------------------------------------------
progress(0.1, desc="🔧 Initializing agent...")
try:
session_id = request.session_hash
# Get or create agent (unified subagent architecture)
agent = await get_or_create_agent(
session_id=session_id,
provider=provider_key,
api_key=api_key,
model=model,
mode=agent_mode, # Include mode in cache key
agent_factory_method=lambda: AgentFactory.create_subagent_orchestrator(
model=model,
api_key=api_key,
provider=provider_key,
mode=agent_mode # Pass mode to determine agent composition
)
)
except Exception as e:
yield f"**Agent Creation Failed**\n\n{str(e)}", "*Agent creation failed*"
return
progress(0.3, desc="🤖 Agent ready...")
config = {"configurable": {"thread_id": session_id}}
# -------------------------------------------------------------------------
# 3. PARSE MESSAGE & HANDLE IMAGE UPLOADS
# -------------------------------------------------------------------------
# Separate accumulators for chat and tool log
chat_response = ""
tool_log = ""
tool_count = 0
user_text = ""
if isinstance(message, dict):
user_text = message.get("text", "")
print(f"[DEBUG MESSAGE] User query: {user_text}") # DEBUG
files = message.get("files", [])
# Handle image uploads
if files and len(files) > 0:
image_path = files[0]
if image_path.startswith("http"):
# URL - agent will call classify_from_url
user_text += f"\n\nWhat bird is this? {image_path}"
else:
# Local file - call MCP tool directly (show in tool log)
tool_log += "🟢 Pre-Classification (Direct MODAL MCP Call)\n"
tool_log += "Tool: classify_from_base64\n"
tool_log += "Status: Calling Modal GPU classifier directly to avoid token limits...\n\n"
yield chat_response, tool_log
with open(image_path, "rb") as img_file:
image_data = base64.b64encode(img_file.read()).decode('utf-8')
# Direct MCP call
transport = StreamableHttpTransport(
url=AgentConfig.MODAL_MCP_URL,
headers={"X-API-Key": AgentConfig.BIRD_CLASSIFIER_API_KEY}
)
async with Client(transport) as client:
result = await client.call_tool(
"classify_from_base64",
arguments={"image_data": image_data}
)
if result and result.content:
classification = json.loads(result.content[0].text)
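# Assumed classifier response shape (illustrative):
#   {"species": "Northern Cardinal", "confidence": 0.97}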
species = classification.get("species", "Unknown")
confidence = classification.get("confidence", 0)
# Update tool log
tool_log += f"✅ Result: {species} ({confidence:.1%})\n"
tool_log += f"{json.dumps(classification, indent=2)}\n\n"
tool_log += "---\n\n"
# Update user message
user_text += f"\n\nI uploaded a bird image. The classifier identified it as: {species} (confidence: {confidence:.1%}). Can you tell me more about this bird?"
else:
tool_log += "❌ Failed\n\n---\n\n"
user_text += "\n\n⚠️ Failed to classify the uploaded image."
yield chat_response, tool_log
else:
user_text = message
# -------------------------------------------------------------------------
# 4. STREAM AGENT RESPONSE WITH TOOL VISIBILITY
# -------------------------------------------------------------------------
# Initial "thinking" indicator
progress(0.5, desc="💭 Thinking...")
chat_response = "💭 _Thinking..._"
tool_log += "🔵 Agent started processing...\n"
yield chat_response, tool_log
print(f"[DEBUG AGENT INPUT] Sending to agent: {user_text}") # DEBUG
async for event in agent.astream_events(
{"messages": [{"role": "user", "content": user_text}]},
config,
version="v2"
):
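# The v2 event stream tags each event with a kind; three are consumed below:
# "on_tool_start" (log tool name/input), "on_chat_model_stream" (append LLM
# tokens to the chat), and "on_tool_end" (log output, render image URLs).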
kind = event["event"]
# Tool call started
if kind == "on_tool_start":
tool_count += 1
tool_name = event["name"]
tool_input = event.get("data", {}).get("input", {})
# Update progress
progress(0.6 + (tool_count * 0.05), desc=f"🔍 Using {tool_name}...")
# Add to tool log
tool_log += f"\n🟢 Tool #{tool_count}: {tool_name}\n"
tool_log += f"Status: Running...\n"
tool_log += f"Input:\n{json.dumps(tool_input, indent=2)}\n\n"
# Also add visual indicator to chat (wrapped in semantic tag)
chat_response += f"\n\n<tool_call>🔧 Using {tool_name}...</tool_call>\n\n"
yield chat_response, tool_log
# LLM streaming tokens
elif kind == "on_chat_model_stream":
content = event["data"]["chunk"].content
if content:
# Clear "Thinking..." on first real content
if chat_response == "💭 _Thinking..._":
print("[STREAM] Clearing 'Thinking...' placeholder")
chat_response = ""
progress(0.7, desc="📝 Generating response...")
# Handle both string (OpenAI) and list (Anthropic) content formats
content_to_add = ""
if isinstance(content, list):
# Anthropic returns list of content blocks - extract text
for block in content:
if hasattr(block, 'text'):
content_to_add += block.text
elif isinstance(block, dict) and 'text' in block:
content_to_add += block['text']
else:
# OpenAI/HF return string directly
content_to_add = content
if content_to_add:
print(f"[STREAM] Adding LLM content: {content_to_add[:100]}...")
chat_response += content_to_add
yield chat_response, tool_log
# Tool finished
elif kind == "on_tool_end":
tool_output = event.get("data", {}).get("output", "")
# Update progress
progress(0.8, desc="📊 Processing results...")
# Format output for tool log (truncate if needed)
output_str = str(tool_output)
if len(output_str) > 1000:
output_str = output_str[:1000] + "\n...(truncated)"
# Add to tool log
tool_log += f"✅ Status: Completed\n"
tool_log += f"Output:\n{output_str}\n\n"
tool_log += "---\n\n"
# Format output for chat display (with image rendering)
formatted_output = format_tool_output_for_chat(tool_output)
if formatted_output.strip():
print(f"[STREAM] Adding formatted tool output ({len(formatted_output)} chars): {formatted_output[:200]}...")
print(f"[STREAM] chat_response length before: {len(chat_response)}")
chat_response += formatted_output
print(f"[STREAM] chat_response length after: {len(chat_response)}")
yield chat_response, tool_log
# Final yield
# Post-process the accumulated response with the LlamaIndex-based output parser
progress(0.9, desc="✨ Finalizing response...")
print(f"\n[FINAL] chat_response length before parsing: {len(chat_response)}")
print(f"[FINAL] chat_response preview (first 300): {chat_response[:300]}")
print(f"[FINAL] chat_response preview (last 300): {chat_response[-300:]}\n")
try:
from langgraph_agent.structured_output import parse_agent_response
formatted_response = await parse_agent_response(
raw_response=chat_response,
provider=provider_key,
api_key=api_key,
model=model
)
print(f"\n[FINAL] Formatted response length: {len(formatted_response)}")
print(f"[FINAL] Formatted response (last 800 chars): {formatted_response[-800:]}")
print(f"[FINAL] Image markdown count: {formatted_response.count('![')}")
progress(1.0, desc="✅ Complete")
yield formatted_response, tool_log
except ImportError:
# Fallback if LlamaIndex not installed
progress(1.0, desc="✅ Complete")
yield chat_response, tool_log
except Exception as e:
# Fallback if parsing fails
print(f"[STRUCTURED OUTPUT ERROR]: {e}")
progress(1.0, desc="✅ Complete")
yield chat_response, tool_log
# ============================================================================
# MODAL SERVER HEALTH CHECK
# ============================================================================
async def check_modal_server_health():
"""
Check if Modal MCP server is alive and warm.
Returns status message for UI display.
"""
print("[DEBUG] Health check started...")
async def do_health_check():
transport = StreamableHttpTransport(
url=AgentConfig.MODAL_MCP_URL,
headers={"X-API-Key": AgentConfig.BIRD_CLASSIFIER_API_KEY}
)
async with Client(transport) as client:
# Try to list tools as a health check
tools = await client.list_tools()
if tools and len(tools) > 0:
return f"✅ Online ({len(tools)} tools ready)"
else:
return "⚠️ Server responded but no tools found"
try:
# Wrap in timeout - Modal cold starts can take 30-60 seconds
result = await asyncio.wait_for(do_health_check(), timeout=60.0)
print(f"[DEBUG] Health check result: {result}")
return result
except asyncio.TimeoutError:
print("[DEBUG] Health check timeout")
return "⏱️ Timeout (still warming up...)"
except Exception as e:
print(f"[DEBUG] Health check error: {e}")
error_msg = str(e)
if "401" in error_msg or "Unauthorized" in error_msg:
return "🔐 Auth failed"
elif "timeout" in error_msg.lower():
return "⏱️ Timeout (waking up...)"
else:
return f"❌ Offline"
def show_immediate_loading(message, history, tool_log_state):
"""
Show immediate loading indicator when user submits a message.
This provides instant feedback before async processing begins.
Returns: (updated_history, updated_tool_log)
"""
# Just add a loading indicator to the history
# The user message will be added by chat_wrapper to avoid duplication
updated_history = history + [
{"role": "assistant", "content": "⏳ _Starting..._"}
]
# Add initial message to tool log
updated_tool_log = "🔵 Initializing agent...\n"
return updated_history, updated_tool_log
# Wrapper to convert to Gradio 6 message format
async def chat_wrapper(message, history, provider, hf_key, openai_key, anthropic_key, agent_mode, tool_log_state, request: gr.Request, progress=gr.Progress()):
"""
Wrapper to convert chat outputs to Gradio 6 message format.
Returns: (updated_history, updated_tool_log)
"""
# Debug: print received API keys
print(f"[DEBUG] chat_wrapper received - provider: {provider}, hf_key: {'***' if hf_key else 'None'}, openai_key: {'***' if openai_key else 'None'}, anthropic_key: {'***' if anthropic_key else 'None'}")
# Extract user message text
if isinstance(message, dict):
user_message_text = message.get("text", "")
else:
user_message_text = message
# Check if immediate loading added a loading indicator
if (len(history) >= 1 and
history[-1].get("role") == "assistant" and
history[-1].get("content") == "⏳ _Starting..._"):
# Remove loading indicator
history = history[:-1]
# Add user message to history
history = history + [{"role": "user", "content": user_message_text}]
# Stream response
async for chat_text, tool_log_text in chat_with_tool_visibility(message, history, provider, hf_key, openai_key, anthropic_key, agent_mode, request, progress):
# Update history with assistant response
updated_history = history + [{"role": "assistant", "content": chat_text}]
yield updated_history, tool_log_text
# ============================================================================
# UI DEFINITION - DUAL PANEL LAYOUT WITH CLOUD AESTHETIC
# ============================================================================
# Helper function to update text examples based on agent mode
def update_text_examples_for_mode(mode):
"""Return appropriate text example dataset based on agent mode."""
print(f"[DEBUG] Updating text examples for mode: {mode}")
# Placeholder for future mode-specific examples, e.g.:
# if mode == "Audio Finder Agent":
#     samples = [[text] for text in AUDIO_FINDER_TEXT_EXAMPLES]
# Default: Supervisor (Multi-Agent) - includes image ID, taxonomy, and audio finder
samples = [[text] for text in MULTI_AGENT_TEXT_EXAMPLES]
print(f"[DEBUG] Multi-agent text samples: {len(samples)} examples")
return gr.Dataset(samples=samples)
# Helper function to create config HTML
def create_config_html(provider_choice, agent_mode_choice, hf_key_input, openai_key_input, anthropic_key_input=""):
"""Generate sky-themed config card HTML."""
# Determine model and API key status
if provider_choice == "HuggingFace":
model = AgentConfig.DEFAULT_HF_MODEL
has_key = bool((hf_key_input and hf_key_input.strip()) or os.getenv("HF_API_KEY"))
elif provider_choice == "Anthropic":
model = AgentConfig.DEFAULT_ANTHROPIC_MODEL
has_key = bool((anthropic_key_input and anthropic_key_input.strip()) or os.getenv("ANTHROPIC_API_KEY"))
else:
model = AgentConfig.DEFAULT_OPENAI_MODEL
has_key = bool((openai_key_input and openai_key_input.strip()) or os.getenv("OPENAI_API_KEY"))
# Extract mode display name ("Audio Finder" is a reserved future mode;
# the supervisor/subagent modes all display as "3 Specialists")
mode_display = "Audio Finder" if "Audio Finder" in agent_mode_choice else "3 Specialists"
# Status styling
if has_key:
status_bg = "rgba(16, 185, 129, 0.2)"
status_color = "#10b981"
status_icon = "✓"
else:
status_bg = "rgba(239, 68, 68, 0.2)"
status_color = "#ef4444"
status_icon = "✗"
return f"""
<div style="
background: linear-gradient(135deg, rgba(31, 41, 55, 0.95) 0%, rgba(17, 24, 39, 0.98) 100%);
border-radius: 12px;
padding: 16px 20px;
font-family: 'Segoe UI', system-ui, sans-serif;
border: 1px solid #374151;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
backdrop-filter: blur(10px);
">
<!-- Provider Row -->
<div style="
display: flex;
align-items: center;
justify-content: space-between;
padding: 6px 0;
">
<span style="font-size: 12px; color: #9ca3af;">Provider</span>
<div style="display: flex; align-items: center; gap: 6px;">
<span style="
font-size: 13px;
font-weight: 500;
color: #f9fafb;
">{provider_choice}</span>
<span style="
display: inline-flex;
align-items: center;
justify-content: center;
width: 18px;
height: 18px;
border-radius: 50%;
background: {status_bg};
color: {status_color};
font-size: 11px;
font-weight: bold;
">{status_icon}</span>
</div>
</div>
<!-- Model Row -->
<div style="
display: flex;
align-items: center;
justify-content: space-between;
padding: 6px 0;
">
<span style="font-size: 12px; color: #9ca3af;">Model</span>
<span style="
font-size: 12px;
font-weight: 500;
color: #60a5fa;
font-family: 'SF Mono', 'Fira Code', 'Consolas', monospace;
background: rgba(59, 130, 246, 0.15);
padding: 2px 8px;
border-radius: 4px;
">{model}</span>
</div>
<!-- Mode Row -->
<div style="
display: flex;
align-items: center;
justify-content: space-between;
padding: 6px 0;
">
<span style="font-size: 12px; color: #9ca3af;">Mode</span>
<span style="
font-size: 12px;
font-weight: 500;
color: #38bdf8;
background: rgba(56, 189, 248, 0.15);
padding: 3px 10px;
border-radius: 20px;
border: 1px solid rgba(56, 189, 248, 0.3);
">{mode_display}</span>
</div>
</div>
"""
# JavaScript to force dark mode on first load (theme/css/js are gr.Blocks
# constructor arguments, so they are supplied here rather than at launch())
force_dark_mode = """
function() {
const params = new URLSearchParams(window.location.search);
if (!params.has('__theme')) {
params.set('__theme', 'dark');
window.location.search = params.toString();
}
}
"""
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, js=force_dark_mode) as demo:
# ============================================================================
# STATE MANAGEMENT - ONBOARDING FLOW
# ============================================================================
stored_hf_key = gr.State("")
stored_openai_key = gr.State("")
stored_anthropic_key = gr.State("")
# Enhanced BirdScope header
gr.HTML("""
<header class="birdscope-header">
<!-- Decorative cloud elements -->
<div style="position: absolute; inset: 0; overflow: hidden; pointer-events: none;">
<div class="cloud-decor-1"></div>
<div class="cloud-decor-2"></div>
<div class="cloud-decor-3"></div>
<!-- Flying bird silhouettes -->
<svg class="bird-silhouette bird-1" viewBox="0 0 24 24" fill="currentColor">
<path d="M3.5 12C3.5 12 6 9 12 9C18 9 20.5 12 20.5 12C20.5 12 18 10 12 10C6 10 3.5 12 3.5 12Z"/>
</svg>
<svg class="bird-silhouette bird-2" viewBox="0 0 24 24" fill="currentColor">
<path d="M3.5 12C3.5 12 6 9 12 9C18 9 20.5 12 20.5 12C20.5 12 18 10 12 10C6 10 3.5 12 3.5 12Z"/>
</svg>
<svg class="bird-silhouette bird-3" viewBox="0 0 24 24" fill="currentColor">
<path d="M3.5 12C3.5 12 6 9 12 9C18 9 20.5 12 20.5 12C20.5 12 18 10 12 10C6 10 3.5 12 3.5 12Z"/>
</svg>
</div>
<!-- Main content -->
<div class="header-content">
<!-- Logo and title row -->
<div class="header-top">
<!-- Bird logo -->
<div class="bird-logo-wrapper">
<div class="bird-logo-glow"></div>
<div class="bird-logo">
<svg style="width: 2rem; height: 2rem; color: white;" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round">
<!-- Stylized bird -->
<path d="M21 8c-2 0-4 1-6 3-1.5-2-4-3-7-3-2 0-4 .5-5 1l8 4-2 6 4-3 4 3-2-6 8-4c-1-.5-1.5-1-2-1z" fill="currentColor" stroke-width="0"/>
<circle cx="7" cy="9" r="1" fill="white"/>
</svg>
</div>
</div>
<!-- Title -->
<div class="header-title-group">
<div style="display: flex; align-items: baseline; gap: 0.5rem;">
<h1>BirdScope</h1>
<span class="header-ai-text">AI</span>
</div>
<p class="header-subtitle">AI-powered bird identification & species reference</p>
</div>
</div>
<!-- Feature tags with MCP status check button -->
<div class="feature-tags">
<div class="feature-tag">
<span>🔍</span>
<span>Image Classification</span>
</div>
<div class="feature-tag">
<span>📸</span>
<span>Unsplash Reference</span>
</div>
<div class="feature-tag">
<span>🎵</span>
<span>Audio Recordings</span>
</div>
<div class="feature-tag">
<span>🌍</span>
<span>Conservation Status</span>
</div>
</div>
</div>
<!-- Bottom border -->
<div class="header-border"></div>
</header>
<script>
// Auto-scroll tool log panel continuously (for Textbox component)
const observer = new MutationObserver(() => {
const toolLog = document.querySelector('#tool-log-output textarea');
if (toolLog) {
toolLog.scrollTop = toolLog.scrollHeight;
}
});
// Start observing once the page loads
setTimeout(() => {
const toolLogContainer = document.querySelector('#tool-log-output');
if (toolLogContainer) {
observer.observe(toolLogContainer, {
childList: true,
subtree: true,
characterData: true,
attributes: true
});
}
}, 1000);
</script>
""")
# ============================================================================
# ONBOARDING WALKTHROUGH - Using Native Gradio Component
# ============================================================================
with gr.Walkthrough(selected=1) as walkthrough:
# Step 1: Welcome & Provider Selection
with gr.Step("Welcome", id=1):
with gr.Column(elem_classes=["sidebar", "onboarding-page"]):
# Skip onboarding button at top of sidebar
with gr.Row():
gr.HTML("<div></div>") # Spacer
skip_btn = gr.Button(
"Skip onboarding →",
size="sm",
variant="secondary",
scale=0,
min_width=140
)
gr.Markdown(
"""
# Welcome to BirdScope AI!
Let's get you started with your AI-powered bird identification assistant.
""",
elem_classes=["welcome-text"]
)
gr.Markdown("---")
gr.Markdown("### SELECT LLM PROVIDER")
welcome_provider = gr.Dropdown(
choices=["HuggingFace", "OpenAI", "Anthropic"],
value="OpenAI",
show_label=False,
container=False
)
gr.Markdown("**Choose your AI provider**")
gr.Markdown("Select between HuggingFace (open models) or OpenAI (GPT models)")
gr.Markdown("---")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.brandfetch.io/idGqKHD5xE/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1668516030712"
alt="HuggingFace"
style="width: 20px; height: 20px;">
<strong style="color: #d1d5db;">HuggingFace</strong>
</div>
""")
gr.Markdown("Uses open-source models like Qwen 2.5-72B")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-top: 16px;">
<img src="https://cdn.oaistatic.com/_next/static/media/apple-touch-icon.59f2e898.png"
alt="OpenAI"
style="width: 20px; height: 20px; border-radius: 4px;">
<strong style="color: #d1d5db;">OpenAI</strong>
</div>
""")
gr.Markdown("Uses GPT-4 models for high-quality responses")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-top: 16px;">
<img src="https://cdn.brandfetch.io/idmJWF3N06/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1721803183716"
alt="Anthropic"
style="width: 20px; height: 20px; filter: invert(52%) sepia(48%) saturate(779%) hue-rotate(327deg) brightness(91%) contrast(88%);">
<strong style="color: #d1d5db;">Anthropic</strong>
</div>
""")
gr.Markdown("Uses Claude models (Sonnet, Opus, Haiku)")
gr.Markdown("---")
welcome_next_btn = gr.Button("Next: Enter API Key →", variant="primary", size="lg")
# Step 2: API Key Input
with gr.Step("API Key", id=2):
with gr.Column(elem_classes=["sidebar", "onboarding-page"]):
gr.Markdown("# Step 2: Enter Your API Key 🔑")
gr.Markdown("To use BirdScope AI, you'll need an API key from your selected provider.")
gr.Markdown("---")
# HuggingFace API key section
with gr.Column(visible=False) as hf_key_section:
gr.Markdown("### AUTHENTICATION")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.brandfetch.io/idGqKHD5xE/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1668516030712"
alt="HuggingFace"
style="width: 20px; height: 20px;">
<strong style="color: #d1d5db;">HuggingFace API Key</strong>
</div>
""")
onboarding_hf_key = gr.Textbox(
placeholder="hf_...",
type="password",
show_label=False,
container=False,
elem_classes=["hf-section"]
)
gr.Markdown("Get your key from [HF Settings](https://huggingface.co/settings/tokens)")
# OpenAI API key section
with gr.Column(visible=False) as openai_key_section:
gr.Markdown("### AUTHENTICATION")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.oaistatic.com/_next/static/media/apple-touch-icon.59f2e898.png"
alt="OpenAI"
style="width: 20px; height: 20px; border-radius: 4px;">
<strong style="color: #d1d5db;">OpenAI API Key</strong>
</div>
""")
onboarding_openai_key = gr.Textbox(
placeholder="sk-...",
type="password",
show_label=False,
container=False,
elem_classes=["openai-section"]
)
gr.Markdown("Get your key from [OpenAI Platform](https://platform.openai.com/api-keys)")
# Anthropic API key section
with gr.Column(visible=False) as anthropic_key_section:
gr.Markdown("### AUTHENTICATION")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.brandfetch.io/idmJWF3N06/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1721803183716"
alt="Anthropic"
style="width: 20px; height: 20px; filter: invert(52%) sepia(48%) saturate(779%) hue-rotate(327deg) brightness(91%) contrast(88%);">
<strong style="color: #d1d5db;">Anthropic API Key</strong>
</div>
""")
onboarding_anthropic_key = gr.Textbox(
placeholder="sk-ant-...",
type="password",
show_label=False,
container=False,
elem_classes=["anthropic-section"]
)
gr.Markdown("Get your key from [Anthropic Console](https://console.anthropic.com/settings/keys)")
gr.Markdown("---")
with gr.Row():
api_back_btn = gr.Button("← Back", variant="secondary", scale=1)
api_start_btn = gr.Button("Start Using BirdScope →", variant="primary", scale=3)
# Step 3: Main App
with gr.Step("BirdScope AI", id=3):
with gr.Tabs():
with gr.Tab("💬 Chat"):
with gr.Row():
# Left: Chat interface (scale=2)
with gr.Column(scale=2):
chatbot = gr.Chatbot(
show_label=False,
height=500,
elem_classes=["chatbot-container"]
)
msg = gr.MultimodalTextbox(
placeholder="Ask about birds or upload an image...",
file_count="single",
file_types=["image"],
interactive=True,
show_label=False
)
with gr.Row():
submit = gr.Button("Send", scale=3)
clear = gr.Button("Clear", scale=1)
# Photo examples - always shown (static)
gr.Markdown("**Try uploading a bird photo:**")
gr.Examples(
examples=PHOTO_EXAMPLES,
inputs=msg,
cache_examples=False
)
# Text examples - change based on agent mode (dynamic)
gr.Markdown("**Or try a text query:**")
text_examples = gr.Examples(
examples=MULTI_AGENT_TEXT_EXAMPLES, # Default to multi-agent text examples
inputs=msg,
cache_examples=False
)
# Middle: Tool execution log (scale=1)
with gr.Column(scale=1):
tool_output = gr.Textbox(
value="*Waiting for tool calls...*",
elem_classes=["tool-log-panel"],
elem_id="tool-log-output",
autoscroll=True,
show_label=False,
interactive=False,
container=False
)
# Right: Sidebar (scale=1)
with gr.Column(scale=1, elem_classes=["sidebar"]):
# MCP Server Status Check
mcp_status_html = gr.HTML("""
<div class="mcp-badge online" style="margin-bottom: 16px; justify-content: center;">
<span class="mcp-pulse"></span>
<span>Powered by Modal MCP</span>
</div>
""")
check_mcp_btn = gr.Button("Check Modal MCP Server Status", size="sm", variant="secondary", elem_classes=["modal-check-btn"])
gr.HTML("""
<p style="font-size: 0.75rem; color: #9ca3af; margin-top: 8px; margin-bottom: 16px; line-height: 1.4;">
Please be patient if the Modal MCP server needs to cold start
</p>
""")
gr.Markdown("---")
# Provider selection
gr.Markdown("### SELECT LLM PROVIDER")
provider = gr.Dropdown(
choices=["HuggingFace", "OpenAI", "Anthropic"],
value="OpenAI",
show_label=False,
container=False
)
# Agent Mode Selector
gr.Markdown("**Agent Configuration**")
gr.Markdown("Choose between unified agent or specialized routing")
agent_mode = gr.Dropdown(
choices=[
"Supervisor (Multi-Agent)"
],
value="Supervisor (Multi-Agent)",
show_label=False,
container=False
)
gr.Markdown("---")
# API Keys
gr.Markdown("### AUTHENTICATION")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.brandfetch.io/idGqKHD5xE/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1668516030712"
alt="HuggingFace"
style="width: 20px; height: 20px;">
<strong style="color: #d1d5db;">HuggingFace API Key</strong>
</div>
""")
hf_key = gr.Textbox(
placeholder="hf_...",
type="password",
show_label=False,
container=False,
elem_classes=["hf-section"]
)
gr.Markdown("Get your key from [HF Settings](https://huggingface.co/settings/tokens)")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.oaistatic.com/_next/static/media/apple-touch-icon.59f2e898.png"
alt="OpenAI"
style="width: 20px; height: 20px; border-radius: 4px;">
<strong style="color: #d1d5db;">OpenAI API Key</strong>
</div>
""")
openai_key = gr.Textbox(
placeholder="sk-...",
type="password",
show_label=False,
container=False,
elem_classes=["openai-section"]
)
gr.Markdown("Get your key from [OpenAI Platform](https://platform.openai.com/api-keys)")
gr.HTML("""
<div style="display: flex; align-items: center; gap: 8px; margin-bottom: 8px;">
<img src="https://cdn.brandfetch.io/idmJWF3N06/theme/dark/symbol.svg?c=1bxid64Mup7aczewSAYMX&t=1721803183716"
alt="Anthropic"
style="width: 20px; height: 20px; filter: invert(52%) sepia(48%) saturate(779%) hue-rotate(327deg) brightness(91%) contrast(88%);">
<strong style="color: #d1d5db;">Anthropic API Key</strong>
</div>
""")
anthropic_key = gr.Textbox(
placeholder="sk-ant-...",
type="password",
show_label=False,
container=False,
elem_classes=["anthropic-section"]
)
gr.Markdown("Get your key from [Anthropic Console](https://console.anthropic.com/settings/keys)")
# Current Configuration Display
gr.Markdown("---")
gr.Markdown("### CURRENT CONFIG")
# Generate initial config HTML
session_status = gr.HTML(
value=create_config_html(
provider_choice="OpenAI",
agent_mode_choice="Supervisor (Multi-Agent)",
hf_key_input="",
openai_key_input="",
anthropic_key_input=""
)
)
# About
gr.Markdown("---")
gr.Markdown("""
### ABOUT
Built for the [Hugging Face MCP-1st-Birthday Hackathon](https://huggingface.co/MCP-1st-Birthday)
""")
gr.HTML("""
<div style="text-align: center; margin: 16px 0;">
<img src="https://cdn-uploads.huggingface.co/production/uploads/60d2dc1007da9c17c72708f8/s4q7RzD3S-8xQ8ecXrSwb.png"
alt="Hugging Face MCP 1st Birthday"
style="max-width: 100%; height: auto; border-radius: 8px;">
</div>
""")
gr.Markdown("""
**MCP Servers:**
- Modal GPU classifier (2 tools)
- Nuthatch species database (7 tools)
**Capabilities:**
- Visual bird identification
- Species reference images (Unsplash)
- Audio recordings (xeno-canto)
- Conservation status data
- Taxonomic exploration
- Separate tool log panel
- Detailed execution tracking
- Tool input/output inspection
""")
with gr.Tab("📖 README"):
with gr.Column(elem_classes=["readme-tab-container"]):
try:
with open("README.md", "r", encoding="utf-8") as f:
readme_content = f.read()
gr.Markdown(readme_content, elem_classes=["readme-markdown"])
except FileNotFoundError:
gr.Markdown("README.md not found", elem_classes=["readme-markdown"])
# State for tool log
tool_log_state = gr.State("*Waiting for tool calls...*")
# ============================================================================
# ONBOARDING NAVIGATION HANDLERS - Using Walkthrough
# ============================================================================
def handle_welcome_next(provider_choice):
"""Navigate to API key page and show appropriate input section."""
show_hf = provider_choice == "HuggingFace"
show_openai = provider_choice == "OpenAI"
show_anthropic = provider_choice == "Anthropic"
return (
gr.Walkthrough(selected=2), # walkthrough - go to step 2
gr.update(visible=show_hf), # hf_key_section
gr.update(visible=show_openai), # openai_key_section
gr.update(visible=show_anthropic) # anthropic_key_section
)
def handle_api_back():
"""Navigate back to welcome page."""
return gr.Walkthrough(selected=1)
def handle_skip_onboarding():
"""Skip onboarding and go directly to main app."""
return gr.Walkthrough(selected=3)
def handle_api_start(provider_choice, hf_key_input, openai_key_input, anthropic_key_input):
"""Save credentials and navigate to main app with pre-populated values."""
provider_str = str(provider_choice) if provider_choice else "OpenAI"
# Debug output
print(f"[DEBUG] handle_api_start - provider: {provider_str}")
print(f"[DEBUG] handle_api_start - hf_key: {'***' if hf_key_input else 'empty'}")
print(f"[DEBUG] handle_api_start - openai_key: {'***' if openai_key_input else 'empty'}")
print(f"[DEBUG] handle_api_start - anthropic_key: {'***' if anthropic_key_input else 'empty'}")
# Determine which API key to use
if provider_str == "HuggingFace":
hf_key_value = hf_key_input if hf_key_input else ""
openai_key_value = ""
anthropic_key_value = ""
elif provider_str == "Anthropic":
hf_key_value = ""
openai_key_value = ""
anthropic_key_value = anthropic_key_input if anthropic_key_input else ""
else:
hf_key_value = ""
openai_key_value = openai_key_input if openai_key_input else ""
anthropic_key_value = ""
# Generate config HTML
config_html = create_config_html(
provider_choice=provider_str,
agent_mode_choice="Supervisor (Multi-Agent)",
hf_key_input=hf_key_value,
openai_key_input=openai_key_value,
anthropic_key_input=anthropic_key_value
)
return (
gr.Walkthrough(selected=3), # walkthrough - go to step 3 (main app)
provider_str, # provider dropdown
hf_key_value, # hf_key textbox
openai_key_value, # openai_key textbox
anthropic_key_value, # anthropic_key textbox
config_html, # session_status HTML
hf_key_value, # stored_hf_key state
openai_key_value, # stored_openai_key state
anthropic_key_value # stored_anthropic_key state
)
# Connect onboarding navigation
skip_btn.click(
fn=handle_skip_onboarding,
outputs=[walkthrough]
)
welcome_next_btn.click(
fn=handle_welcome_next,
inputs=[welcome_provider],
outputs=[walkthrough, hf_key_section, openai_key_section, anthropic_key_section]
)
api_back_btn.click(
fn=handle_api_back,
outputs=[walkthrough]
)
api_start_btn.click(
fn=handle_api_start,
inputs=[welcome_provider, onboarding_hf_key, onboarding_openai_key, onboarding_anthropic_key],
outputs=[
walkthrough,
provider,
hf_key,
openai_key,
anthropic_key,
session_status,
stored_hf_key,
stored_openai_key,
stored_anthropic_key
]
)
# Helper function to update MCP badge HTML
def update_mcp_badge_html(status_text: str) -> str:
"""Generate HTML for MCP badge based on status."""
# Determine badge class based on status
if "✅" in status_text or "Online" in status_text:
badge_class = "online"
elif "❌" in status_text or "Offline" in status_text:
badge_class = "offline"
elif "⏱️" in status_text or "Timeout" in status_text or "Checking" in status_text:
badge_class = "checking"
else:
badge_class = "online"
return f"""
<div class="mcp-badge {badge_class}" style="margin-bottom: 16px; justify-content: center;">
<span class="mcp-pulse"></span>
<span>{status_text}</span>
</div>
"""
# JavaScript to scroll tool log to bottom
scroll_js = """
() => {
const toolLog = document.querySelector('#tool-log-output textarea');
if (toolLog) {
toolLog.scrollTop = toolLog.scrollHeight;
}
}
"""
# Connect events
# Update config display when provider, agent mode, or API keys change
provider.change(
fn=create_config_html,
inputs=[provider, agent_mode, hf_key, openai_key, anthropic_key],
outputs=[session_status]
)
agent_mode.change(
fn=create_config_html,
inputs=[provider, agent_mode, hf_key, openai_key, anthropic_key],
outputs=[session_status]
)
# Update text examples when agent mode changes (photo examples stay the same)
agent_mode.change(
fn=update_text_examples_for_mode,
inputs=[agent_mode],
outputs=[text_examples.dataset]
)
hf_key.change(
fn=create_config_html,
inputs=[provider, agent_mode, hf_key, openai_key, anthropic_key],
outputs=[session_status]
)
openai_key.change(
fn=create_config_html,
inputs=[provider, agent_mode, hf_key, openai_key, anthropic_key],
outputs=[session_status]
)
anthropic_key.change(
fn=create_config_html,
inputs=[provider, agent_mode, hf_key, openai_key, anthropic_key],
outputs=[session_status]
)
submit_event = msg.submit(
fn=show_immediate_loading,
inputs=[msg, chatbot, tool_log_state],
outputs=[chatbot, tool_output]
).then(
fn=chat_wrapper,
inputs=[msg, chatbot, provider, hf_key, openai_key, anthropic_key, agent_mode, tool_log_state],
outputs=[chatbot, tool_output]
).then(
lambda: None,
None,
msg,
js=scroll_js
)
submit_click = submit.click(
fn=show_immediate_loading,
inputs=[msg, chatbot, tool_log_state],
outputs=[chatbot, tool_output]
).then(
fn=chat_wrapper,
inputs=[msg, chatbot, provider, hf_key, openai_key, anthropic_key, agent_mode, tool_log_state],
outputs=[chatbot, tool_output]
).then(
lambda: None,
None,
msg,
js=scroll_js
)
def clear_conversation(request: gr.Request):
"""Clear UI and agent memory by removing agent from cache."""
from agent_cache import agent_cache, agent_last_used
# Clear all cached agents for this session
session_id = request.session_hash
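# Cache keys are assumed to be tuples whose first element is the session id
# (matching the session_id/provider/model/mode arguments passed to get_or_create_agent).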
keys_to_remove = [key for key in agent_cache.keys() if key[0] == session_id]
for key in keys_to_remove:
del agent_cache[key]
if key in agent_last_used:
del agent_last_used[key]
print(f"[DEBUG] Clear clicked - removed {len(keys_to_remove)} cached agents for session {session_id[:8]}")
return [], "*Waiting for tool calls...*", None
clear.click(
fn=clear_conversation,
inputs=[], # request will be auto-injected
outputs=[chatbot, tool_output, msg]
)
# MCP status check handler
async def handle_mcp_check():
"""Check MCP status and return updated HTML."""
# First return "checking" state
yield update_mcp_badge_html("Checking...")
# Then check actual status
status = await check_modal_server_health()
yield update_mcp_badge_html(status)
check_mcp_btn.click(
fn=handle_mcp_check,
outputs=mcp_status_html,
show_progress="hidden"
)
if __name__ == "__main__":
# JavaScript to force dark mode
force_dark_mode = """
function() {
const params = new URLSearchParams(window.location.search);
if (!params.has('__theme')) {
params.set('__theme', 'dark');
window.location.search = params.toString();
}
}
"""
demo.launch(theme=gr.themes.Soft(), css=custom_css, js=force_dark_mode)