File size: 8,865 Bytes
ff0e97f db789ae acf4dc8 db789ae ff0e97f acf4dc8 db789ae acf4dc8 ff0e97f db789ae acf4dc8 ff0e97f 17f468c acf4dc8 db789ae ff0e97f acf4dc8 17f468c db789ae 0588003 db789ae 0588003 db789ae ff0e97f 0588003 ff0e97f 17f468c ff0e97f 0588003 ff0e97f 0588003 ff0e97f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
"""
Structured output parsing using LlamaIndex Pydantic Programs.
Ensures consistent image formatting in agent responses.
HACKATHON OPTIMIZED: Uses regex extraction instead of LLM calls for speed.
"""
from typing import List, Optional
import re
from pydantic import BaseModel, Field
class BirdIdentificationResponse(BaseModel):
    """Structured response for bird identification using LlamaIndex Pydantic."""
    # Main markdown body of the agent's answer; kept verbatim by the parser.
    summary: str = Field(
        description="Main response text with bird identification, facts, or information"
    )
    # Common (not scientific) name; None when no species could be extracted.
    species_name: Optional[str] = Field(
        default=None,
        description="Common name of the bird species (e.g., 'Northern Cardinal')"
    )
    # URLs rendered as markdown images in the formatted response.
    image_urls: List[str] = Field(
        default_factory=list,
        description="List of image URLs to display for this bird"
    )
    # URLs rendered as audio links (e.g. xeno-canto recordings).
    audio_urls: List[str] = Field(
        default_factory=list,
        description="List of audio URLs (bird calls/songs)"
    )
    # Currently always None — see parse_agent_response, which never sets it.
    confidence_score: Optional[float] = Field(
        default=None,
        description="Confidence score from classifier (0.0-1.0)"
    )
def extract_urls_from_text(text: str) -> tuple[List[str], List[str]]:
    """
    Extract image and audio URLs from text using regex.

    Handles URLs embedded in markdown, JSON, and plain text. Supports both
    extension-based URLs (.jpg, .png, ...) and domain-based ones (Unsplash
    images, xeno-canto recordings).

    Args:
        text: Raw response text to scan.

    Returns:
        tuple: (image_urls, audio_urls) — each list deduplicated with the
        original discovery order preserved.
    """
    # Pattern 1: image URLs with file extensions. Non-greedy up to the
    # extension; stops at whitespace or common delimiters like ), ], }.
    image_pattern_ext = r'https?://[^\s)}\]]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s)}\]]*)?'
    # Pattern 2: Unsplash image URLs (no file extension needed),
    # e.g. https://images.unsplash.com/photo-XXXXXXX
    image_pattern_unsplash = r'https?://images\.unsplash\.com/[^\s)}\]]*'
    # Audio: direct audio files AND xeno-canto recording links.
    audio_pattern_files = r'https?://[^\s)}\]]+?\.(?:mp3|wav|ogg|m4a)(?:\?[^\s)}\]]*)?'
    audio_pattern_xenocanto = r'https?://xeno-canto\.org/\d+(?:/download)?'
    print(f"[EXTRACT_URLS] Searching text of length {len(text)}")
    raw_image_urls_ext = re.findall(image_pattern_ext, text, re.IGNORECASE)
    raw_image_urls_unsplash = re.findall(image_pattern_unsplash, text, re.IGNORECASE)
    raw_audio_urls_files = re.findall(audio_pattern_files, text, re.IGNORECASE)
    # dict.fromkeys deduplicates while preserving order — list(set(...)) made
    # the output order depend on hash randomization between runs.
    audio_urls_xenocanto = list(dict.fromkeys(re.findall(audio_pattern_xenocanto, text, re.IGNORECASE)))
    # Combine image URLs from both patterns.
    raw_image_urls = raw_image_urls_ext + raw_image_urls_unsplash
    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_ext)} extension-based image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_unsplash)} Unsplash image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_audio_urls_files)} audio file URLs")
    print(f"[EXTRACT_URLS] Found {len(audio_urls_xenocanto)} xeno-canto URLs")

    def clean_url(url: str) -> Optional[str]:
        """Strip trailing quotes/commas/parens; None if no longer a valid URL."""
        cleaned = url.rstrip('",;)')
        if cleaned.startswith('http://') or cleaned.startswith('https://'):
            return cleaned
        print(f"[EXTRACT_URLS] ⚠️ Rejected malformed URL after cleaning: {cleaned}")
        return None

    image_urls = [u for u in (clean_url(url) for url in raw_image_urls) if u is not None]
    image_urls = list(dict.fromkeys(image_urls))  # deduplicate, keep order
    audio_urls_files = [u for u in (clean_url(url) for url in raw_audio_urls_files) if u is not None]
    audio_urls_files = list(dict.fromkeys(audio_urls_files))  # deduplicate, keep order
    # Combine both types of audio URLs.
    audio_urls = audio_urls_files + audio_urls_xenocanto
    print(f"[EXTRACT_URLS] ✅ Cleaned image URLs ({len(image_urls)}): {image_urls}")
    print(f"[EXTRACT_URLS] ✅ Cleaned audio URLs ({len(audio_urls)}): {audio_urls}")
    return image_urls, audio_urls
def extract_species_name(text: str) -> Optional[str]:
    """
    Best-effort extraction of a bird species name from common phrasings.

    Scans for capitalized multi-word names following phrases such as
    "identified as ...", "species: ...", or "This is a ...". Returns the
    first capture found, or None when nothing matches.
    """
    candidate_patterns = (
        r'identified as[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'species[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'This is (?:a |an )?([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
    )
    hits = (re.search(pattern, text) for pattern in candidate_patterns)
    return next((m.group(1) for m in hits if m), None)
async def parse_agent_response(
    raw_response: str,
    provider: str,
    api_key: str,
    model: str
) -> str:
    """
    Parse agent response into structured format and reformat with guaranteed markdown.

    OPTIMIZED FOR HACKATHON: uses regex extraction instead of an LLM call,
    while still using the Pydantic model for structured data.

    Args:
        raw_response: The agent's raw text response.
        provider: LLM provider ("openai", "anthropic", "huggingface") — unused
            in this optimized version, kept for interface compatibility.
        api_key: API key (unused in optimized version).
        model: Model name (unused in optimized version).

    Returns:
        Formatted markdown response with guaranteed image syntax, or the
        original response when no media was found or parsing failed.
    """
    try:
        print("[STRUCTURED OUTPUT] Starting parsing...")
        print(f"[STRUCTURED OUTPUT] Raw response length: {len(raw_response)} characters")
        print(f"[STRUCTURED OUTPUT] First 500 chars: {raw_response[:500]}")
        print(f"[STRUCTURED OUTPUT] Last 500 chars: {raw_response[-500:]}")
        # Extract URLs using regex (fast, no API call).
        image_urls, audio_urls = extract_urls_from_text(raw_response)
        print(f"[STRUCTURED OUTPUT] Found {len(image_urls)} images, {len(audio_urls)} audio files")
        # Extract species name if possible.
        species_name = extract_species_name(raw_response)
        # Create structured response using the Pydantic model.
        structured = BirdIdentificationResponse(
            summary=raw_response,  # keep full response as summary
            species_name=species_name,
            image_urls=image_urls,
            audio_urls=audio_urls,
            confidence_score=None  # could extract with regex if needed
        )
        # Nothing to reformat — hand the original text back unchanged.
        if not structured.image_urls and not structured.audio_urls:
            print("[STRUCTURED OUTPUT] No images or audio found, returning original")
            return raw_response
        formatted_parts = []
        # Remove already-formatted images/audio from the summary so the media
        # sections below don't duplicate them.
        clean_summary = raw_response
        for url in image_urls:
            # Remove existing markdown images, then any remaining plain URLs.
            clean_summary = re.sub(rf'!\[([^\]]*)\]\({re.escape(url)}\)', '', clean_summary)
            clean_summary = clean_summary.replace(url, '')
        for url in audio_urls:
            clean_summary = clean_summary.replace(url, '')
        formatted_parts.append(clean_summary.strip())
        # Add images with markdown syntax.
        if structured.image_urls:
            formatted_parts.append("\n### Images\n")
            for idx, url in enumerate(structured.image_urls, 1):
                # Use species name if available, otherwise generic.
                alt_text = structured.species_name or f"Bird {idx}"
                # BUG FIX: this was an empty f-string (f""), which silently
                # dropped every image; emit real markdown image syntax.
                img_markdown = f"![{alt_text}]({url})"
                print(f"[STRUCTURED OUTPUT] Generated image markdown: {img_markdown}")
                formatted_parts.append(img_markdown)
        # Add audio links if present.
        if structured.audio_urls:
            formatted_parts.append("\n### Audio Recordings\n")
            for idx, url in enumerate(structured.audio_urls, 1):
                # Strip /download from xeno-canto URLs for browser-friendly links.
                display_url = url.replace("/download", "") if "xeno-canto.org" in url else url
                formatted_parts.append(f"🔊 [Listen to recording {idx}]({display_url})")
        result = "\n\n".join(formatted_parts)
        print(f"[STRUCTURED OUTPUT] ✅ Successfully formatted response")
        print(f"[STRUCTURED OUTPUT] Final markdown length: {len(result)} characters")
        print(f"[STRUCTURED OUTPUT] Final markdown (last 500 chars): {result[-500:]}")
        return result
    except Exception as e:
        # Fallback: return original response if parsing fails.
        print(f"[STRUCTURED OUTPUT] ❌ Parsing failed: {e}")
        return raw_response
|