File size: 8,865 Bytes
ff0e97f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db789ae
acf4dc8
db789ae
ff0e97f
 
 
acf4dc8
db789ae
 
acf4dc8
 
 
 
 
ff0e97f
 
db789ae
 
acf4dc8
ff0e97f
17f468c
 
acf4dc8
 
 
db789ae
ff0e97f
 
acf4dc8
 
 
 
 
17f468c
 
 
db789ae
 
0588003
 
 
 
 
 
 
db789ae
0588003
 
 
 
 
db789ae
ff0e97f
 
 
0588003
 
 
 
ff0e97f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17f468c
 
 
ff0e97f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0588003
 
 
ff0e97f
 
 
 
 
 
 
 
 
 
 
0588003
 
ff0e97f
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
"""
Structured output parsing using LlamaIndex Pydantic Programs.
Ensures consistent image formatting in agent responses.

HACKATHON OPTIMIZED: Uses regex extraction instead of LLM calls for speed.
"""
from typing import List, Optional
import re
from pydantic import BaseModel, Field


class BirdIdentificationResponse(BaseModel):
    """Structured response for bird identification.

    A plain Pydantic model used as the canonical shape for agent output:
    ``parse_agent_response`` fills it from regex-extracted data and then
    renders it back to markdown. Each field carries its own ``Field``
    description, which doubles as schema documentation if this model is
    ever handed to an LLM structured-output program.
    """

    # Main response text with the identification / facts; the full raw
    # agent response is stored here unmodified.
    summary: str = Field(
        description="Main response text with bird identification, facts, or information"
    )
    # Common species name when one could be extracted, else None.
    species_name: Optional[str] = Field(
        default=None,
        description="Common name of the bird species (e.g., 'Northern Cardinal')"
    )
    # Image URLs to display for this bird (may be empty).
    image_urls: List[str] = Field(
        default_factory=list,
        description="List of image URLs to display for this bird"
    )
    # Audio URLs (bird calls/songs); xeno-canto links may carry /download.
    audio_urls: List[str] = Field(
        default_factory=list,
        description="List of audio URLs (bird calls/songs)"
    )
    # Classifier confidence in [0.0, 1.0]; currently always None — the
    # regex pipeline never populates it (see parse_agent_response).
    confidence_score: Optional[float] = Field(
        default=None,
        description="Confidence score from classifier (0.0-1.0)"
    )


def extract_urls_from_text(text: str) -> tuple[List[str], List[str]]:
    """
    Extract image and audio URLs from text using regex.

    Handles URLs inside markdown, JSON, and plain text. Supports both
    extension-based URLs (.jpg, .png, ...) and domain-based ones
    (Unsplash images, xeno-canto recording pages).

    Args:
        text: Raw response text to scan.

    Returns:
        tuple: (image_urls, audio_urls). Each list is deduplicated while
        preserving first-seen order — ``dict.fromkeys`` instead of
        ``set()``, so the output order is deterministic across runs
        (``set`` order varies under hash randomization).
    """
    # Pattern 1: image URLs with file extensions. Non-greedy up to the
    # extension; stops at whitespace or common delimiters ) ] } and
    # allows an optional query string.
    image_pattern_ext = r'https?://[^\s)}\]]+?\.(?:jpg|jpeg|png|gif|webp|svg)(?:\?[^\s)}\]]*)?'

    # Pattern 2: Unsplash image URLs (no file extension needed),
    # e.g. https://images.unsplash.com/photo-XXXXXXX
    image_pattern_unsplash = r'https?://images\.unsplash\.com/[^\s)}\]]*'

    # Audio: direct audio files AND xeno-canto recording links.
    audio_pattern_files = r'https?://[^\s)}\]]+?\.(?:mp3|wav|ogg|m4a)(?:\?[^\s)}\]]*)?'
    audio_pattern_xenocanto = r'https?://xeno-canto\.org/\d+(?:/download)?'

    print(f"[EXTRACT_URLS] Searching text of length {len(text)}")

    raw_image_urls_ext = re.findall(image_pattern_ext, text, re.IGNORECASE)
    raw_image_urls_unsplash = re.findall(image_pattern_unsplash, text, re.IGNORECASE)
    raw_audio_urls_files = re.findall(audio_pattern_files, text, re.IGNORECASE)
    raw_audio_urls_xenocanto = re.findall(audio_pattern_xenocanto, text, re.IGNORECASE)

    # Combine image URLs from both patterns
    raw_image_urls = raw_image_urls_ext + raw_image_urls_unsplash

    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_ext)} extension-based image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_image_urls_unsplash)} Unsplash image URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_audio_urls_files)} audio file URLs")
    print(f"[EXTRACT_URLS] Found {len(raw_audio_urls_xenocanto)} xeno-canto URLs")

    def clean_url(url: str) -> Optional[str]:
        """Strip trailing quote/comma/paren junk; None if the result is malformed."""
        # FIX: return type was annotated ``-> str`` but None is a valid result.
        cleaned = url.rstrip('",;)')
        if cleaned.startswith(('http://', 'https://')):
            return cleaned
        print(f"[EXTRACT_URLS] ⚠️ Rejected malformed URL after cleaning: {cleaned}")
        return None

    def _dedupe(urls) -> List[str]:
        """Order-preserving, deterministic deduplication."""
        return list(dict.fromkeys(urls))

    image_urls = _dedupe(u for u in map(clean_url, raw_image_urls) if u is not None)
    audio_urls_files = _dedupe(u for u in map(clean_url, raw_audio_urls_files) if u is not None)

    # xeno-canto links match a strict pattern so they need no cleaning, but
    # dedupe the combined list in case a URL matched both audio patterns.
    audio_urls = _dedupe(audio_urls_files + raw_audio_urls_xenocanto)

    # Log the actual URLs extracted
    print(f"[EXTRACT_URLS] ✅ Cleaned image URLs ({len(image_urls)}): {image_urls}")
    print(f"[EXTRACT_URLS] ✅ Cleaned audio URLs ({len(audio_urls)}): {audio_urls}")

    return image_urls, audio_urls


def extract_species_name(text: str) -> Optional[str]:
    """Pull a likely bird species name out of free-form response text.

    Tries a small set of phrasings ("identified as X", "species: X",
    "This is a X") and returns the first capitalized-phrase capture,
    or None when nothing matches.
    """
    for candidate in (
        r'identified as[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'species[:\s]+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
        r'This is (?:a |an )?([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})',
    ):
        if (hit := re.search(candidate, text)) is not None:
            return hit.group(1)
    return None


async def parse_agent_response(
    raw_response: str,
    provider: str,
    api_key: str,
    model: str
) -> str:
    """
    Parse agent response into structured format and reformat with guaranteed markdown.

    OPTIMIZED FOR HACKATHON: uses regex extraction instead of an LLM call,
    but still funnels data through the Pydantic model for structure.

    Args:
        raw_response: The agent's raw text response
        provider: LLM provider ("openai", "anthropic", "huggingface")
        api_key: API key (unused in optimized version)
        model: Model name (unused in optimized version)

    Returns:
        Formatted markdown response with guaranteed image syntax, or the
        original text unchanged when no media is found or parsing fails.
    """
    try:
        print("[STRUCTURED OUTPUT] Starting parsing...")
        print(f"[STRUCTURED OUTPUT] Raw response length: {len(raw_response)} characters")
        print(f"[STRUCTURED OUTPUT] First 500 chars: {raw_response[:500]}")
        print(f"[STRUCTURED OUTPUT] Last 500 chars: {raw_response[-500:]}")

        # Extract URLs using regex (fast, no API call)
        image_urls, audio_urls = extract_urls_from_text(raw_response)

        print(f"[STRUCTURED OUTPUT] Found {len(image_urls)} images, {len(audio_urls)} audio files")

        # Extract species name if possible
        species_name = extract_species_name(raw_response)

        # Create structured response using the Pydantic model
        structured = BirdIdentificationResponse(
            summary=raw_response,  # Keep full response as summary
            species_name=species_name,
            image_urls=image_urls,
            audio_urls=audio_urls,
            confidence_score=None  # Could extract with regex if needed
        )

        # Nothing to reformat — hand the original text straight back.
        if not structured.image_urls and not structured.audio_urls:
            print("[STRUCTURED OUTPUT] No images or audio found, returning original")
            return raw_response

        # Reformat into markdown with guaranteed images
        formatted_parts = []

        # Strip already-present media from the summary to avoid duplication.
        # FIX: remove longer URLs first so a URL that is a prefix of another
        # (e.g. the same Unsplash link with and without its query string)
        # doesn't leave "?w=800"-style debris behind after replacement.
        clean_summary = raw_response
        for url in sorted(image_urls, key=len, reverse=True):
            # Remove existing markdown images
            clean_summary = re.sub(rf'!\[([^\]]*)\]\({re.escape(url)}\)', '', clean_summary)
            # Remove plain URLs
            clean_summary = clean_summary.replace(url, '')

        for url in sorted(audio_urls, key=len, reverse=True):
            # Remove audio URLs from summary
            clean_summary = clean_summary.replace(url, '')

        formatted_parts.append(clean_summary.strip())

        # Add images with markdown syntax
        if structured.image_urls:
            formatted_parts.append("\n### Images\n")
            for idx, url in enumerate(structured.image_urls, 1):
                # Use species name if available, otherwise generic
                alt_text = structured.species_name or f"Bird {idx}"
                img_markdown = f"![{alt_text}]({url})"
                print(f"[STRUCTURED OUTPUT] Generated image markdown: {img_markdown}")
                formatted_parts.append(img_markdown)

        # Add audio links if present
        if structured.audio_urls:
            formatted_parts.append("\n### Audio Recordings\n")
            for idx, url in enumerate(structured.audio_urls, 1):
                # Strip /download from xeno-canto URLs for browser-friendly links
                display_url = url.replace("/download", "") if "xeno-canto.org" in url else url
                formatted_parts.append(f"🔊 [Listen to recording {idx}]({display_url})")

        result = "\n\n".join(formatted_parts)
        # FIX: was an f-string with no placeholders
        print("[STRUCTURED OUTPUT] ✅ Successfully formatted response")
        print(f"[STRUCTURED OUTPUT] Final markdown length: {len(result)} characters")
        print(f"[STRUCTURED OUTPUT] Final markdown (last 500 chars): {result[-500:]}")
        return result

    except Exception as e:
        # Fallback: never let formatting break the user-facing response.
        print(f"[STRUCTURED OUTPUT] ❌ Parsing failed: {e}")
        return raw_response