import os, sys, shutil, re
import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download

# Hub repository holding the backend sources/weights, and the companion Colab
# notebook that user-facing messages link to.
MODEL_REPO = "sukriramli/tiny-bird-diffusion"
COLAB_WORKSPACE_URL = "https://colab.research.google.com/drive/1EL5VS_vAKvojPf5UPuQVFbK5gkgP51hB?usp=sharing"
os.makedirs("tiny-bird-diffusion", exist_ok=True)

# Bootstrapping: every file the backend needs, fetched from the model repo.
manifest = [
    "cvt.py",
    "mel_spectrogram.py",
    "protoclr.pth",
    "config.json",
    "trained_cluster_brain.joblib",
    "acoustic_atlas_metadata.csv",
    "pipeline.py",
    "api.py",
]

print("📦 Fetching backend architecture files from model catalog...")
for asset_name in manifest:
    staged_path = os.path.join("tiny-bird-diffusion", asset_name)
    if os.path.exists(staged_path):
        # Already staged locally; skip the hub lookup for this file.
        continue
    cached_path = hf_hub_download(repo_id=MODEL_REPO, filename=asset_name)
    shutil.copy(cached_path, staged_path)

# Mirror config.json into the working directory, which is where cvt.py is
# expected to read it. A config.json that already exists there is left alone.
if os.path.exists("tiny-bird-diffusion/config.json"):
    if not os.path.exists("config.json"):
        shutil.copy("tiny-bird-diffusion/config.json", "config.json")

# Make the staged directory importable, then pull in the inference entry point.
sys.path.append(os.path.abspath("tiny-bird-diffusion"))
from api import predict_bird

# Load the atlas metadata used by the Jukebox tab and derive the sorted list
# of distinct, non-null species names.
df_atlas = pd.read_csv("tiny-bird-diffusion/acoustic_atlas_metadata.csv")
unique_species = sorted(df_atlas["Target_Bird"].dropna().unique())

# --- Core event handlers ---
def process_audio_stream(audio_path):
    """Classify an audio clip and describe the outcome as Markdown.

    The file at ``audio_path`` is read as raw bytes and handed to
    ``predict_bird``; the ``status`` entry of the returned mapping selects
    which Markdown panel is produced.

    Args:
        audio_path: Path of the clip to analyse. Any falsy value (``None``,
            empty string) is treated as "no clip supplied".

    Returns:
        A Markdown string: an empty-input prompt, a success summary
        (``species`` / ``confidence``), a no-bird warning (``distance``), or,
        for any other status, an error panel showing ``message``.
    """
    if not audio_path:
        return f"### ⚠️ Input Gateway Empty\n\nPlease record or upload an audio clip to start inference pipelines.\n\n👉 **[Open Source Google Colab Sandbox]({COLAB_WORKSPACE_URL})**"

    with open(audio_path, "rb") as clip:
        raw_audio = clip.read()

    result = predict_bird(raw_audio)
    status = result["status"]

    if status == "SUCCESS":
        return f"### 🟢 Species Identified\n\n* **Taxon Target:** `{result['species']}`\n* **Confidence Match:** `{result['confidence']}%`"

    if status == "NO_BIRD_DETECTED":
        warning = f"### 🟡 Signal Boundary Warning\n\nAmbient background noise or signal structure falls outside verified biological ecosystem constraints (Distance: `{result['distance']:.3f}`).\n\n"
        colab_hint = f"💡 **[Run Advanced Debug Visualizations Natively in Colab]({COLAB_WORKSPACE_URL})**"
        return warning + colab_hint

    # Any other status is reported as a runtime error with a Colab escape hatch.
    error_panel = (
        "### ❌ SYSTEM RUNTIME ERROR\n",
        f"`{result['message']}`\n\n",
        "---",
        "\n\n",
        "🛠️ **The server cloud instance is hitting a hurdle.**\n",
        "You can completely bypass this server-side environment block by running your personal dedicated GPU/CPU session directly in the cloud workspace:\n\n",
        f"🚀 **[LAUNCH DEDICATED GOOGLE COLAB WORKSPACE]({COLAB_WORKSPACE_URL})**",
    )
    return "".join(error_panel)

def play_jukebox_track(selected_bird):
    """Resolve a sample recording URL for the chosen species.

    Looks up the atlas rows whose ``Target_Bird`` equals ``selected_bird`` and
    inspects the ``File_ID`` of the first one for an ``XC<digits>`` token.

    Args:
        selected_bird: Species name as it appears in the atlas.

    Returns:
        A ``(audio, status)`` pair. ``audio`` is a xeno-canto download URL
        when an ``XC`` id was found, otherwise ``None``; ``status`` is the
        message to display alongside it.
    """
    matching_rows = df_atlas[df_atlas['Target_Bird'] == selected_bird]
    if matching_rows.empty:
        return None, "⚠️ Species catalog exception."

    # Only the first matching row is consulted for a recording id.
    first_file_id = str(matching_rows.iloc[0]['File_ID'])
    found = re.search(r'XC(\d+)', first_file_id)
    if not found:
        return None, f"⚠️ Fallback sample row encountered. Launch the full environment sheet to inspect: {COLAB_WORKSPACE_URL}"

    track_number = found.group(1)
    return (
        f"https://xeno-canto.org/{track_number}/download",
        f"🔊 Streaming wild call track ID: XC{track_number}",
    )

# --- Extra <head> markup ---
# Google site-verification <meta> tag, wrapped in a leading and a trailing
# newline; passed to gr.Blocks via its `head` argument.
CUSTOM_SEO_HEADERS = (
    "\n"
    '<meta name="google-site-verification" content="mowK6lo4yI0bI7XsqLVtnkWI30Jm0Iq5n0EU1weMmgc" />'
    "\n"
)

# --- Render Production Tabbed UI Layout ---
# Two-tab interface: a classifier fed by an audio input, and a "jukebox" that
# streams a reference recording for a species picked from the atlas.
with gr.Blocks(theme=gr.themes.Soft(), head=CUSTOM_SEO_HEADERS) as demo:
    gr.Markdown("# 🦅 Real-Time Edge Bioacoustic Suite")

    with gr.Tab("🎙️ Species Classifier"):
        gr.Markdown("Stream raw environmental waveforms through your microphone or upload clips to track regional biodiversity.")
        with gr.Row():
            # type="filepath" makes the handler receive a path it can open.
            audio_input = gr.Audio(type="filepath", label="Acoustic Input Gateway")

        gr.Markdown("### 📊 Analysis Results Matrix")
        text_output = gr.Markdown("*Awaiting audio track deployment passes...*")

        submit_btn = gr.Button("⚡ Run Edge Inference Passes", variant="primary")
        submit_btn.click(fn=process_audio_stream, inputs=audio_input, outputs=text_output)

    with gr.Tab("🎵 Avian Jukebox"):
        gr.Markdown("Select a species from your 168-class repository to bypass cloud models and stream call maps natively.")
        with gr.Row():
            # Guard the default: indexing [0] on an empty species list would
            # raise IndexError while the UI is being built and stop the app
            # from starting. With no species, nothing is preselected.
            default_species = unique_species[0] if unique_species else None
            species_dropdown = gr.Dropdown(choices=unique_species, value=default_species, label="Select Target Species")
            jukebox_status = gr.Textbox(label="Media Link Lookup Status", interactive=False)
        jukebox_audio = gr.Audio(label="Audio Stream Output", interactive=False)
        jukebox_btn = gr.Button("🎵 Stream Species Audio", variant="primary")
        # The handler returns (audio, status), matching the order of `outputs`.
        jukebox_btn.click(fn=play_jukebox_track, inputs=species_dropdown, outputs=[jukebox_audio, jukebox_status])

demo.launch()