import gradio as gr
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from deep_translator import GoogleTranslator
from datasets import load_dataset

# One-time startup: load the image dataset, the precomputed embedding table,
# the CLIP text encoder and the translator. Everything is kept on the CPU.
print("Downloading dataset and initializing model...")
dataset = load_dataset('JotDe/birds')

print("Loading embeddings...")
df = pd.read_parquet("bird_embeddings.parquet")
# Row i of the table describes one embedded image: 'dataset_index' is its
# position in dataset['train'] and 'label' is its integer class id.
subset_indices = df['dataset_index'].tolist()
subset_labels = df['label'].tolist()

# Every column other than the two bookkeeping columns is treated as one
# embedding dimension.
feature_cols = [c for c in df.columns if c not in ['dataset_index', 'label']]
embeddings = df[feature_cols].values
# Pin float32 explicitly. The parquet file may store float64, and the
# similarity computation multiplies this matrix with the query embedding
# (presumably float32 from the encoder -- TODO confirm); torch matrix
# multiplication rejects operands of different dtypes.
dataset_embeddings = torch.tensor(embeddings, dtype=torch.float32, device='cpu')

model = SentenceTransformer('sentence-transformers/clip-ViT-B-32', device='cpu')
# source='auto' lets the service detect the input language; queries are
# translated to English before being encoded.
translator = GoogleTranslator(source='auto', target='en')

def get_recommendations(text_input, top_k=3):
    """Return the dataset images whose embeddings best match a text query.

    The query is encoded with the module-level ``model`` and compared against
    ``dataset_embeddings`` by cosine similarity.

    Args:
        text_input: Query text. ``None``, empty, or whitespace-only input
            yields no results.
        top_k: Maximum number of matches to return. Values <= 0 yield no
            results; ``None`` returns every embedded row, ranked.

    Returns:
        A list of ``{"image": ..., "label": ...}`` dicts ordered from most to
        least similar, where "label" is the species name as a string. The
        list is shorter than ``top_k`` when fewer embeddings are available.
    """
    if not text_input or text_input.strip() == "":
        return []
    # A non-positive k must not reach the ranking step: a negative slice
    # bound would return almost the whole dataset instead of nothing.
    if top_k is not None and top_k <= 0:
        return []

    query_embedding = model.encode(text_input, convert_to_tensor=True, device='cpu')
    similarities = util.cos_sim(query_embedding, dataset_embeddings)[0]

    # topk only selects and orders the best k entries instead of sorting all
    # of them; k is clamped because topk raises when k exceeds the length.
    available = similarities.shape[0]
    k = available if top_k is None else min(top_k, available)
    best_rows = similarities.topk(k).indices.tolist()

    # Loop-invariant lookups, resolved once.
    train_split = dataset['train']
    label_feature = train_split.features['label']

    recommendations = []
    for row in best_rows:
        # int(...) guards against numpy integer types coming out of the
        # embedding table, which the dataset indexing may not accept.
        original_idx = int(subset_indices[row])
        label_id = int(subset_labels[row])
        recommendations.append({
            "image": train_split[original_idx]['image'],
            "label": label_feature.int2str(label_id),
        })
    return recommendations

def gradio_interface(text_input):
    """Handle a search request from the UI and build the six output values.

    Args:
        text_input: The user's description of a bird, in any language the
            translator can detect.

    Returns:
        A 6-tuple ``(image1, heading1, image2, heading2, image3, heading3)``.
        On success each heading is the species name as a Markdown heading.
        When the input is blank, contains a digit, or fewer than three
        matches are available, the images are ``None``, the first heading
        carries the message and the other headings are empty strings.
    """
    if not text_input or text_input.strip() == "":
        return None, "### Please enter a description.", None, "", None, ""

    # Inputs containing digits are rejected with a usage hint.
    if any(char.isdigit() for char in text_input):
        return None, "### Please put a bird type (example: blue bird)", None, "", None, ""

    # Translation is best-effort: on any failure, or an empty result, search
    # with the text as typed. `except Exception` (not a bare except) keeps
    # KeyboardInterrupt / SystemExit propagating.
    try:
        english_query = translator.translate(text_input) or text_input
    except Exception:
        english_query = text_input

    results = get_recommendations(text_input=english_query, top_k=3)

    # The UI has exactly three result slots; anything less is reported as
    # no match.
    if len(results) < 3:
        return None, "### No matches found.", None, "", None, ""

    first, second, third = results[:3]
    return (
        first['image'], f"### {first['label']}",
        second['image'], f"### {second['label']}",
        third['image'], f"### {third['label']}",
    )

# Page layout: query controls in a narrow left column; three result slots
# (image plus species heading) side by side in a wider right column.
with gr.Blocks(title="🐦 Smart Bird Tracker") as demo:
    gr.Markdown("# 🐦 Smart Bird Tracker")
    gr.Markdown("Describe the bird you are looking for in **English**, **Spanish**, or **Hebrew**, and the AI will find the closest matches!")

    with gr.Row():
        with gr.Column(scale=1):
            text_in = gr.Textbox(label="Describe the bird", placeholder="Type your description here...")
            submit_btn = gr.Button("Find Birds", variant="primary")

        with gr.Column(scale=2):
            with gr.Row():
                # Build the slots in rank order; each contributes an image
                # followed by its heading, matching the order of values
                # returned by gradio_interface.
                result_slots = []
                for rank in (1, 2, 3):
                    with gr.Column():
                        result_slots.append(gr.Image(label=f"Top Match {rank}"))
                        result_slots.append(gr.Markdown())

    # Keep the individual component names available at module level.
    out_img1, out_name1, out_img2, out_name2, out_img3, out_name3 = result_slots

    # Clicking the button runs the search and fills all six outputs.
    submit_btn.click(
        fn=gradio_interface,
        inputs=[text_in],
        outputs=result_slots,
    )

# Start the web server for the app.
demo.launch()