"""Gradio app that flags potentially sensitive image content (humans / dogs).

Primary path: the SmilingWolf EVA02 tagger (loaded via timm from the HF hub)
produces multi-label tags which are matched against keyword vocabularies.
Fallback path (when EVA02 fails to load): a generic image classifier plus a
dedicated NSFW classifier from the transformers pipeline API.
"""

import gradio as gr
from transformers import pipeline
import re
import torch
import timm
from PIL import Image
import numpy as np

# --- Best-effort load of the EVA02 tagger and its tag vocabulary. ---------
# Any failure (network, missing weights, CSV format change) downgrades the
# app to the pipeline-based fallback instead of crashing at import time.
try:
    eva02_model = timm.create_model(
        'hf_hub:SmilingWolf/wd-eva02-large-tagger-v3', pretrained=True
    )
    eva02_model.eval()
    import requests
    tags_response = requests.get(
        'https://huggingface.co/SmilingWolf/wd-eva02-large-tagger-v3/resolve/main/selected_tags.csv'
    )
    tags_lines = tags_response.text.strip().split('\n')
    # Column 1 of each CSV row is the tag name; row 0 is the header.
    eva02_tags = [line.split(',')[1] for line in tags_lines[1:]]
    print(f"Modelo EVA02 carregado com {len(eva02_tags)} tags")
except Exception as e:
    print(f"Erro ao carregar EVA02: {e}")
    eva02_model = None
    eva02_tags = []

# Fallback classifiers, used only when eva02_model is None.
content_model = pipeline("image-classification", model="facebook/convnext-base-224")
nsfw_model = pipeline("image-classification", model="Falconsai/nsfw_image_detection")

# --- Keyword vocabularies (module-level constants; matched by substring). --
HUMAN_KEYWORDS = [
    "human", "person", "people", "man", "woman", "child", "baby", "boy",
    "girl", "face", "portrait", "selfie", "crowd", "family", "couple",
    "teenager", "1boy", "1girl", "2boys", "2girls", "multiple boys",
    "multiple girls", "male", "female", "adult", "teen", "kid", "toddler",
    "infant",
]
DOG_KEYWORDS = [
    "dog", "puppy", "retriever", "labrador", "golden", "beagle", "bulldog",
    "poodle", "german shepherd", "chihuahua", "terrier", "hound", "mastiff",
    "canine", "pet", "animal", "malamute", "malemute", "alaskan", "1dog",
    "2dogs", "multiple dogs", "doggy", "doggie", "pup",
]
VIOLENCE_KEYWORDS = [
    "blood", "wound", "injury", "hurt", "pain", "fight", "violence",
    "weapon", "knife", "gun", "attack", "aggression", "conflict", "battle",
    "war", "bloody", "injured", "wounded", "bleeding", "scar", "bruise",
    "cut",
]
SUFFERING_KEYWORDS = [
    "sad", "crying", "tears", "depressed", "miserable", "suffering", "pain",
    "distress", "anguish", "grief", "mourning", "funeral", "death", "dead",
    "dying", "illness", "sick", "injured", "abandoned", "neglected",
]
ABUSE_KEYWORDS = [
    "abuse", "mistreatment", "cruelty", "torture", "beaten", "chained",
    "caged", "starving", "malnourished", "neglected", "abandoned", "abused",
    "mistreated",
]
DEATH_KEYWORDS = [
    "death", "dead", "dying", "corpse", "carcass", "deceased", "lifeless",
    "motionless", "still", "rigid", "pale", "cold", "skull", "bones",
    "grave", "tombstone", "funeral", "coffin", "burial",
]
SUSPICIOUS_KEYWORDS = [
    "lying", "laying", "ground", "floor", "side", "horizontal", "flat",
    "unconscious", "sleeping", "resting", "still", "motionless", "quiet",
    "on ground", "on floor", "lying down",
]
ADULT_KEYWORDS = ["nsfw", "explicit", "nude", "naked", "sexual", "adult", "mature"]


def analyze_with_eva02(image):
    """Run the EVA02 tagger on a PIL image.

    Args:
        image: a PIL image (any mode; converted to RGB internally).

    Returns:
        (tags, scores): parallel lists of tag names and their sigmoid
        probabilities, keeping only tags with probability > 0.5.
        Returns ([], []) when the model is unavailable.
    """
    if eva02_model is None:
        return [], []
    # Guard against grayscale/RGBA inputs: permute(2, 0, 1) requires HxWx3.
    rgb = image.convert("RGB")
    tensor = torch.from_numpy(np.array(rgb)).permute(2, 0, 1).float() / 255.0
    # Resize to the model's 448x448 input resolution.
    tensor = torch.nn.functional.interpolate(
        tensor.unsqueeze(0), size=(448, 448), mode='bilinear', align_corners=False
    )
    with torch.no_grad():
        logits = eva02_model(tensor)
    probs = torch.sigmoid(logits[0])  # multi-label head: sigmoid per tag
    detected_tags, tag_scores = [], []
    for tag, prob in zip(eva02_tags, probs):
        if prob > 0.5:
            detected_tags.append(tag)
            tag_scores.append(float(prob))
    return detected_tags, tag_scores


def analyze_image(image):
    """Classify an image and build a Portuguese safety description.

    Args:
        image: a PIL image supplied by the Gradio input component.

    Returns:
        dict with keys: "content" (human/dog/unknown), "adult_content",
        "violence", "sensitive_content" (bools), and "content_description"
        (a human-readable string with debug label info appended).
    """
    if eva02_model is not None:
        tags, _ = analyze_with_eva02(image)
        combined_labels = " ".join(tags).lower()
        print(f"EVA02 detectou: {tags}")
        adult_content = any(k in combined_labels for k in ADULT_KEYWORDS)
        # Best-effort label for the "unknown content" description branch.
        top_label = tags[0] if tags else "desconhecido"
    else:
        content_preds = content_model(image)
        top_content = max(content_preds, key=lambda x: x["score"])
        nsfw_preds = nsfw_model(image)
        top_nsfw = max(nsfw_preds, key=lambda x: x["score"])
        combined_labels = " ".join(
            p["label"].lower() for p in content_preds + nsfw_preds
        )
        adult_content = top_nsfw["label"].lower() == "nsfw"
        top_label = top_content["label"]

    is_human = any(k in combined_labels for k in HUMAN_KEYWORDS)
    is_dog = any(k in combined_labels for k in DOG_KEYWORDS)
    violence = any(k in combined_labels for k in VIOLENCE_KEYWORDS)
    suffering = any(k in combined_labels for k in SUFFERING_KEYWORDS)
    abuse = any(k in combined_labels for k in ABUSE_KEYWORDS)
    death = any(k in combined_labels for k in DEATH_KEYWORDS)
    # "Suspicious" posture keywords only matter when a dog was detected.
    suspicious_animal = is_dog and any(
        k in combined_labels for k in SUSPICIOUS_KEYWORDS
    )
    sensitive = (
        adult_content or violence or suffering or abuse or death or suspicious_animal
    )

    if is_human:
        content_type = "human"
    elif is_dog:
        content_type = "dog"
    else:
        content_type = "unknown"

    description_parts = []
    if content_type == "human":
        description_parts.append("Imagem contendo pessoa(s)")
    elif content_type == "dog":
        description_parts.append("Imagem contendo cão/cachorro")
    else:
        # BUG FIX: the original referenced an undefined name `label` here,
        # raising NameError whenever content was neither human nor dog.
        description_parts.append(f"Imagem contendo {top_label}")
    if adult_content:
        description_parts.append("com conteúdo adulto")
    if violence:
        description_parts.append("com violência")
    if suffering:
        description_parts.append("mostrando sofrimento")
    if abuse:
        description_parts.append("com maus tratos")
    if death:
        description_parts.append("mostrando morte")
    if suspicious_animal:
        description_parts.append("com características suspeitas")
    if sensitive:
        description_parts.append("- CONTEÚDO SENSÍVEL")
    else:
        description_parts.append("- conteúdo seguro")

    description = " ".join(description_parts) + "."
    debug_info = f" [Debug: Labels detectadas: {combined_labels}]"
    return {
        "content": content_type,
        "adult_content": adult_content,
        "violence": violence,
        "sensitive_content": sensitive,
        "content_description": description + debug_info,
    }


demo = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil"),
    outputs="json",
    title="Dog / Human Safety Detector",
)

if __name__ == "__main__":
    demo.launch()