"""Gradio app that flags potentially sensitive image content (humans / dogs).

Primary path: the SmilingWolf EVA02 tagger (loaded via timm from the HF hub)
produces multi-label tags which are matched against keyword vocabularies.
Fallback path (when EVA02 fails to load): a generic image classifier plus a
dedicated NSFW classifier from the transformers pipeline API.
"""

import gradio as gr
from transformers import pipeline
import re
import torch
import timm
from PIL import Image
import numpy as np

# --- Best-effort load of the EVA02 tagger and its tag vocabulary. ---------
# Any failure (network, missing weights, CSV format change) downgrades the
# app to the pipeline-based fallback instead of crashing at import time.
try:
    eva02_model = timm.create_model(
        'hf_hub:SmilingWolf/wd-eva02-large-tagger-v3', pretrained=True
    )
    eva02_model.eval()
    import requests
    tags_response = requests.get(
        'https://huggingface.co/SmilingWolf/wd-eva02-large-tagger-v3/resolve/main/selected_tags.csv'
    )
    tags_lines = tags_response.text.strip().split('\n')
    # Column 1 of each CSV row is the tag name; row 0 is the header.
    eva02_tags = [line.split(',')[1] for line in tags_lines[1:]]
    print(f"Modelo EVA02 carregado com {len(eva02_tags)} tags")
except Exception as e:
    print(f"Erro ao carregar EVA02: {e}")
    eva02_model = None
    eva02_tags = []

# Fallback classifiers, used only when eva02_model is None.
content_model = pipeline("image-classification", model="facebook/convnext-base-224")
nsfw_model = pipeline("image-classification", model="Falconsai/nsfw_image_detection")

# --- Keyword vocabularies (module-level constants; matched by substring). --
HUMAN_KEYWORDS = [
    "human", "person", "people", "man", "woman", "child", "baby", "boy",
    "girl", "face", "portrait", "selfie", "crowd", "family", "couple",
    "teenager", "1boy", "1girl", "2boys", "2girls", "multiple boys",
    "multiple girls", "male", "female", "adult", "teen", "kid", "toddler",
    "infant",
]
DOG_KEYWORDS = [
    "dog", "puppy", "retriever", "labrador", "golden", "beagle", "bulldog",
    "poodle", "german shepherd", "chihuahua", "terrier", "hound", "mastiff",
    "canine", "pet", "animal", "malamute", "malemute", "alaskan", "1dog",
    "2dogs", "multiple dogs", "doggy", "doggie", "pup",
]
VIOLENCE_KEYWORDS = [
    "blood", "wound", "injury", "hurt", "pain", "fight", "violence",
    "weapon", "knife", "gun", "attack", "aggression", "conflict", "battle",
    "war", "bloody", "injured", "wounded", "bleeding", "scar", "bruise",
    "cut",
]
SUFFERING_KEYWORDS = [
    "sad", "crying", "tears", "depressed", "miserable", "suffering", "pain",
    "distress", "anguish", "grief", "mourning", "funeral", "death", "dead",
    "dying", "illness", "sick", "injured", "abandoned", "neglected",
]
ABUSE_KEYWORDS = [
    "abuse", "mistreatment", "cruelty", "torture", "beaten", "chained",
    "caged", "starving", "malnourished", "neglected", "abandoned", "abused",
    "mistreated",
]
DEATH_KEYWORDS = [
    "death", "dead", "dying", "corpse", "carcass", "deceased", "lifeless",
    "motionless", "still", "rigid", "pale", "cold", "skull", "bones",
    "grave", "tombstone", "funeral", "coffin", "burial",
]
SUSPICIOUS_KEYWORDS = [
    "lying", "laying", "ground", "floor", "side", "horizontal", "flat",
    "unconscious", "sleeping", "resting", "still", "motionless", "quiet",
    "on ground", "on floor", "lying down",
]
ADULT_KEYWORDS = ["nsfw", "explicit", "nude", "naked", "sexual", "adult", "mature"]


def analyze_with_eva02(image):
    """Run the EVA02 tagger on a PIL image.

    Args:
        image: a PIL image (any mode; converted to RGB internally).

    Returns:
        (tags, scores): parallel lists of tag names and their sigmoid
        probabilities, keeping only tags with probability > 0.5.
        Returns ([], []) when the model is unavailable.
    """
    if eva02_model is None:
        return [], []
    # Guard against grayscale/RGBA inputs: permute(2, 0, 1) requires HxWx3.
    rgb = image.convert("RGB")
    tensor = torch.from_numpy(np.array(rgb)).permute(2, 0, 1).float() / 255.0
    # Resize to the model's 448x448 input resolution.
    tensor = torch.nn.functional.interpolate(
        tensor.unsqueeze(0), size=(448, 448), mode='bilinear', align_corners=False
    )
    with torch.no_grad():
        logits = eva02_model(tensor)
    probs = torch.sigmoid(logits[0])  # multi-label head: sigmoid per tag
    detected_tags, tag_scores = [], []
    for tag, prob in zip(eva02_tags, probs):
        if prob > 0.5:
            detected_tags.append(tag)
            tag_scores.append(float(prob))
    return detected_tags, tag_scores


def analyze_image(image):
    """Classify an image and build a Portuguese safety description.

    Args:
        image: a PIL image supplied by the Gradio input component.

    Returns:
        dict with keys: "content" (human/dog/unknown), "adult_content",
        "violence", "sensitive_content" (bools), and "content_description"
        (a human-readable string with debug label info appended).
    """
    if eva02_model is not None:
        tags, _ = analyze_with_eva02(image)
        combined_labels = " ".join(tags).lower()
        print(f"EVA02 detectou: {tags}")
        adult_content = any(k in combined_labels for k in ADULT_KEYWORDS)
        # Best-effort label for the "unknown content" description branch.
        top_label = tags[0] if tags else "desconhecido"
    else:
        content_preds = content_model(image)
        top_content = max(content_preds, key=lambda x: x["score"])
        nsfw_preds = nsfw_model(image)
        top_nsfw = max(nsfw_preds, key=lambda x: x["score"])
        combined_labels = " ".join(
            p["label"].lower() for p in content_preds + nsfw_preds
        )
        adult_content = top_nsfw["label"].lower() == "nsfw"
        top_label = top_content["label"]

    is_human = any(k in combined_labels for k in HUMAN_KEYWORDS)
    is_dog = any(k in combined_labels for k in DOG_KEYWORDS)
    violence = any(k in combined_labels for k in VIOLENCE_KEYWORDS)
    suffering = any(k in combined_labels for k in SUFFERING_KEYWORDS)
    abuse = any(k in combined_labels for k in ABUSE_KEYWORDS)
    death = any(k in combined_labels for k in DEATH_KEYWORDS)
    # "Suspicious" posture keywords only matter when a dog was detected.
    suspicious_animal = is_dog and any(
        k in combined_labels for k in SUSPICIOUS_KEYWORDS
    )
    sensitive = (
        adult_content or violence or suffering or abuse or death or suspicious_animal
    )

    if is_human:
        content_type = "human"
    elif is_dog:
        content_type = "dog"
    else:
        content_type = "unknown"

    description_parts = []
    if content_type == "human":
        description_parts.append("Imagem contendo pessoa(s)")
    elif content_type == "dog":
        description_parts.append("Imagem contendo cão/cachorro")
    else:
        # BUG FIX: the original referenced an undefined name `label` here,
        # raising NameError whenever content was neither human nor dog.
        description_parts.append(f"Imagem contendo {top_label}")
    if adult_content:
        description_parts.append("com conteúdo adulto")
    if violence:
        description_parts.append("com violência")
    if suffering:
        description_parts.append("mostrando sofrimento")
    if abuse:
        description_parts.append("com maus tratos")
    if death:
        description_parts.append("mostrando morte")
    if suspicious_animal:
        description_parts.append("com características suspeitas")
    if sensitive:
        description_parts.append("- CONTEÚDO SENSÍVEL")
    else:
        description_parts.append("- conteúdo seguro")

    description = " ".join(description_parts) + "."
    debug_info = f" [Debug: Labels detectadas: {combined_labels}]"
    return {
        "content": content_type,
        "adult_content": adult_content,
        "violence": violence,
        "sensitive_content": sensitive,
        "content_description": description + debug_info,
    }


demo = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil"),
    outputs="json",
    title="Dog / Human Safety Detector",
)

if __name__ == "__main__":
    demo.launch()