"""Reverse face search demo.

Upload a face image and a search query; the app scrapes Bing image search
for candidate photos, embeds every detected face with FaceNet
(InceptionResnetV1 / vggface2), and reports the candidate with the highest
cosine similarity to the uploaded face.
"""

import json
import os
from io import BytesIO

import gradio as gr
import requests
import torch
from bs4 import BeautifulSoup
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from torch.nn.functional import cosine_similarity

# Face detector (keeps the single most prominent face) and embedding network.
mtcnn = MTCNN(image_size=160, margin=20, keep_all=False)
resnet = InceptionResnetV1(pretrained='vggface2').eval()

# Directory for temporarily storing scraped candidate images.
DB_DIR = "scraped_images"
os.makedirs(DB_DIR, exist_ok=True)

# Timeout (seconds) for all outbound HTTP requests so a stalled server
# cannot hang the app indefinitely.
REQUEST_TIMEOUT = 10


def fetch_image_urls(query, max_images=3):
    """Scrape Bing image search for *query*.

    Returns up to *max_images* direct image URLs. Bing embeds per-result
    metadata as a JSON string in the ``m`` attribute of ``<a class="iusc">``
    elements; the full-size image URL is its ``murl`` key.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    search_url = f"https://www.bing.com/images/search?q={query.replace(' ', '+')}"
    response = requests.get(search_url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')
    image_elements = soup.find_all('a', class_='iusc')
    urls = []
    for elem in image_elements[:max_images]:
        m = elem.get('m')
        if not m:
            continue
        try:
            # SECURITY FIX: the original used eval() on this attribute, which
            # executes arbitrary code from an untrusted web page. The payload
            # is plain JSON, so parse it as such.
            m_json = json.loads(m)
            urls.append(m_json['murl'])
        except (ValueError, KeyError, TypeError):
            # Malformed metadata for one result: skip it, keep the rest.
            continue
    return urls


def download_images(image_urls):
    """Download each URL into DB_DIR as ``scraped_<i>.jpg``.

    Best-effort: a failed download or unreadable image is skipped so one
    bad URL does not abort the whole batch.
    """
    for i, url in enumerate(image_urls):
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            img = Image.open(BytesIO(response.content)).convert('RGB')
            img.save(os.path.join(DB_DIR, f"scraped_{i}.jpg"))
        except (requests.RequestException, OSError):
            continue


def load_scraped_embeddings():
    """Return ``{filename: embedding}`` for every DB_DIR image with a face.

    Images where MTCNN detects no face are silently omitted.
    """
    embeddings = {}
    for file in os.listdir(DB_DIR):
        if not file.lower().endswith(('.jpg', '.png')):
            continue
        img = Image.open(os.path.join(DB_DIR, file)).convert("RGB")
        face = mtcnn(img)
        if face is not None:
            # Inference only — no_grad avoids building autograd graphs.
            with torch.no_grad():
                embeddings[file] = resnet(face.unsqueeze(0))
    return embeddings


def identify_person(uploaded_img, search_query):
    """Gradio handler: match the uploaded face against freshly scraped images.

    Returns a human-readable result string in every case (missing upload,
    no face detected, no candidates, confident match, weak match).
    """
    if uploaded_img is None:
        return "Please upload an image."

    # Step 1: clear stale files from a previous query, then scrape anew.
    for f in os.listdir(DB_DIR):
        os.remove(os.path.join(DB_DIR, f))
    image_urls = fetch_image_urls(search_query, max_images=5)
    download_images(image_urls)

    # Step 2: embed the downloaded candidates.
    db_embeddings = load_scraped_embeddings()

    # Step 3: embed the uploaded face.
    uploaded_face = mtcnn(uploaded_img.convert("RGB"))
    if uploaded_face is None:
        return "No face detected in the uploaded image."
    with torch.no_grad():
        uploaded_embedding = resnet(uploaded_face.unsqueeze(0))

    # Step 4: pick the candidate with the highest cosine similarity.
    best_match = None
    best_score = -1
    for name, emb in db_embeddings.items():
        score = cosine_similarity(uploaded_embedding, emb).item()
        if score > best_score:
            best_score = score
            best_match = name

    # ROBUSTNESS FIX: the original reported "Best candidate: None
    # (Score: -1.00)" when no scraped image contained a detectable face.
    if best_match is None:
        return "No candidate images with detectable faces were found for this query."

    if best_score > 0.7:
        return f"Best Match: {best_match}\nSimilarity: {best_score:.2f}"
    return f"No confident match found.\nBest candidate: {best_match} (Score: {best_score:.2f})"


# Gradio interface
iface = gr.Interface(
    fn=identify_person,
    inputs=[gr.Image(type="pil"), gr.Text(label="Search Query (e.g., Elon Musk)")],
    outputs="text",
    title="Reverse Face Search via Web Scraping",
    description="Upload a face image and enter a name or query. The app scrapes the web for images and tries to identify the person."
)

if __name__ == "__main__":
    iface.launch()