import json
from typing import Any, Optional, Tuple

import gradio as gr
import requests
import torch
from sentence_transformers import SentenceTransformer, util

# Load sentence transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Load Louvre data from Hugging Face
url = "https://huggingface.co/datasets/Agathe1489/Louvre-ARK/resolve/main/louvre_data.jsonl"
# A timeout keeps startup from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as JSON below.
response = requests.get(url, timeout=60)
response.raise_for_status()
# One JSON object per line; blank lines are skipped so a stray empty line in
# the payload does not crash json.loads.
artworks = [
    json.loads(line)
    for line in response.text.split("\n")
    if line.strip()
]
if not artworks:
    # Every search would fail on an empty embedding matrix; fail early instead.
    raise RuntimeError(f"No artworks could be loaded from {url}")

# Precompute embeddings for title + description
texts = [
    f"{art.get('title', '')}. {art.get('description', '')}"
    for art in artworks
]
embeddings = model.encode(texts, convert_to_tensor=True)


def _first_entry_value(entries: Any, key: str, default: str) -> Any:
    """Return ``entries[0][key]`` when available, otherwise *default*.

    Tolerates *entries* being missing (``None``), empty, or holding a first
    element that is not a dict, all of which would otherwise raise.
    """
    if isinstance(entries, (list, tuple)) and entries:
        first = entries[0]
        if isinstance(first, dict):
            return first.get(key, default)
    return default


# Search function
def search_artwork(query: str) -> Tuple[Optional[str], str]:
    """Find the artwork whose title/description best matches *query*.

    The query is embedded with the same model as the precomputed ``embeddings``
    and the record with the highest cosine similarity is selected.

    Args:
        query: Free-text description, theme, or keyword.

    Returns:
        A ``(image_url, display_text)`` pair. ``image_url`` is ``None`` when
        the query is blank or the record has no usable image URL;
        ``display_text`` is a Markdown summary of the matched artwork (or a
        prompt to enter a query).
    """
    if not query or not query.strip():
        # Nothing meaningful to embed; avoid returning an arbitrary artwork.
        return None, "Please enter a description or keyword to search."

    query_embedding = model.encode(query, convert_to_tensor=True)
    # cos_sim returns a (1, N) matrix for a single query; take its only row.
    scores = util.cos_sim(query_embedding, embeddings)[0]
    best_idx = int(torch.argmax(scores).item())
    result = artworks[best_idx]

    # NOTE(review): assumes "image" and "creator" are lists of dicts, as the
    # lookups below imply - confirm against the dataset schema.
    # Return None rather than "" so the image output is left empty instead of
    # being asked to load an empty path.
    image_url = _first_entry_value(result.get("image"), "urlImage", "") or None
    title = result.get("title", "Untitled")
    description = result.get("description", "No description available.")
    creator = _first_entry_value(
        result.get("creator"), "label", "Unknown artist"
    )
    page_url = result.get("url", "")

    display_text = f"**{title}** by *{creator}*\n\n{description}"
    if page_url:
        # Only render the link when there is a target; "[...]()" is a dead link.
        display_text += f"\n\n[View on Louvre]({page_url})"
    return image_url, display_text


# Gradio UI
iface = gr.Interface(
    fn=search_artwork,
    inputs=gr.Textbox(placeholder="Describe a scene, object, theme, or keyword..."),
    outputs=[
        gr.Image(label="Artwork Image"),
        gr.Markdown(label="Details"),
    ],
    title="🎨 Louvre Artwork Finder",
    description="Enter a theme or description and discover related artwork from the Louvre Museum collection.",
)

# Launch
if __name__ == "__main__":
    iface.launch()