Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from safetensors.torch import load_file | |
| from pathlib import Path | |
| import json | |
| from functools import lru_cache | |
| # Import utilities | |
| from models.model_loader import load_embed_model, load_rerank_model | |
| from utils.search import search_embeddings, rerank_results | |
| from utils.visualization import plot_results | |
| # ============================================================================ | |
| # Data Loading Functions | |
| # ============================================================================ | |
@lru_cache(maxsize=8)
def load_metadata(metadata_path: str = "data/recipes/metadata.json") -> dict:
    """Load and parse the recipe metadata JSON file, caching the result per path.

    The file is read as UTF-8 and parsed only on the first call for a given
    ``metadata_path``; later calls with the same path return the very same
    object without touching the disk.

    Args:
        metadata_path: Location of the metadata JSON file.

    Returns:
        The parsed JSON document. Because the result is cached, every caller
        receives the *same* object -- treat it as read-only.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(metadata_path, "r", encoding="utf-8") as f:
        return json.load(f)
def get_recipes_and_paths(metadata_path: str = "data/recipes/metadata.json"):
    """Split the recipe metadata into parallel lists used by the search code.

    Args:
        metadata_path: Location of the metadata JSON file; forwarded to
            ``load_metadata``.

    Returns:
        A 3-tuple ``(markdown_texts, image_paths, recipes)`` where
        ``recipes`` is the ``"recipes"`` list from the metadata and the other
        two lists hold each recipe's ``"markdown"`` and ``"image_path"``
        values in the same order.

    Raises:
        KeyError: If the metadata has no ``"recipes"`` entry or a recipe
            lacks ``"markdown"`` / ``"image_path"``.
    """
    print("📋 Loading metadata...")
    recipe_records = load_metadata(metadata_path)["recipes"]

    # Two separate passes keep the lists index-aligned with `recipe_records`.
    markdown_texts = [record["markdown"] for record in recipe_records]
    image_paths = [record["image_path"] for record in recipe_records]

    recipe_count = len(recipe_records)
    print(f"✅ Loaded {recipe_count} recipes")
    return markdown_texts, image_paths, recipe_records
| # ============================================================================ | |
| # Load Models & Data (once at startup) | |
| # ============================================================================ | |
def _load_embedding_tensor(display_name: str, file_path: str, tensor_key: str):
    """Read one tensor out of a safetensors file, logging before and after."""
    print(f" - Loading {display_name.lower()} embeddings...")
    tensor = load_file(file_path)[tensor_key]
    print(f" ✅ {display_name} embeddings shape: {tensor.shape}")
    return tensor


# Everything below runs once, at import time, and populates the module-level
# names that the search function reads.
print("🔄 Loading models...")
embed_model, embed_device = load_embed_model()
rerank_model, rerank_processor, rerank_device = load_rerank_model()

print("📦 Loading embeddings...")
# NOTE(review): the image file name ("image_embedding_500", singular) and the
# image+text key (no "_500" suffix) do not follow the naming of the other
# entries -- confirm they match the files that are actually shipped.
image_embeddings = _load_embedding_tensor(
    "Image",
    "data/embeddings/image_embedding_500.safetensors",
    "image_embeddings_500",
)
text_embeddings = _load_embedding_tensor(
    "Text",
    "data/embeddings/text_embeddings_500.safetensors",
    "text_embeddings_500",
)
image_text_embeddings = _load_embedding_tensor(
    "Image+Text",
    "data/embeddings/image_text_embeddings_500.safetensors",
    "image_text_embeddings",
)

print("📋 Loading recipe data...")
all_recipes, all_image_paths, full_recipes = get_recipes_and_paths()

# Startup summary.
print("\n✅ System ready!")
print(f" 📚 Recipes loaded: {len(all_recipes)}")
print(f" 🖼️ Images loaded: {len(all_image_paths)}")
print(" 📊 Embeddings:")
for _summary_name, _summary_tensor in (
    ("Image", image_embeddings),
    ("Text", text_embeddings),
    ("Image+Text", image_text_embeddings),
):
    print(f" - {_summary_name}: {_summary_tensor.shape}")
| # ============================================================================ | |
| # Gradio Interface | |
| # ============================================================================ | |
def _select_corpus(modality: str):
    """Map a UI modality label to ``(embeddings, documents, search_modality)``."""
    if modality == "Image":
        # Image-only search has no text to show, so pad with empty documents
        # to keep the list index-aligned with the image paths.
        return image_embeddings, [""] * len(all_image_paths), "image"
    if modality == "Text":
        return text_embeddings, all_recipes, "text"
    # Any other label is treated as the combined "Image+Text" mode.
    return image_text_embeddings, all_recipes, "image_text"


def _attach_recipe_details(result: dict) -> None:
    """Add a compact ``recipe_details`` summary to *result* (best effort)."""
    img_path = result.get("image_path")
    if not img_path:
        return
    try:
        # The recipe is located through its image path; `all_image_paths` and
        # `full_recipes` are built in the same order.
        recipe = full_recipes[all_image_paths.index(img_path)]
        result["recipe_details"] = {
            "name": recipe["name"],
            "description": recipe["description"][:200] + "...",
            "tags": recipe["tags"][:5],
            "ingredients_count": len(recipe["ingredients"]),
            "steps_count": len(recipe["steps"]),
        }
    except (ValueError, IndexError, KeyError) as exc:
        # Enrichment is optional: report the problem and keep the bare result
        # rather than failing the whole search.
        print(f" ⚠️ Could not attach recipe details for {img_path!r}: {exc!r}")


def search_and_display(
    query: str,
    modality: str,
    use_rerank: bool,
    num_results: int = 3
):
    """Run a recipe search for the UI and return what the two outputs display.

    Args:
        query: Free-text search query. ``None``, empty, or whitespace-only
            queries short-circuit with an error payload.
        modality: ``"Image"``, ``"Text"`` or ``"Image+Text"``; any other value
            is handled like ``"Image+Text"``.
        use_rerank: Whether to rerank the initial hits. Ignored for the
            ``"Text"`` modality and when the search produced no hits.
        num_results: How many results to enrich, plot and return. Coerced
            with ``int()`` because it is used as a slice bound.

    Returns:
        A 2-tuple ``(image, payload)``:

        * ``image`` is whatever ``plot_results`` returns for the image-based
          modalities, and ``None`` for ``"Text"`` or for an empty query.
        * ``payload`` is the list of the first ``num_results`` result dicts
          (with ``recipe_details`` added where the recipe could be found), or
          ``{"error": ...}`` for an empty query.
    """
    if not query or not query.strip():
        return None, {"error": "Please enter a search query"}

    # UI widgets may hand over a float; slicing below requires an int.
    num_results = int(num_results)

    embeddings, documents, search_modality = _select_corpus(modality)
    print(f"\n🔍 Searching with modality: {modality}")
    print(f" Embeddings shape: {embeddings.shape}")
    print(f" Query: {query[:50]}...")

    # Initial search: retrieve a wider candidate pool than is displayed so
    # that the optional reranker has something to reorder.
    results = search_embeddings(
        query=query,
        query_image=None,
        model=embed_model,
        embeddings=embeddings,
        documents=documents,
        image_paths=all_image_paths,
        top_k=20,
        modality=search_modality,
    )
    print(f" ✅ Found {len(results)} results")

    # Optional reranking (only for the image / image_text modalities, and
    # only when there is something to rerank).
    if use_rerank and modality != "Text" and results:
        rerank_count = min(10, len(results))
        print(f" 🎯 Reranking top {rerank_count} results...")
        results = rerank_results(
            query=query,
            results=results,
            rerank_model=rerank_model,
            rerank_processor=rerank_processor,
            device=rerank_device,
            top_k=rerank_count,
        )
        print(" ✅ Reranking complete")

    # Only the results that will be shown are enriched. The slice is a new
    # list, but it holds the same dict objects, so the additions are visible
    # through `results` as well.
    for result in results[:num_results]:
        _attach_recipe_details(result)

    if modality == "Text":
        # Text-only search has no images to plot.
        return None, results[:num_results]

    output_img = plot_results(results, query, num_images=num_results)
    return output_img, results[:num_results]
| # ============================================================================ | |
| # Gradio UI | |
| # ============================================================================ | |
# Static page copy, kept out of the layout code so the structure below stays
# readable.
_HEADER_MARKDOWN = """
# 🍳 Multimodal Recipe RAG System
Search **500 recipes** using text queries across images and documents.
**Three Search Modes:**
- 🖼️ **Image**: Visual similarity search (find similar-looking dishes)
- 📝 **Text**: Semantic text search (find by ingredients, instructions, reviews)
- 🎨 **Image+Text**: Combined multimodal search (best of both worlds)
Powered by **NVIDIA Nemotron Embed-VL** and **Rerank-VL** models.
"""

_TIPS_MARKDOWN = """
**💡 Tips:**
- Use **Image** for visual similarity (e.g., "desserts with chocolate")
- Use **Text** for ingredient/instruction search (e.g., "vegetarian pasta")
- Use **Image+Text** for best overall results
"""

_FOOTER_MARKDOWN = """
---
### 🔧 Technical Details
**Models:**
- 🔍 Embedding: [nvidia/llama-nemotron-embed-vl-1b-v2](https://huggingface.co/nvidia/llama-nemotron-embed-vl-1b-v2)
- 🎯 Reranking: [nvidia/llama-nemotron-rerank-vl-1b-v2](https://huggingface.co/nvidia/llama-nemotron-rerank-vl-1b-v2)
**Dataset:** [TurkishCodeMan/recipe-synthetic-images-10k](https://huggingface.co/datasets/TurkishCodeMan/recipe-synthetic-images-10k) (500 recipes)
**Embeddings:**
- Image: 2048-dim visual embeddings
- Text: 2048-dim semantic embeddings
- Image+Text: 2048-dim multimodal embeddings
"""

# Each row is (query, modality, use_rerank, num_results), matching the order
# of the input components wired up below.
_EXAMPLE_QUERIES = [
    # Image searches
    ["recipes with steak", "Image", True, 3],
    ["chocolate desserts", "Image", True, 3],
    ["colorful salads", "Image", False, 4],
    # Text searches
    ["healthy breakfast ideas", "Text", False, 5],
    ["vegetarian meals with pasta", "Text", False, 3],
    ["quick dinner under 30 minutes", "Text", False, 4],
    # Image+Text searches
    ["creamy pasta dishes", "Image+Text", True, 3],
    ["spicy chicken recipes", "Image+Text", True, 3],
    ["fresh summer salads", "Image+Text", True, 4],
]

with gr.Blocks(title="🍳 Multimodal Recipe Search", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MARKDOWN)

    with gr.Row():
        # Left column: search controls.
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="🔍 Search Query",
                placeholder="e.g., 'chocolate cake recipe', 'healthy breakfast', 'pasta with tomatoes'",
                lines=2,
            )
            modality_radio = gr.Radio(
                choices=["Image", "Text", "Image+Text"],
                value="Image+Text",  # combined search is the default
                label="📊 Search Modality",
                info="Choose how to search: visual, text, or combined",
            )
            rerank_check = gr.Checkbox(
                label="🎯 Use Reranking (Cross-Encoder)",
                value=True,
                info="Rerank top results for better accuracy (adds ~1-2s)",
            )
            num_results_slider = gr.Slider(
                minimum=1,
                maximum=5,
                value=3,
                step=1,
                label="📈 Number of Results to Display",
            )
            search_btn = gr.Button("🚀 Search", variant="primary", size="lg")
            gr.Markdown(_TIPS_MARKDOWN)

        # Right column: search output.
        with gr.Column(scale=2):
            output_image = gr.Image(label="🖼️ Top Recipe Results", type="pil")
            output_json = gr.JSON(label="📋 Detailed Results")

    # The same components feed both the example rows and the search button.
    _search_inputs = [query_input, modality_radio, rerank_check, num_results_slider]
    _search_outputs = [output_image, output_json]

    with gr.Accordion("💡 Example Queries", open=False):
        gr.Examples(
            examples=_EXAMPLE_QUERIES,
            inputs=_search_inputs,
            label="Try these examples",
        )

    gr.Markdown(_FOOTER_MARKDOWN)

    # Clicking the button runs the search and fills both output components.
    search_btn.click(
        fn=search_and_display,
        inputs=_search_inputs,
        outputs=_search_outputs,
    )
| # ============================================================================ | |
| # Launch | |
| # ============================================================================ | |
if __name__ == "__main__":
    # Start the web server only when this file is executed as a script.
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**_launch_options)