"""
Semantic Quote Search Engine
AIPI 510 - Deployed on Hugging Face Spaces, Jaideep
"""

import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
import os

# Initialization

# Load embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("Model loaded!")

# Load existing ChromaDB (pre-built, not created on the fly)
chroma_path = "./chromadb"
client = chromadb.PersistentClient(path=chroma_path)
collection = client.get_collection("quotes_collection")
print(f"Loaded collection with {collection.count()} documents!")


# SEARCH FUNCTION

def _format_result(rank, meta, document, distance):
    """Render one search hit as a Markdown section.

    Args:
        rank: 1-based position of the hit in the result list.
        meta: Metadata mapping for the hit; the keys 'quote', 'author' and
            'tags' are read. May be None or incomplete.
        document: Stored document text for the hit, used only as a fallback
            when the metadata has no 'quote' entry.
        distance: Distance reported by ChromaDB for the hit.

    Returns:
        A Markdown string with a heading, the quote, the author and the tags.
    """
    meta = meta or {}
    # NOTE(review): falling back to the document text presumes the document
    # holds the quote itself - confirm against the collection build script.
    quote = meta.get('quote', document or "")
    author = meta.get('author', "Unknown")
    tags = meta.get('tags', "")

    # Convert distance to similarity score.
    # NOTE(review): this assumes the collection's metric yields distances for
    # which 1 - d/2 is a meaningful similarity (e.g. squared L2 on
    # unit-length embeddings) - confirm the metric used when it was built.
    similarity = 1 - (distance / 2)

    # Blank lines keep the heading, block quote, author and tags as separate
    # Markdown blocks instead of one run-on heading line.
    return (
        f'\n### Result {rank} (Similarity: {similarity:.1%})\n\n'
        f'> "{quote}"\n\n'
        f'**— {author}**\n\n'
        f'🏷️ *Tags: {tags}*\n'
    )


def semantic_search(query, n_results=5):
    """Perform semantic search over the quotes collection.

    Args:
        query: Free-text search string; encoded with the module-level
            ``model`` before querying.
        n_results: Maximum number of matches requested from ChromaDB.

    Returns:
        A Markdown string with one section per match, separated by
        horizontal rules, or a short notice when nothing matched.
    """
    # Encode query using the same model
    query_embedding = model.encode([query])

    # Query ChromaDB for similar documents
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=n_results,
        include=['documents', 'metadatas', 'distances'],
    )

    # Results hold one inner list per query embedding; only one was sent.
    documents = results['documents'][0]
    metadatas = results['metadatas'][0]
    distances = results['distances'][0]

    if not documents:
        # An empty string would leave the output panel blank with no hint why.
        return "No matching quotes found."

    # Format results nicely
    sections = [
        _format_result(rank, meta, document, distance)
        for rank, (meta, document, distance) in enumerate(
            zip(metadatas, documents, distances), start=1
        )
    ]
    return "\n---\n".join(sections)


def search_quotes(query, num_results):
    """Wrapper function for Gradio interface.

    Args:
        query: Text from the search box; may be empty or None.
        num_results: Slider value; converted to int before searching.

    Returns:
        Markdown search results, or a prompt asking for a query when the
        input is missing or whitespace-only.
    """
    if not query or not query.strip():
        return "Please enter a search query!"
    return semantic_search(query, n_results=int(num_results))


# Gradio interface

# Kept flush-left so Markdown does not treat indented lines as a code block.
DESCRIPTION = """
## Search through famous quotes using AI-powered semantic similarity!

Unlike traditional keyword search, this understands the **meaning** of your query.

**How it works:**
1. Your query is converted to a vector using a transformer model
2. We find quotes with the most similar meaning in our database
3. Results are ranked by semantic similarity

*Built for AIPI 510: Data Sourcing for Analytics | Duke University*
"""

demo = gr.Interface(
    fn=search_quotes,
    inputs=[
        gr.Textbox(
            label="🔍 Search Query",
            placeholder="Try: 'love', 'success', 'wisdom', 'courage'...",
            lines=2,
        ),
        gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label=" Number of Results",
        ),
    ],
    outputs=gr.Markdown(label=" Search Results"),
    title=" Semantic Quote Search Engine",
    description=DESCRIPTION,
    # Each example is [query, number of results], matching the inputs order.
    examples=[
        ["finding happiness in life", 5],
        ["overcoming fear and challenges", 5],
        ["the importance of friendship", 3],
        ["learning from mistakes", 5],
        ["believing in yourself", 3],
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()