Spaces:

jaydeep123423
/

final_try_SS

Sleeping

App Files Files Community

jaydeep123423 committed on Nov 25, 2025

Commit

1f2ac71

verified ·

1 Parent(s): d3bebf0

Upload app.py

Browse files

Files changed (1) hide show

app.py +165 -0

app.py ADDED Viewed

	@@ -0,0 +1,165 @@

+"""
+Semantic Quote Search Engine
+Deploy this to Hugging Face Spaces!
+"""
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import chromadb
+from datasets import load_dataset
+import pandas as pd
+import os
+# ============== INITIALIZATION ==============
+print("🚀 Starting Semantic Search Engine...")
+# Load embedding model
+print("📦 Loading embedding model...")
+model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+print("✅ Model loaded!")
+# Initialize ChromaDB
+chroma_path = "./chroma_db"
+os.makedirs(chroma_path, exist_ok=True)
+client = chromadb.PersistentClient(path=chroma_path)
+# Check if collection exists, otherwise create it
+try:
+    collection = client.get_collection("quotes_collection")
+    print(f"✅ Loaded existing collection with {collection.count()} documents")
+except:
+    print("📊 Creating new collection from dataset...")
+    # Load dataset
+    dataset = load_dataset("Abirate/english_quotes", split="train")
+    df = pd.DataFrame(dataset)
+    texts = []
+    metadata = []
+    for idx, row in df.iterrows():
+        quote = row['quote']
+        author = row['author']
+        tags = ', '.join(row['tags']) if row['tags'] else 'No tags'
+        text = f"{quote} - {author}"
+        texts.append(text)
+        metadata.append({
+            'quote': quote,
+            'author': author,
+            'tags': tags
+        })
+        if idx >= 499:  # Limit to 500 quotes
+            break
+    # Generate embeddings
+    print("🔢 Generating embeddings...")
+    embeddings = model.encode(texts, show_progress_bar=True)
+    # Create collection
+    collection = client.create_collection(
+        name="quotes_collection",
+        metadata={"description": "Famous quotes collection"}
+    )
+    # Add documents in batches
+    ids = [f"quote_{i}" for i in range(len(texts))]
+    batch_size = 100
+    for i in range(0, len(texts), batch_size):
+        end_idx = min(i + batch_size, len(texts))
+        collection.add(
+            documents=texts[i:end_idx],
+            embeddings=embeddings[i:end_idx].tolist(),
+            ids=ids[i:end_idx],
+            metadatas=metadata[i:end_idx]
+        )
+    print(f"✅ Collection created with {collection.count()} documents!")
+# ============== SEARCH FUNCTION ==============
+def semantic_search(query, n_results=5):
+    """
+    Perform semantic search over the quotes collection.
+    """
+    # Encode query
+    query_embedding = model.encode([query])
+    # Query ChromaDB
+    results = collection.query(
+        query_embeddings=query_embedding.tolist(),
+        n_results=n_results,
+        include=['documents', 'metadatas', 'distances']
+    )
+    # Format results
+    output = []
+    for i in range(len(results['documents'][0])):
+        meta = results['metadatas'][0][i]
+        distance = results['distances'][0][i]
+        similarity = 1 - (distance / 2)  # Convert distance to similarity
+        result_text = f"""
+### Result {i+1} (Similarity: {similarity:.1%})
+> "{meta['quote']}"
+**— {meta['author']}**
+🏷️ *Tags: {meta['tags']}*
+"""
+        output.append(result_text)
+    return "\n---\n".join(output)
+def search_quotes(query, num_results):
+    """Wrapper for Gradio interface"""
+    if not query.strip():
+        return "⚠️ Please enter a search query!"
+    return semantic_search(query, n_results=int(num_results))
+# ============== GRADIO INTERFACE ==============
+demo = gr.Interface(
+    fn=search_quotes,
+    inputs=[
+        gr.Textbox(
+            label="🔍 Search Query",
+            placeholder="Try: 'love', 'success', 'wisdom', 'courage'...",
+            lines=2
+        ),
+        gr.Slider(
+            minimum=1,
+            maximum=10,
+            value=5,
+            step=1,
+            label="📊 Number of Results"
+        )
+    ],
+    outputs=gr.Markdown(label="📚 Search Results"),
+    title="📚 Semantic Quote Search Engine",
+    description="""
+    ## Search through famous quotes using AI-powered semantic similarity!
+    Unlike traditional keyword search, this understands the **meaning** of your query.
+    **How it works:**
+    1. Your query is converted to a vector using a transformer model
+    2. We find quotes with the most similar meaning
+    3. Results are ranked by semantic similarity
+    *Built with SentenceTransformers, ChromaDB, and Gradio*
+    """,
+    examples=[
+        ["finding happiness in life", 5],
+        ["overcoming fear and challenges", 5],
+        ["the importance of friendship", 3],
+        ["learning from mistakes", 5],
+        ["believing in yourself", 3]
+    ]
+)
+# Launch
+if __name__ == "__main__":
+    demo.launch()