File size: 2,993 Bytes
f3080da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
Semantic Quote Search Engine
AIPI 510 - Deployed on Hugging Face Spaces, Jaideep
"""

import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
import os

# Initialization
# Everything below runs once, at import time, so the model and the vector
# store are ready before the first search request is handled.

# Load the embedding model; semantic_search() encodes each query with it.

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("Model loaded!")

# Load existing ChromaDB (pre-built, not created on the fly).
# get_collection() is used rather than a create call, so the
# "quotes_collection" collection is expected to already exist under chroma_path.

chroma_path = "./chromadb"
client = chromadb.PersistentClient(path=chroma_path)
collection = client.get_collection("quotes_collection")
print(f"Loaded collection with {collection.count()} documents!")


# SEARCH FUNCTION
def semantic_search(query, n_results=5):
    """
    Perform semantic search over the quotes collection.

    Args:
        query: Free-text search string. It is embedded with the module-level
            ``model`` and the embedding is sent to the module-level
            ``collection``.
        n_results: Maximum number of quotes to request (default 5).

    Returns:
        A Markdown string with one section per hit (similarity, quote,
        author, tags), sections separated by horizontal rules. When the
        collection returns no hits, a short notice is returned instead of
        an empty string.
    """
    # Encode query using the same model. encode() receives a one-item list,
    # so the result holds exactly one embedding.
    query_embedding = model.encode([query])

    # Query ChromaDB for similar documents
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=n_results,
        include=['documents', 'metadatas', 'distances']
    )

    # Results are nested per query embedding; only one embedding was sent,
    # so the hits for this query live at index 0.
    metadatas = results['metadatas'][0]
    distances = results['distances'][0]

    if not metadatas:
        # Avoid handing the UI an empty string, which renders as a blank pane.
        return "No matching quotes found."

    # Format results nicely
    output = []
    for rank, (meta, distance) in enumerate(zip(metadatas, distances), start=1):
        # Convert distance to similarity score.
        # NOTE(review): 1 - d/2 matches cosine similarity only if the
        # collection reports squared-L2 distance over unit-length
        # embeddings — confirm against how the collection was built.
        similarity = 1 - (distance / 2)

        result_text = f"""
### Result {rank} (Similarity: {similarity:.1%})

> "{meta['quote']}"

**— {meta['author']}**

🏷️ *Tags: {meta['tags']}*
"""
        output.append(result_text)

    return "\n---\n".join(output)


def search_quotes(query, num_results):
    """Wrapper function for Gradio interface.

    Args:
        query: Text from the search box. May be ``None`` or blank.
        num_results: Requested number of results; coerced to ``int`` before
            being passed on.

    Returns:
        The Markdown produced by ``semantic_search``, or a prompt asking for
        a query when ``query`` is ``None``, empty, or whitespace only.
    """
    # Guard None as well as blank text: calling .strip() on None would raise
    # AttributeError instead of showing the friendly prompt.
    if query is None or not query.strip():
        return "Please enter a search query!"
    return semantic_search(query, n_results=int(num_results))


# gradio interface
# The widgets are created first, in the same order the Interface lists them
# (query box, result-count slider, Markdown output), then wired together.
_query_box = gr.Textbox(
    label="🔍 Search Query",
    placeholder="Try: 'love', 'success', 'wisdom', 'courage'...",
    lines=2,
)
_count_slider = gr.Slider(
    minimum=1,
    maximum=10,
    value=5,
    step=1,
    label=" Number of Results",
)
_results_pane = gr.Markdown(label=" Search Results")

# Markdown blurb displayed under the title.
_DESCRIPTION = """
## Search through famous quotes using AI-powered semantic similarity!

Unlike traditional keyword search, this understands the **meaning** of your query.

**How it works:**
1. Your query is converted to a vector using a transformer model
2. We find quotes with the most similar meaning in our database
3. Results are ranked by semantic similarity

*Built for AIPI 510: Data Sourcing for Analytics | Duke University*
"""

# Each example row is [query text, number of results], matching the inputs.
_EXAMPLES = [
    ["finding happiness in life", 5],
    ["overcoming fear and challenges", 5],
    ["the importance of friendship", 3],
    ["learning from mistakes", 5],
    ["believing in yourself", 3],
]

demo = gr.Interface(
    fn=search_quotes,
    inputs=[_query_box, _count_slider],
    outputs=_results_pane,
    title=" Semantic Quote Search Engine",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()