# week13 / app.py
# yiqian6999's picture
# update app.py
# 7b55529
"""
Yelp Reviews Semantic Search Engine
AIPI 510 - Week 13 Project
"""
import gradio as gr
from sentence_transformers import SentenceTransformer
import chromadb
from pathlib import Path
# Load the embedding model once at import time; semantic_search() reuses it
# for every query.
print("Loading embedding model...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Resolve ChromaDB storage relative to this file so the app does not depend
# on the current working directory.
print("Preparing ChromaDB storage...")
BASE_DIR = Path(__file__).resolve().parent
REPO_ROOT = BASE_DIR.parent  # not used in this file; kept as a module-level name
CHROMA_DIR = BASE_DIR / "chromadb"
client = None
collection = None
tried_paths = []

print("Loading ChromaDB collection...")
# Record the location before opening it so the error message below can
# report where the collection was looked for.
tried_paths.append(str(CHROMA_DIR))
candidate_client = chromadb.PersistentClient(path=str(CHROMA_DIR))
collection = candidate_client.get_or_create_collection("yelp_reviews")
client = candidate_client

# Defensive guard: fail loudly at startup rather than on the first search.
if collection is None or client is None:
    raise RuntimeError(
        "Could not find collection 'yelp_reviews'. Checked the following locations: "
        + (", ".join(tried_paths) if tried_paths else "<none>")
    )

# get_or_create_collection() does not fail when the collection is missing;
# it creates a new, empty one. Surface that case so an unpopulated store is
# not mistaken for "no matching reviews".
if collection.count() == 0:
    print(
        "Warning: collection 'yelp_reviews' is empty. Checked the following locations: "
        + ", ".join(tried_paths)
    )
def semantic_search(query, n_results=3):
    """
    Search Yelp reviews using semantic similarity.

    Args:
        query: Search query string. ``None``, non-string, empty, or
            whitespace-only values are treated as "no query".
        n_results: Number of top results to return. Coerced with ``int()``;
            values below 1 are raised to 1.

    Returns:
        Formatted string with the top matching reviews, a prompt asking for
        a query when none was supplied, or "No results found." when the
        collection returns no documents.
    """
    # Guard against None / non-string input as well as blank text so the
    # handler returns a friendly message instead of raising.
    if not isinstance(query, str) or not query.strip():
        return "Please enter a search query."

    # The vector store cannot be asked for fewer than one result.
    n_results = max(1, int(n_results))

    # Encode the query; a one-element batch keeps the 2-D shape that
    # query_embeddings expects after .tolist().
    query_embedding = model.encode([query])

    # Query the collection
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=n_results,
    )

    # Only one query was sent, so its matches are in the first inner list.
    documents = results['documents'][0]
    if not documents:
        return "No results found."

    # Format the results
    output = []
    for rank, doc in enumerate(documents, start=1):
        output.append(f"### Result {rank}")
        output.append(doc)
        output.append("-" * 80)
    return "\n\n".join(output)
def search_interface(query, num_results):
    """
    Gradio callback that forwards the UI inputs to ``semantic_search``.

    Args:
        query: Text entered in the search box.
        num_results: Requested number of results; converted with ``int()``
            before being passed on.

    Returns:
        The formatted string produced by ``semantic_search``.
    """
    result_count = int(num_results)
    return semantic_search(query, n_results=result_count)
# Create Gradio interface
# The widgets and static text are built first and then wired together in a
# single gr.Interface call.
_query_input = gr.Textbox(
    label="Search Query",
    placeholder="e.g., 'great pizza' or 'romantic atmosphere' or 'friendly staff'",
    lines=2,
)
_count_input = gr.Slider(
    minimum=1,
    maximum=10,
    value=3,
    step=1,
    label="Number of Results",
)
_results_output = gr.Textbox(label="Top Matching Reviews", lines=20)

_DESCRIPTION = """Search through Yelp business reviews using AI-powered semantic search.
Enter natural language queries to find relevant reviews based on semantic meaning, not just keywords."""

# Each example row is [query text, number of results]; all use 3 results.
_EXAMPLE_ROWS = [
    [example_query, 3]
    for example_query in (
        "great food and friendly service",
        "romantic atmosphere perfect for date night",
        "fast service and good prices",
        "disappointed with the quality",
        "authentic Italian cuisine",
    )
]

_ARTICLE = """
### About This App
This semantic search engine uses:
- **Dataset**: Yelp Review Full (650K reviews with 1-5 star ratings)
- **Embeddings**: sentence-transformers/all-MiniLM-L6-v2
- **Vector DB**: ChromaDB
- **Interface**: Gradio
Built for AIPI 510: Data Sourcing for Analytics - Week 13
"""

demo = gr.Interface(
    fn=search_interface,
    inputs=[_query_input, _count_input],
    outputs=_results_output,
    title="🔍 Yelp Reviews Semantic Search",
    description=_DESCRIPTION,
    examples=_EXAMPLE_ROWS,
    theme=gr.themes.Soft(),
    article=_ARTICLE,
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()