Spaces:
Sleeping
Sleeping
| """ | |
| Yelp Reviews Semantic Search Engine | |
| AIPI 510 - Week 13 Project | |
| """ | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| import chromadb | |
| from pathlib import Path | |
# Load the embedding model once at import time; semantic_search() reuses it
print("Loading embedding model...")
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Resolve ChromaDB storage relative to this file
print("Preparing ChromaDB storage...")
BASE_DIR = Path(__file__).resolve().parent
REPO_ROOT = BASE_DIR.parent
client = None
collection = None
tried_paths = []

print("Loading ChromaDB collection...")
chroma_dir = BASE_DIR / "chromadb"
# Record the location *before* touching it so a failure can report where we
# looked (previously this list was never filled and the message said "<none>").
tried_paths.append(str(chroma_dir))
try:
    candidate_client = chromadb.PersistentClient(path=str(chroma_dir))
    collection = candidate_client.get_or_create_collection("yelp_reviews")
    client = candidate_client
except Exception as exc:
    # Startup boundary: surface one clear error and keep the original cause.
    raise RuntimeError(
        "Could not find collection 'yelp_reviews'. Checked the following locations: "
        + ", ".join(tried_paths)
    ) from exc

# get_or_create_collection() creates the collection when it is absent, so a
# missing index shows up as an empty collection rather than as an error.
if collection.count() == 0:
    print(
        f"Warning: collection 'yelp_reviews' at {chroma_dir} is empty; "
        "searches will return no results."
    )
def semantic_search(query, n_results=3):
    """
    Search Yelp reviews using semantic similarity.

    Args:
        query: Search query string. ``None``, non-string, empty, or
            whitespace-only values are treated as "no query".
        n_results: Number of top results to return.

    Returns:
        Formatted string with the top matching reviews, a prompt to enter
        a query when none was given, or "No results found." when the
        collection returns no documents.
    """
    # Guard against None / non-string input as well as blank strings;
    # calling .strip() unconditionally would raise AttributeError on None.
    if not isinstance(query, str) or not query.strip():
        return "Please enter a search query."

    # Encode the query
    query_embedding = model.encode([query])

    # Query the collection
    results = collection.query(
        query_embeddings=query_embedding.tolist(),
        n_results=n_results,
    )

    # Results are grouped per query embedding; exactly one was sent, so only
    # the first group is relevant. Tolerate a missing/empty documents field.
    document_groups = results['documents'] or [[]]
    documents = document_groups[0] or []

    # Format the results
    output = []
    for rank, doc in enumerate(documents, start=1):
        output.append(f"### Result {rank}")
        output.append(doc)
        output.append("-" * 80)

    return "\n\n".join(output) if output else "No results found."
def search_interface(query, num_results):
    """Gradio callback: forward the query to ``semantic_search``.

    ``num_results`` is converted with ``int`` before being passed on as
    ``n_results``.
    """
    result_count = int(num_results)
    return semantic_search(query, n_results=result_count)
# --- Gradio UI definition ---------------------------------------------------

# Sample queries shown under the form; each is paired with a result count of 3
_EXAMPLE_QUERIES = (
    "great food and friendly service",
    "romantic atmosphere perfect for date night",
    "fast service and good prices",
    "disappointed with the quality",
    "authentic Italian cuisine",
)

# Short blurb displayed below the title
_DESCRIPTION = """Search through Yelp business reviews using AI-powered semantic search.
Enter natural language queries to find relevant reviews based on semantic meaning, not just keywords."""

# Footer text passed to the interface as `article`
_ARTICLE = """
### About This App
This semantic search engine uses:
- **Dataset**: Yelp Review Full (650K reviews with 1-5 star ratings)
- **Embeddings**: sentence-transformers/all-MiniLM-L6-v2
- **Vector DB**: ChromaDB
- **Interface**: Gradio
Built for AIPI 510: Data Sourcing for Analytics - Week 13
"""

# Input components, in the order search_interface() receives their values
_query_input = gr.Textbox(
    label="Search Query",
    placeholder="e.g., 'great pizza' or 'romantic atmosphere' or 'friendly staff'",
    lines=2,
)
_count_input = gr.Slider(
    minimum=1,
    maximum=10,
    value=3,
    step=1,
    label="Number of Results",
)

# Output component that receives the formatted result string
_results_output = gr.Textbox(label="Top Matching Reviews", lines=20)

demo = gr.Interface(
    fn=search_interface,
    inputs=[_query_input, _count_input],
    outputs=_results_output,
    title="🔍 Yelp Reviews Semantic Search",
    description=_DESCRIPTION,
    examples=[[example_query, 3] for example_query in _EXAMPLE_QUERIES],
    theme=gr.themes.Soft(),
    article=_ARTICLE,
)

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()