"""Streamlit semantic search app for SemanticSearchApp.""" from __future__ import annotations import sys import subprocess from pathlib import Path from typing import List import streamlit as st from sentence_transformers import SentenceTransformer from search_engine import SemanticSearchEngine # ================= CONFIG ================= DATASET_PATH = Path("data/stackoverflow_sample_3000.json") # ================= STYLES ================= def inject_styles(): st.markdown( """ """, unsafe_allow_html=True, ) # ================= DATASET SETUP ================= def ensure_dataset(): if not DATASET_PATH.exists(): with st.spinner("Preparing dataset (first run only)..."): script = Path(__file__).parent / "prepare_stackoverflow_sample.py" result = subprocess.run( [sys.executable, str(script)], capture_output=True, text=True, ) if result.returncode != 0: st.error(f"Dataset preparation failed:\n\n{result.stderr}") st.stop() # ================= ENGINE ================= @st.cache_resource(show_spinner=False) def load_engine() -> SemanticSearchEngine: return SemanticSearchEngine(DATASET_PATH) # ================= EMBEDDING ================= @st.cache_resource(show_spinner=False) def load_embedder() -> SentenceTransformer: return SentenceTransformer("all-MiniLM-L6-v2") def get_query_embedding(query: str) -> List[float]: model = load_embedder() return model.encode(query).tolist() # ================= MAIN APP ================= def main(): st.set_page_config( page_title="SemanticSearchApp", page_icon="⬡", layout="centered", ) inject_styles() # ── Brand header ── st.markdown( """
Vector-powered programming search · all-MiniLM-L6-v2
' "Type a programming question above and hit Search.
", unsafe_allow_html=True, ) return # ── Run search ── try: with st.spinner("Running semantic search…"): engine = load_engine() query_embedding = get_query_embedding(query.strip()) results = engine.search(query_embedding, top_k=5) except Exception as e: st.error(f"Search failed: {e}") return # ── Results ── st.markdown( f'