Spaces:
Sleeping
Sleeping
| """Streamlit semantic search app for SemanticSearchApp.""" | |
| from __future__ import annotations | |
| import sys | |
| import subprocess | |
| from pathlib import Path | |
| from typing import List | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer | |
| from search_engine import SemanticSearchEngine | |
| # ================= CONFIG ================= | |
# Sample dataset location; a relative path, so it resolves against the
# working directory the app is started from.
DATASET_PATH = Path("data") / "stackoverflow_sample_3000.json"
| # ================= STYLES ================= | |
def inject_styles():
    """Inject the app's custom stylesheet into the current Streamlit page.

    The stylesheet defines the colour and font variables, applies the dark
    theme globally, hides Streamlit's own chrome (main menu, footer, header,
    deploy button) and styles the brand header, query box, search button,
    banners and result cards. It is emitted as a raw ``<style>`` element via
    ``st.markdown(..., unsafe_allow_html=True)``.
    """
    # The literal is kept flush-left so the emitted text carries no extra
    # leading whitespace.
    stylesheet = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap');
/* ββ Root Variables ββ */
:root {
--bg: #0a0c10;
--surface: #111318;
--surface2: #181c24;
--border: #1f2430;
--accent: #00e5ff;
--accent2: #7c3aed;
--text: #e2e8f0;
--muted: #64748b;
--success: #10b981;
--mono: 'Space Mono', monospace;
--sans: 'DM Sans', sans-serif;
}
/* ββ Global Reset ββ */
html, body, [class*="css"] {
font-family: var(--sans) !important;
background-color: var(--bg) !important;
color: var(--text) !important;
}
.main .block-container {
padding: 2rem 3rem 4rem 3rem !important;
max-width: 900px !important;
}
/* ββ Hide Streamlit chrome ββ */
#MainMenu, footer, header { visibility: hidden; }
.stDeployButton { display: none; }
/* ββ Header / Brand ββ */
.ssa-header {
display: flex;
align-items: center;
gap: 1rem;
padding: 2.5rem 0 0.5rem 0;
border-bottom: 1px solid var(--border);
margin-bottom: 2rem;
}
.ssa-logo {
width: 48px;
height: 48px;
background: linear-gradient(135deg, var(--accent), var(--accent2));
border-radius: 12px;
display: flex;
align-items: center;
justify-content: center;
font-family: var(--mono);
font-weight: 700;
font-size: 22px;
color: #fff;
flex-shrink: 0;
box-shadow: 0 0 20px rgba(0,229,255,0.25);
}
.ssa-brand h1 {
font-family: var(--mono) !important;
font-size: 1.7rem !important;
font-weight: 700 !important;
letter-spacing: -0.5px !important;
color: var(--text) !important;
margin: 0 !important;
padding: 0 !important;
}
.ssa-brand p {
font-size: 0.8rem !important;
color: var(--muted) !important;
margin: 2px 0 0 0 !important;
font-family: var(--mono) !important;
letter-spacing: 0.08em;
text-transform: uppercase;
}
/* ββ Query Box ββ */
.stTextArea label {
font-family: var(--mono) !important;
font-size: 0.75rem !important;
text-transform: uppercase !important;
letter-spacing: 0.1em !important;
color: var(--accent) !important;
}
.stTextArea textarea {
background-color: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: 10px !important;
color: var(--text) !important;
font-family: var(--mono) !important;
font-size: 0.9rem !important;
caret-color: var(--accent) !important;
transition: border-color 0.2s;
}
.stTextArea textarea:focus {
border-color: var(--accent) !important;
box-shadow: 0 0 0 3px rgba(0,229,255,0.08) !important;
}
/* ββ Search Button ββ */
.stButton > button {
background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
color: #fff !important;
border: none !important;
border-radius: 8px !important;
font-family: var(--mono) !important;
font-size: 0.8rem !important;
font-weight: 700 !important;
letter-spacing: 0.08em !important;
text-transform: uppercase !important;
padding: 0.55rem 1.8rem !important;
transition: opacity 0.2s, transform 0.15s !important;
}
.stButton > button:hover {
opacity: 0.85 !important;
transform: translateY(-1px) !important;
}
/* ββ Info / Warning banners ββ */
.stInfo {
background-color: var(--surface) !important;
border-left: 3px solid var(--accent) !important;
border-radius: 6px !important;
color: var(--muted) !important;
}
.stAlert {
border-radius: 8px !important;
}
/* ββ Results heading ββ */
.results-heading {
font-family: var(--mono);
font-size: 0.72rem;
text-transform: uppercase;
letter-spacing: 0.14em;
color: var(--muted);
margin: 2rem 0 1.2rem 0;
}
.results-heading span {
color: var(--accent);
}
/* ββ Result Card ββ */
.result-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 12px;
padding: 1.4rem 1.6rem;
margin-bottom: 1rem;
position: relative;
transition: border-color 0.2s, box-shadow 0.2s;
}
.result-card:hover {
border-color: rgba(0,229,255,0.3);
box-shadow: 0 4px 24px rgba(0,229,255,0.05);
}
.result-rank {
font-family: var(--mono);
font-size: 0.65rem;
color: var(--accent);
letter-spacing: 0.12em;
text-transform: uppercase;
margin-bottom: 0.4rem;
}
.result-question {
font-size: 1.05rem;
font-weight: 600;
color: var(--text);
margin-bottom: 0.8rem;
line-height: 1.5;
}
.result-answer {
font-size: 0.88rem;
color: #94a3b8;
line-height: 1.7;
margin-bottom: 0.9rem;
}
.result-answer code, .result-answer pre {
background: var(--surface2) !important;
border: 1px solid var(--border) !important;
border-radius: 5px !important;
font-family: var(--mono) !important;
font-size: 0.82rem !important;
color: #a5f3fc !important;
}
.result-meta {
display: flex;
align-items: center;
gap: 0.5rem;
}
.score-pill {
font-family: var(--mono);
font-size: 0.7rem;
background: var(--surface2);
border: 1px solid var(--border);
border-radius: 20px;
padding: 2px 10px;
color: var(--success);
letter-spacing: 0.05em;
}
/* ββ Spinner ββ */
.stSpinner > div {
border-top-color: var(--accent) !important;
}
/* ββ Divider ββ */
hr {
border-color: var(--border) !important;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
| # ================= DATASET SETUP ================= | |
def ensure_dataset():
    """Create the sample dataset on first use.

    Returns immediately when ``DATASET_PATH`` already exists. Otherwise
    ``prepare_stackoverflow_sample.py`` (looked up next to this file) is run
    with the current interpreter while a spinner is displayed. If that
    process exits with a non-zero status, its captured stderr is shown via
    ``st.error`` and the script run is halted with ``st.stop()``.
    """
    if DATASET_PATH.exists():
        return

    builder_script = Path(__file__).parent / "prepare_stackoverflow_sample.py"
    command = [sys.executable, str(builder_script)]

    with st.spinner("Preparing dataset (first run only)..."):
        # Output is captured as text so stderr can be surfaced in the UI.
        completed = subprocess.run(command, capture_output=True, text=True)

    if completed.returncode != 0:
        st.error(f"Dataset preparation failed:\n\n{completed.stderr}")
        st.stop()
| # ================= ENGINE ================= | |
# Streamlit re-executes the script on every interaction; caching the engine as
# a resource means it is constructed once per server process, not on every
# search. The spinner is suppressed because the caller shows its own.
# NOTE(review): the cached instance is shared across sessions - confirm that
# SemanticSearchEngine.search does not mutate shared state.
@st.cache_resource(show_spinner=False)
def load_engine() -> SemanticSearchEngine:
    """Return the search engine built over ``DATASET_PATH``.

    Returns:
        A ``SemanticSearchEngine`` constructed from ``DATASET_PATH``; the same
        instance is reused on subsequent calls.
    """
    return SemanticSearchEngine(DATASET_PATH)
| # ================= EMBEDDING ================= | |
# Loading a SentenceTransformer is expensive; caching it as a resource keeps a
# single model per server process so it is not reloaded on every query.
# The spinner is suppressed because the caller shows its own.
@st.cache_resource(show_spinner=False)
def load_embedder() -> SentenceTransformer:
    """Return the ``all-MiniLM-L6-v2`` sentence-embedding model.

    Returns:
        A ``SentenceTransformer`` for ``"all-MiniLM-L6-v2"``; the same
        instance is reused on subsequent calls.
    """
    return SentenceTransformer("all-MiniLM-L6-v2")
def get_query_embedding(query: str) -> List[float]:
    """Encode *query* with the embedder and return the vector as a list.

    Args:
        query: Text to embed.

    Returns:
        The result of ``encode(query)`` converted with ``.tolist()``.
    """
    vector = load_embedder().encode(query)
    return vector.tolist()
| # ================= MAIN APP ================= | |
def _render_results(results) -> None:
    """Render the results heading followed by one HTML card per hit.

    Args:
        results: Sized iterable of mappings, each read through the keys
            ``'question'``, ``'answer'`` and ``'score'``.
    """
    st.markdown(
        f'<div class="results-heading">Top <span>{len(results)}</span> results</div>',
        unsafe_allow_html=True,
    )

    for rank, item in enumerate(results, start=1):
        # Presumably a similarity in [0, 1]; shown as a percentage.
        score_pct = f"{item['score'] * 100:.1f}% match"

        # NOTE(review): question and answer are interpolated as raw HTML.
        # The stylesheet targets <code>/<pre> inside .result-answer, so the
        # answers presumably carry intentional markup - confirm the dataset
        # is trusted or sanitise it upstream before escaping here.
        #
        # The markup is kept flush-left (no leading spaces) so that content
        # embedded in it cannot cause following lines to be read as an
        # indented Markdown code block.
        card_html = (
            '\n<div class="result-card">\n'
            f'<div class="result-rank">Result #{rank}</div>\n'
            f'<div class="result-question">{item["question"]}</div>\n'
            f'<div class="result-answer">{item["answer"]}</div>\n'
            '<div class="result-meta">\n'
            f'<span class="score-pill">⬑ {score_pct}</span>\n'
            '</div>\n'
            '</div>\n'
        )
        st.markdown(card_html, unsafe_allow_html=True)


def main():
    """Run the Streamlit page: header, query form, search and results.

    Configures the page, injects the stylesheet, draws the brand header and
    makes sure the dataset exists. Without a click on "Search" only a hint is
    shown; a click with a blank query shows an info banner. Otherwise the
    query is embedded, the engine is asked for the top five matches, and the
    matches are rendered as cards. Any exception raised while searching is
    reported with ``st.error`` and ends the run.
    """
    st.set_page_config(
        page_title="SemanticSearchApp",
        page_icon="⬑",
        layout="centered",
    )
    inject_styles()

    # Brand header. The separator is written as an HTML entity so it renders
    # as a middle dot regardless of how the source file is encoded.
    header_html = (
        '\n<div class="ssa-header">\n'
        '<div class="ssa-logo">⬑</div>\n'
        '<div class="ssa-brand">\n'
        '<h1>SemanticSearchApp</h1>\n'
        '<p>Vector-powered programming search &middot; all-MiniLM-L6-v2</p>\n'
        '</div>\n'
        '</div>\n'
    )
    st.markdown(header_html, unsafe_allow_html=True)

    ensure_dataset()

    # Query input.
    query = st.text_area(
        "Query",
        placeholder="e.g. How do I reverse a list in Python?",
        height=110,
    )
    run = st.button("Search")

    if not run:
        # Nothing requested yet: show the usage hint only.
        st.markdown(
            '<p style="color:#475569;font-size:0.85rem;margin-top:1rem;">'
            "Type a programming question above and hit <strong>Search</strong>.</p>",
            unsafe_allow_html=True,
        )
        return

    cleaned_query = query.strip()
    if not cleaned_query:
        st.info("Please enter a query before searching.")
        return

    # Run the search. This is the UI boundary, so any failure is shown to the
    # user and does not propagate as a traceback.
    try:
        # \u2026 is the horizontal ellipsis.
        with st.spinner("Running semantic search\u2026"):
            engine = load_engine()
            query_embedding = get_query_embedding(cleaned_query)
            results = engine.search(query_embedding, top_k=5)
    except Exception as e:
        st.error(f"Search failed: {e}")
        return

    _render_results(results)


if __name__ == "__main__":
    main()