SemanticSearchApp / src /streamlit_app.py
iqra15's picture
Update src/streamlit_app.py
49837f9 verified
Raw
History Blame Contribute Delete
11.1 kB
"""Streamlit semantic search app for SemanticSearchApp."""
from __future__ import annotations
import sys
import subprocess
from pathlib import Path
from typing import List
import streamlit as st
from sentence_transformers import SentenceTransformer
from search_engine import SemanticSearchEngine
# ================= CONFIG =================
# JSON dataset file that ensure_dataset() checks for and that is handed to
# SemanticSearchEngine. The path is relative, so it is resolved against the
# process's current working directory.
DATASET_PATH = Path("data/stackoverflow_sample_3000.json")
# ================= STYLES =================
# Stylesheet for the whole app. Kept as a module-level constant so the
# function below stays readable; the <style> wrapper is part of the payload
# because it is injected through st.markdown as raw HTML.
_APP_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=DM+Sans:wght@300;400;500;600&display=swap');
/* ── Root Variables ── */
:root {
    --bg: #0a0c10;
    --surface: #111318;
    --surface2: #181c24;
    --border: #1f2430;
    --accent: #00e5ff;
    --accent2: #7c3aed;
    --text: #e2e8f0;
    --muted: #64748b;
    --success: #10b981;
    --mono: 'Space Mono', monospace;
    --sans: 'DM Sans', sans-serif;
}
/* ── Global Reset ── */
html, body, [class*="css"] {
    font-family: var(--sans) !important;
    background-color: var(--bg) !important;
    color: var(--text) !important;
}
.main .block-container {
    padding: 2rem 3rem 4rem 3rem !important;
    max-width: 900px !important;
}
/* ── Hide Streamlit chrome ── */
#MainMenu, footer, header { visibility: hidden; }
.stDeployButton { display: none; }
/* ── Header / Brand ── */
.ssa-header {
    display: flex;
    align-items: center;
    gap: 1rem;
    padding: 2.5rem 0 0.5rem 0;
    border-bottom: 1px solid var(--border);
    margin-bottom: 2rem;
}
.ssa-logo {
    width: 48px;
    height: 48px;
    background: linear-gradient(135deg, var(--accent), var(--accent2));
    border-radius: 12px;
    display: flex;
    align-items: center;
    justify-content: center;
    font-family: var(--mono);
    font-weight: 700;
    font-size: 22px;
    color: #fff;
    flex-shrink: 0;
    box-shadow: 0 0 20px rgba(0,229,255,0.25);
}
.ssa-brand h1 {
    font-family: var(--mono) !important;
    font-size: 1.7rem !important;
    font-weight: 700 !important;
    letter-spacing: -0.5px !important;
    color: var(--text) !important;
    margin: 0 !important;
    padding: 0 !important;
}
.ssa-brand p {
    font-size: 0.8rem !important;
    color: var(--muted) !important;
    margin: 2px 0 0 0 !important;
    font-family: var(--mono) !important;
    letter-spacing: 0.08em;
    text-transform: uppercase;
}
/* ── Query Box ── */
.stTextArea label {
    font-family: var(--mono) !important;
    font-size: 0.75rem !important;
    text-transform: uppercase !important;
    letter-spacing: 0.1em !important;
    color: var(--accent) !important;
}
.stTextArea textarea {
    background-color: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 10px !important;
    color: var(--text) !important;
    font-family: var(--mono) !important;
    font-size: 0.9rem !important;
    caret-color: var(--accent) !important;
    transition: border-color 0.2s;
}
.stTextArea textarea:focus {
    border-color: var(--accent) !important;
    box-shadow: 0 0 0 3px rgba(0,229,255,0.08) !important;
}
/* ── Search Button ── */
.stButton > button {
    background: linear-gradient(135deg, var(--accent), var(--accent2)) !important;
    color: #fff !important;
    border: none !important;
    border-radius: 8px !important;
    font-family: var(--mono) !important;
    font-size: 0.8rem !important;
    font-weight: 700 !important;
    letter-spacing: 0.08em !important;
    text-transform: uppercase !important;
    padding: 0.55rem 1.8rem !important;
    transition: opacity 0.2s, transform 0.15s !important;
}
.stButton > button:hover {
    opacity: 0.85 !important;
    transform: translateY(-1px) !important;
}
/* ── Info / Warning banners ── */
.stInfo {
    background-color: var(--surface) !important;
    border-left: 3px solid var(--accent) !important;
    border-radius: 6px !important;
    color: var(--muted) !important;
}
.stAlert {
    border-radius: 8px !important;
}
/* ── Results heading ── */
.results-heading {
    font-family: var(--mono);
    font-size: 0.72rem;
    text-transform: uppercase;
    letter-spacing: 0.14em;
    color: var(--muted);
    margin: 2rem 0 1.2rem 0;
}
.results-heading span {
    color: var(--accent);
}
/* ── Result Card ── */
.result-card {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 12px;
    padding: 1.4rem 1.6rem;
    margin-bottom: 1rem;
    position: relative;
    transition: border-color 0.2s, box-shadow 0.2s;
}
.result-card:hover {
    border-color: rgba(0,229,255,0.3);
    box-shadow: 0 4px 24px rgba(0,229,255,0.05);
}
.result-rank {
    font-family: var(--mono);
    font-size: 0.65rem;
    color: var(--accent);
    letter-spacing: 0.12em;
    text-transform: uppercase;
    margin-bottom: 0.4rem;
}
.result-question {
    font-size: 1.05rem;
    font-weight: 600;
    color: var(--text);
    margin-bottom: 0.8rem;
    line-height: 1.5;
}
.result-answer {
    font-size: 0.88rem;
    color: #94a3b8;
    line-height: 1.7;
    margin-bottom: 0.9rem;
}
.result-answer code, .result-answer pre {
    background: var(--surface2) !important;
    border: 1px solid var(--border) !important;
    border-radius: 5px !important;
    font-family: var(--mono) !important;
    font-size: 0.82rem !important;
    color: #a5f3fc !important;
}
.result-meta {
    display: flex;
    align-items: center;
    gap: 0.5rem;
}
.score-pill {
    font-family: var(--mono);
    font-size: 0.7rem;
    background: var(--surface2);
    border: 1px solid var(--border);
    border-radius: 20px;
    padding: 2px 10px;
    color: var(--success);
    letter-spacing: 0.05em;
}
/* ── Spinner ── */
.stSpinner > div {
    border-top-color: var(--accent) !important;
}
/* ── Divider ── */
hr {
    border-color: var(--border) !important;
}
</style>
"""


def inject_styles():
    """Inject the app's custom stylesheet into the current Streamlit page.

    The CSS defines the colour/font variables, hides the ``#MainMenu``,
    ``footer``, ``header`` and ``.stDeployButton`` elements, and styles the
    brand header, query text area, search button, banners and result cards.
    It is emitted through ``st.markdown`` with ``unsafe_allow_html=True``
    because the payload is a raw ``<style>`` element.
    """
    st.markdown(_APP_CSS, unsafe_allow_html=True)
# ================= DATASET SETUP =================
def ensure_dataset():
    """Create the dataset file on first run by invoking the preparation script.

    Returns immediately when ``DATASET_PATH`` already exists. Otherwise it
    runs ``prepare_stackoverflow_sample.py`` (looked up next to this file)
    with the current Python interpreter while a spinner is displayed. If the
    script exits with a non-zero status, its captured stderr is shown with
    ``st.error`` and the Streamlit run is halted via ``st.stop()``.
    """
    if DATASET_PATH.exists():
        return

    prepare_script = Path(__file__).parent / "prepare_stackoverflow_sample.py"
    # Argument list (no shell) using the interpreter that runs this app.
    command = [sys.executable, str(prepare_script)]
    with st.spinner("Preparing dataset (first run only)..."):
        completed = subprocess.run(command, capture_output=True, text=True)

    if completed.returncode == 0:
        return

    st.error(f"Dataset preparation failed:\n\n{completed.stderr}")
    st.stop()
# ================= ENGINE =================
@st.cache_resource(show_spinner=False)
def load_engine() -> SemanticSearchEngine:
    """Build the ``SemanticSearchEngine`` over ``DATASET_PATH``.

    Decorated with ``st.cache_resource`` (Streamlit's own spinner disabled)
    so Streamlit can reuse the returned engine instead of rebuilding it.
    """
    engine = SemanticSearchEngine(DATASET_PATH)
    return engine
# ================= EMBEDDING =================
@st.cache_resource(show_spinner=False)
def load_embedder() -> SentenceTransformer:
    """Instantiate the ``all-MiniLM-L6-v2`` sentence-transformer model.

    Decorated with ``st.cache_resource`` (Streamlit's own spinner disabled)
    so Streamlit can reuse the returned model instead of reloading it.
    """
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    return embedder
def get_query_embedding(query: str) -> List[float]:
    """Encode *query* with the cached embedder and return it as a plain list.

    The model comes from ``load_embedder()``; the encoded result is converted
    with ``.tolist()`` before being returned.
    """
    embedder = load_embedder()
    encoded = embedder.encode(query)
    return encoded.tolist()
# ================= MAIN APP =================
def main():
    """Render the page: brand header, query box and the top search results.

    Steps performed on every script run:

    1. Configure the page and inject the custom stylesheet.
    2. Draw the brand header and make sure the dataset file exists.
    3. Read the query. Return early with a hint when Search was not pressed,
       or with a notice when it was pressed on a blank query.
    4. Embed the stripped query, ask the engine for the 5 best matches and
       render one HTML card per hit.

    Any ``Exception`` raised while loading the engine, embedding the query or
    searching is reported through ``st.error`` instead of propagating.
    """
    st.set_page_config(
        page_title="SemanticSearchApp",
        page_icon="⬡",
        layout="centered",
    )
    inject_styles()

    # Brand header (static HTML, kept flush-left).
    st.markdown(
        "\n"
        '<div class="ssa-header">\n'
        '<div class="ssa-logo">⬡</div>\n'
        '<div class="ssa-brand">\n'
        "<h1>SemanticSearchApp</h1>\n"
        "<p>Vector-powered programming search · all-MiniLM-L6-v2</p>\n"
        "</div>\n"
        "</div>\n",
        unsafe_allow_html=True,
    )

    ensure_dataset()

    # Query input.
    query = st.text_area(
        "Query",
        placeholder="e.g. How do I reverse a list in Python?",
        height=110,
    )
    run = st.button("Search")
    query_text = query.strip()

    if not run:
        # Idle state: nothing requested yet, show the usage hint.
        st.markdown(
            '<p style="color:#475569;font-size:0.85rem;margin-top:1rem;">'
            "Type a programming question above and hit <strong>Search</strong>.</p>",
            unsafe_allow_html=True,
        )
        return
    if not query_text:
        st.info("Please enter a query before searching.")
        return

    # Run the search. This is the UI boundary, so any failure is shown to the
    # user rather than surfacing as a traceback.
    try:
        with st.spinner("Running semantic search…"):
            engine = load_engine()
            query_embedding = get_query_embedding(query_text)
            results = engine.search(query_embedding, top_k=5)
    except Exception as exc:
        st.error(f"Search failed: {exc}")
        return

    # Results.
    st.markdown(
        f'<div class="results-heading">Top <span>{len(results)}</span> results</div>',
        unsafe_allow_html=True,
    )
    for rank, item in enumerate(results, start=1):
        st.markdown(_result_card_html(rank, item), unsafe_allow_html=True)


def _result_card_html(rank: int, item) -> str:
    """Return the HTML card markup for one search hit.

    ``item`` is indexed with the keys ``"score"``, ``"question"`` and
    ``"answer"``. The score is multiplied by 100 and shown with one decimal.

    NOTE(review): question and answer are inserted as raw HTML. The
    stylesheet targets ``code``/``pre`` inside ``.result-answer``, which
    suggests answers are expected to carry markup — confirm the dataset is
    trusted or sanitised before exposing this to arbitrary content.
    """
    score_pct = f"{item['score'] * 100:.1f}% match"
    question = item["question"]
    answer = item["answer"]
    # Lines are flush-left with no indentation, matching the original markup.
    return (
        "\n"
        '<div class="result-card">\n'
        f'<div class="result-rank">Result #{rank}</div>\n'
        f'<div class="result-question">{question}</div>\n'
        f'<div class="result-answer">{answer}</div>\n'
        '<div class="result-meta">\n'
        f'<span class="score-pill">⬡ {score_pct}</span>\n'
        "</div>\n"
        "</div>\n"
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()