CodeSeek-AI / src /app.py
noman2110014's picture
d
a1f8c96
Raw
History Blame Contribute Delete
2.54 kB
"""Streamlit semantic search app for CodeSeek AI."""
from __future__ import annotations
import sys
import subprocess
from pathlib import Path
from typing import List
import streamlit as st
from sentence_transformers import SentenceTransformer
from search_engine import SemanticSearchEngine
# ================= CONFIG =================
# Relative path of the dataset JSON file. Being relative, it is resolved
# against the process's current working directory. ensure_dataset() checks
# for it and load_engine() hands it to SemanticSearchEngine.
DATASET_PATH = Path("data/stackoverflow_sample_3000.json")
# ================= DATASET SETUP =================
def ensure_dataset():
    """Make sure the dataset file at ``DATASET_PATH`` is available.

    When the file already exists nothing happens. Otherwise the script
    ``prepare_stackoverflow_sample.py`` located next to this module is run
    with the current Python interpreter while a spinner is displayed.

    If the script exits with a non-zero status, or exits successfully but the
    dataset file is still missing, an error is shown and the Streamlit script
    run is halted with ``st.stop()``.
    """
    if DATASET_PATH.exists():
        return

    with st.spinner("Preparing dataset (first run only)..."):
        script = Path(__file__).parent / "prepare_stackoverflow_sample.py"
        result = subprocess.run(
            [sys.executable, str(script)],
            capture_output=True,
            text=True,
        )

    if result.returncode != 0:
        # Fall back to stdout so the message is not blank when the script
        # reported its failure there instead of on stderr.
        details = result.stderr if result.stderr.strip() else result.stdout
        st.error(f"Dataset preparation failed:\n\n{details}")
        st.stop()

    # A zero exit status does not prove the file was written where this app
    # looks for it (e.g. a different working directory); fail early with a
    # clear message instead of a later, less specific search error.
    if not DATASET_PATH.exists():
        st.error(
            "Dataset preparation finished but the dataset file was not "
            f"found at: {DATASET_PATH}"
        )
        st.stop()
# ================= ENGINE =================
@st.cache_resource(show_spinner=False)
def load_engine() -> SemanticSearchEngine:
    """Construct the search engine from ``DATASET_PATH``.

    Decorated with ``st.cache_resource`` (spinner disabled) so the engine
    object is reused rather than rebuilt on every call.
    """
    engine = SemanticSearchEngine(DATASET_PATH)
    return engine
# ================= EMBEDDING =================
@st.cache_resource(show_spinner=False)
def load_embedder() -> SentenceTransformer:
    """Instantiate the ``all-MiniLM-L6-v2`` sentence-transformer model.

    Decorated with ``st.cache_resource`` (spinner disabled) so the model
    object is reused rather than reloaded on every call.
    """
    model_name = "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)
def get_query_embedding(query: str) -> List[float]:
    """Encode *query* with the cached embedder and return it as a plain list.

    The value returned by ``encode`` is converted with ``.tolist()`` before
    being handed back to the caller.
    """
    vector = load_embedder().encode(query)
    return vector.tolist()
# ================= MAIN APP =================
def main():
    """Render the CodeSeek AI page and run a semantic search for the query.

    Flow:
      1. Configure the page and draw the title and subtitle.
      2. Ensure the dataset file is present (``ensure_dataset``).
      3. Read the question from a text area; when it is blank, show a hint
         and return.
      4. Embed the query, ask the engine for the top 5 matches and render
         each result's question, answer and similarity score.

    Any exception raised while loading the engine, embedding the query or
    searching is reported with ``st.error`` and ends the run early.
    """
    # U+1F50E (magnifying glass) written as an escape so the literal survives
    # any re-encoding of this source file.
    magnifier = "\U0001F50E"

    st.set_page_config(page_title="CodeSeek AI", page_icon=magnifier, layout="wide")
    st.title(f"{magnifier} CodeSeek AI")
    st.subheader("Semantic Programming Search")

    ensure_dataset()

    query = st.text_area(
        "Ask a programming question:",
        placeholder="e.g. How to declare array in Python?",
        height=120,
    )

    # Normalise once; the same stripped text drives the blank check and the search.
    query = query.strip()
    if not query:
        st.info("Enter a query to begin search.")
        return

    # UI boundary: surface any failure to the user instead of a raw traceback.
    try:
        with st.spinner("Searching..."):
            engine = load_engine()
            query_embedding = get_query_embedding(query)
            results = engine.search(query_embedding, top_k=5)
    except Exception as e:
        st.error(f"Search failed: {e}")
        return

    st.markdown("### Top Results")
    for i, item in enumerate(results, start=1):
        st.markdown(f"**{i}. {item['question']}**")
        # NOTE(review): the answer text comes from the dataset and is rendered
        # as raw HTML. Presumably the answers are stored as HTML; confirm the
        # dataset is trusted or sanitise it before rendering.
        st.markdown(item["answer"], unsafe_allow_html=True)
        st.caption(f"Similarity score: {item['score']:.4f}")
        st.divider()
# Entry point: build the page only when this module is executed as a script
# (its __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()