CodeSeek-AI / src /streamlit_app.py
noman2110014's picture
Update src/streamlit_app.py
983d9d7 verified
Raw
History Blame Contribute Delete
4.77 kB
from __future__ import annotations
import sys
import subprocess
from pathlib import Path
from typing import List
import re
import streamlit as st
from sentence_transformers import SentenceTransformer
from search_engine import SemanticSearchEngine
# ================= CONFIG =================
# Relative path of the dataset file: checked for existence by ensure_dataset()
# and passed to SemanticSearchEngine by load_engine().
DATASET_PATH = Path("data/stackoverflow_sample_3000.json")
# ================= DATASET SETUP =================
def ensure_dataset():
    """Run the dataset preparation script when the dataset file is missing.

    If ``DATASET_PATH`` already exists nothing happens. Otherwise
    ``prepare_stackoverflow_sample.py`` (looked up next to this file) is run
    with the current interpreter while a spinner is shown. A non-zero exit
    status displays the script's stderr as an error and stops the app.
    """
    if DATASET_PATH.exists():
        return

    prepare_script = Path(__file__).parent / "prepare_stackoverflow_sample.py"
    command = [sys.executable, str(prepare_script)]

    with st.spinner("Preparing dataset (first run only)..."):
        completed = subprocess.run(command, capture_output=True, text=True)
        if completed.returncode != 0:
            st.error(f"Dataset preparation failed:\n\n{completed.stderr}")
            st.stop()
# ================= ENGINE =================
@st.cache_resource(show_spinner=False)
def load_engine() -> SemanticSearchEngine:
    """Construct the semantic search engine over ``DATASET_PATH``.

    Registered with ``st.cache_resource``; Streamlit's own spinner is disabled.
    """
    engine = SemanticSearchEngine(DATASET_PATH)
    return engine
# ================= EMBEDDING =================
@st.cache_resource(show_spinner=False)
def load_embedder() -> SentenceTransformer:
    """Instantiate the ``all-MiniLM-L6-v2`` sentence-transformer model.

    Registered with ``st.cache_resource``; Streamlit's own spinner is disabled.
    """
    model_name = "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)
def get_query_embedding(query: str) -> List[float]:
    """Encode *query* with the shared embedder and return the result as a list."""
    vector = load_embedder().encode(query)
    return vector.tolist()
# ================= HTML → MARKDOWN RENDER =================
def render_answer(answer: str):
    """
    Render StackOverflow-style HTML as Streamlit text and code elements.

    The answer is split on ``<pre><code>`` / ``</code></pre>`` markers, so
    even-indexed pieces are prose and odd-indexed pieces are code.

    Args:
        answer: HTML fragment to display.

    Prose pieces have their tags removed and are shown with ``st.markdown``
    (whitespace-only pieces are skipped). Code pieces are shown with
    ``st.code`` using Python highlighting as the default language.
    """
    import html  # function-scope import: only this block needs it

    # Accept attributes on the opening tags (e.g. <pre class="lang-py"><code>);
    # an unrecognised opener would otherwise shift the odd/even parity and
    # render the following prose as code.
    parts = re.split(
        r"<pre(?:\s[^>]*)?>\s*<code(?:\s[^>]*)?>|</code>\s*</pre>",
        answer,
    )

    for index, part in enumerate(parts):
        if index % 2:
            # Code block: decode entities such as &lt; / &amp; so the snippet
            # is shown as real source text rather than escaped HTML.
            code = html.unescape(part).strip()
            st.code(code, language="python")  # default language
            continue

        # Normal text → remove simple HTML tags
        clean_text = re.sub(r"<.*?>", "", part)
        if clean_text.strip():
            st.markdown(clean_text)
# ================= CHAT STATE =================
def init_chat():
    """Make sure the session state holds a ``messages`` list for the chat log."""
    if "messages" in st.session_state:
        return
    st.session_state.messages = []
# ================= MAIN APP =================
def _render_sidebar() -> int:
    """Draw the settings sidebar and return the selected number of results."""
    with st.sidebar:
        st.title("βš™οΈ Settings")
        result_count = st.slider("Number of results", 1, 10, 5)
        st.markdown("---")
        if st.button("πŸ—‘οΈ Clear Chat"):
            st.session_state.messages = []
            st.rerun()
    return result_count


def _replay_history() -> None:
    """Re-draw every stored chat message in order."""
    for entry in st.session_state.messages:
        role = entry["role"]
        with st.chat_message(role):
            if role != "assistant":
                st.markdown(entry["content"])
            else:
                render_answer(entry["content"])


def _answer_query(question: str, top_k: int) -> str:
    """Search for *question*, display the hits and return the text kept in history.

    Any exception is shown as an error message, and that message is returned
    instead of the results text.
    """
    try:
        with st.chat_message("assistant"):
            with st.spinner("πŸ”Ž Searching..."):
                engine = load_engine()
                vector = get_query_embedding(question.strip())
                hits = engine.search(vector, top_k=top_k)

            # Raw question/answer text of each hit, kept for the chat history
            history_chunks = []
            for rank, hit in enumerate(hits, start=1):
                st.markdown(f"### πŸ”Ή Result {rank}")
                st.markdown(f"**{hit['question']}**")
                render_answer(hit["answer"])
                st.caption(f"Score: {hit['score']:.4f}")
                st.divider()
                history_chunks.append(f"{hit['question']}\n{hit['answer']}\n")
            reply = "".join(history_chunks)
    except Exception as exc:
        reply = f"Error: {exc}"
        with st.chat_message("assistant"):
            st.error(reply)
    return reply


def main():
    """Run the chat-style search page.

    Configures the page, prepares chat state and the dataset, draws the
    sidebar, header and stored conversation, then handles one submitted
    question by recording it, showing the search results and recording the
    assistant's reply.
    """
    st.set_page_config(
        page_title="CodeSeek AI",
        page_icon="πŸ”Ž",
        layout="wide",
    )
    init_chat()
    ensure_dataset()

    # ================= SIDEBAR =================
    top_k = _render_sidebar()

    # ================= HEADER =================
    st.title("πŸ”Ž CodeSeek AI")
    st.caption("Semantic Programming Search (Chat Style)")

    # ================= DISPLAY CHAT =================
    _replay_history()

    # ================= INPUT =================
    user_input = st.chat_input("Ask a programming question...")
    if not user_input:
        return

    # Record and echo the user's message
    st.session_state.messages.append({"role": "user", "content": user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    # Produce and record the assistant's reply
    full_response = _answer_query(user_input, top_k)
    st.session_state.messages.append(
        {"role": "assistant", "content": full_response}
    )
# ================= RUN =================
# Call main() only when this file is executed as the main module.
if __name__ == "__main__":
    main()