import streamlit as st
import requests
import json
import html
import sys
import os
import importlib
# ------------------------------------------
# Add project root + eval folder to path
# ------------------------------------------
CURRENT_DIR = os.path.dirname(__file__)
SRC_DIR = os.path.dirname(CURRENT_DIR)
PROJECT_ROOT = os.path.dirname(SRC_DIR)
# Streamlit re-executes this script on every user interaction, so guard the
# additions: unconditional append() would grow sys.path by two entries per
# rerun for the lifetime of the server process.
for _extra_path in (PROJECT_ROOT, os.path.join(PROJECT_ROOT, "eval")):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)
# ------------------------------------------
# FORCE RELOAD evaluate module
# ------------------------------------------
# Reload so edits to eval/evaluate.py take effect without restarting the
# Streamlit server (imported modules are otherwise cached across reruns).
import eval.evaluate as eval_module
importlib.reload(eval_module)
from eval.evaluate import run_evaluation
# Default backend endpoint; the sidebar "API Endpoint" input below can
# override this for the current session.
API_GATEWAY_URL = "http://localhost:8000"

# Page chrome: wide layout with the settings sidebar open on load.
st.set_page_config(
    page_title="Doc-Fetch",
    layout="wide",
    initial_sidebar_state="expanded",
)
# =======================
# Disable Browser Autocomplete
# =======================
# NOTE(review): the markdown body here is empty, so this call currently
# renders nothing — presumably an HTML/JS snippet that disabled browser
# autocomplete was stripped from this literal. Restore from the original
# source if available.
st.markdown("""
""", unsafe_allow_html=True)
# =======================
# UI STYLING
# =======================
# NOTE(review): likewise empty — presumably a <style> block carrying the
# card / gradient CSS referenced by the result-rendering code further down.
# Verify against the original source.
st.markdown("""
""", unsafe_allow_html=True)
# =======================
# SIDEBAR (Settings)
# =======================
# Collect the user-tunable knobs: result count, backend endpoint, and the
# evaluation trigger. Widget values persist across Streamlit reruns.
with st.sidebar:
    st.markdown("### ⚙️ Settings")
    top_k = st.slider("Top-K Results", 1, 10, 5)
    url_input = st.text_input("API Endpoint", API_GATEWAY_URL)
    st.divider()
    st.subheader(" Evaluation")
    run_eval = st.button("Run Evaluation Script")
    st.divider()
    st.caption("Semantic Search · Smart Cache · FAISS Retrieval · Multi-Service Architecture · Relevance by MiniLM Embeddings. Reasoning by LLM.")

# Whatever endpoint the user typed becomes the active gateway URL for this run.
API_GATEWAY_URL = url_input
# =======================
# MAIN HEADER
# =======================
col1, col2, col3 = st.columns([1, 6, 1])
with col2:
    # NOTE(review): the original gradient-text HTML wrappers were lost in
    # transit — the single-quoted literals here spanned multiple lines, which
    # is invalid Python syntax. Restored as valid calls carrying the visible
    # text; re-apply the intended HTML markup from the project's styling.
    st.markdown('Hello, Explorer', unsafe_allow_html=True)
    st.markdown('How can I help you find documents today?', unsafe_allow_html=True)
# =======================
# SEARCH BAR CENTERED
# =======================
# Sandwich the input between two spacer columns to center it on the page.
sc1, sc2, sc3 = st.columns([1, 4, 1])
with sc2:
    # Label is required by Streamlit but hidden; the placeholder does the work.
    query = st.text_input(
        "Search Query",
        placeholder="Ask a question about your documents...",
        label_visibility="collapsed",
    )
# Buttons row — narrow middle column keeps the button centered.
b1, b2, b3 = st.columns([2, 1, 2])
with b2:
    submit_btn = st.button("Sparkle Search", type="primary", use_container_width=True)
# =======================
# SEARCH HANDLER
# =======================
# Fire a search when the button is clicked or Enter submits a non-blank query.
if (submit_btn or query) and query.strip():
    with st.spinner(" Analyzing semantics..."):
        try:
            # Bounded timeout so a hung gateway cannot freeze the UI forever;
            # connection failures previously crashed the script uncaught.
            response = requests.post(
                f"{API_GATEWAY_URL}/search",
                json={"query": query, "top_k": top_k},
                timeout=30,
            )
        except requests.RequestException as exc:
            st.error(f"❌ Connection Error: {exc}")
            st.stop()

    if response.status_code != 200:
        st.error(f"❌ Connection Error: {response.text}")
        st.stop()

    try:
        data = response.json()
    except ValueError:  # narrowed from a bare except: JSON decode errors only
        st.error("❌ Invalid JSON response.")
        st.stop()

    if "results" not in data:
        st.info("No relevant documents found for that query.")
        st.stop()

    # Results Header
    st.markdown("### Search Results")
    st.markdown("---")

    # =======================
    # DISPLAY RESULTS (Card Style)
    # =======================
    for item in data["results"]:
        filename = item["filename"]
        score = item["score"]
        explanation = item["explanation"]
        preview = item["preview"]
        full_text = item["full_text"]
        # Escape document content before embedding it in unsafe HTML.
        safe_preview = html.escape(preview)

        # Prepare keyword HTML (empty list joins to "", so no guard needed)
        keywords = explanation.get("keyword_overlap", [])
        keyword_html = "".join(f"{kw}" for kw in keywords)

        # Doc Icon (SVG)
        # NOTE(review): the SVG markup appears to have been stripped from this
        # literal — restore the icon from the original source if available.
        doc_icon = """"""

        # Main Card Render — show the actual filename (was a "(unknown)"
        # placeholder left over from sanitization; `filename` was otherwise
        # bound but never used).
        st.markdown(f"""
{doc_icon} {filename}
match: {score:.4f}
{safe_preview}...
Keyword Overlap:
{keyword_html}
""", unsafe_allow_html=True)

        # Details Expander (standard Streamlit, styled via the global CSS)
        with st.expander(f"View Document Insights: Semantic Overlap, Top Sentences, LLM Reasoning & Full Text for {filename}"):
            overlap_ratio = explanation.get("overlap_ratio", 0)
            sentences = explanation.get("top_sentences", [])
            st.caption(f"Semantic Overlap Ratio: {overlap_ratio:.3f}")
            if sentences:
                st.markdown("**Key Excerpts:**")
                for s in sentences:
                    # Quote box styled for light mode
                    st.markdown(f"""
"{s['sentence']}"(conf: {s['score']:.2f})
""", unsafe_allow_html=True)
            llm_expl = explanation.get("llm_explanation")
            if llm_expl:
                st.markdown("**Why this document?**")
                st.write(llm_expl)
            st.markdown("---")
            st.markdown("**📄 Full Document Content:**")
            # Code block gives readable monospace rendering of the raw text.
            st.code(full_text, language="text")
# Sidebar "Run Evaluation Script" button handler: invokes the force-reloaded
# eval.evaluate.run_evaluation and renders summary metrics plus per-query
# correct/incorrect breakdowns.
if run_eval:
st.info("Running evaluation... this may take 10–20 seconds...")
# NOTE(review): top_k is hard-coded to 10 here and ignores the sidebar
# Top-K slider — confirm whether that is intentional.
results = run_evaluation(top_k=10)
st.success("Evaluation Complete!")
# -----------------------------
# Summary Metrics
# -----------------------------
# Four headline metrics side by side, as returned by run_evaluation.
st.markdown("## Evaluation Summary")
c1, c2, c3, c4 = st.columns(4)
with c1:
st.metric("Accuracy", f"{results['accuracy']}%")
with c2:
st.metric("MRR", results["mrr"])
with c3:
st.metric("NDCG", results["ndcg"])
with c4:
st.metric("Queries", results["total_queries"])
st.markdown(
f"**Correct:** {results['correct_count']} | "
f"**Incorrect:** {results['incorrect_count']}"
)
st.markdown("---")
# -----------------------------
# Incorrect Results
# -----------------------------
st.markdown("## Incorrect Fetches ")
# Per-query records live in results["details"]; each carries an "is_correct" flag.
wrong = [d for d in results["details"] if not d["is_correct"]]
if wrong:
for item in wrong:
# NOTE(review): this markdown body is empty — the per-item card HTML
# appears to have been stripped, so incorrect items currently render
# nothing. Restore from the original source.
st.markdown(f"""
""", unsafe_allow_html=True)
else:
st.success(" No incorrect queries!")
st.markdown("---")
# -----------------------------
# Correct Results
# -----------------------------
st.markdown("## Correct Fetches")
correct_items = [d for d in results["details"] if d["is_correct"]]
if correct_items:
for item in correct_items:
st.markdown(f"""