"""Doc-Fetch — Streamlit front-end for a semantic document search service.

Renders a search box, posts queries to an API gateway (`/search`), displays
ranked result cards with per-document explanations, and can run an offline
evaluation script (`eval.evaluate.run_evaluation`) from the sidebar.

NOTE(review): the original inline HTML/CSS of the `st.markdown(...,
unsafe_allow_html=True)` bodies appears to have been stripped during
extraction; only the surviving text content is preserved below. Restore the
full markup from version control.
"""

import streamlit as st
import requests
import json
import html
import sys
import os
import importlib

# ------------------------------------------
# Add project root + eval folder to path
# ------------------------------------------
CURRENT_DIR = os.path.dirname(__file__)
SRC_DIR = os.path.dirname(CURRENT_DIR)
PROJECT_ROOT = os.path.dirname(SRC_DIR)
sys.path.append(PROJECT_ROOT)
sys.path.append(os.path.join(PROJECT_ROOT, "eval"))

# ------------------------------------------
# FORCE RELOAD evaluate module
# ------------------------------------------
# Streamlit caches imported modules between reruns; reload so edits to the
# evaluation script take effect without restarting the server.
import eval.evaluate as eval_module
importlib.reload(eval_module)
from eval.evaluate import run_evaluation

# Default gateway URL; can be overridden from the sidebar text input below.
API_GATEWAY_URL = "http://localhost:8000"

st.set_page_config(
    page_title="Doc-Fetch",
    layout="wide",
    initial_sidebar_state="expanded",  # Changed from "collapsed" to "expanded"
)

# =======================
# Disable Browser Autocomplete
# =======================
st.markdown(""" """, unsafe_allow_html=True)

# =======================
# UI STYLING
# =======================
st.markdown(""" """, unsafe_allow_html=True)

# =======================
# SIDEBAR (Settings)
# =======================
with st.sidebar:
    st.markdown("### ⚙️ Settings")
    top_k = st.slider("Top-K Results", 1, 10, 5)
    url_input = st.text_input("API Endpoint", API_GATEWAY_URL)
    st.divider()
    st.subheader(" Evaluation")
    run_eval = st.button("Run Evaluation Script")
    st.divider()
    st.caption("Semantic Search · Smart Cache · FAISS Retrieval · Multi-Service Architecture · Relevance by MiniLM Embeddings. Reasoning by LLM.")

# The user-editable endpoint takes precedence over the default.
API_GATEWAY_URL = url_input

# =======================
# MAIN HEADER
# =======================
col1, col2, col3 = st.columns([1, 6, 1])
with col2:
    # Use HTML for the gradient text title
    st.markdown('Hello, Explorer', unsafe_allow_html=True)
    st.markdown('How can I help you find documents today?', unsafe_allow_html=True)

# =======================
# SEARCH BAR CENTERED
# =======================
# Centering the search bar using columns
sc1, sc2, sc3 = st.columns([1, 4, 1])
with sc2:
    query = st.text_input(
        "Search Query",
        # Label hidden by CSS/Config if needed, or set visibility hidden
        placeholder="Ask a question about your documents...",
        label_visibility="collapsed",
    )
    # Buttons row
    b1, b2, b3 = st.columns([2, 1, 2])
    with b2:
        submit_btn = st.button("Sparkle Search", type="primary", use_container_width=True)

# =======================
# SEARCH HANDLER
# =======================
# Fires on explicit click OR when a non-empty query is present (Enter key /
# rerun), so both interaction paths work.
if (submit_btn or query) and query.strip():
    # Gemini-style spinner
    with st.spinner(" Analyzing semantics..."):
        try:
            # FIX: add a timeout and catch network-level failures — a down
            # gateway previously crashed the app with an unhandled exception.
            response = requests.post(
                f"{API_GATEWAY_URL}/search",
                json={"query": query, "top_k": top_k},
                timeout=60,
            )
        except requests.exceptions.RequestException as exc:
            st.error(f"❌ Connection Error: {exc}")
            st.stop()

    if response.status_code != 200:
        st.error(f"❌ Connection Error: {response.text}")
        st.stop()

    try:
        data = response.json()
    except ValueError:  # FIX: was a bare `except:` — catch only bad JSON
        st.error("❌ Invalid JSON response.")
        st.stop()

    if "results" not in data:
        st.info("No relevant documents found for that query.")
        st.stop()

    # Results Header
    st.markdown("### Search Results")
    st.markdown("---")

    # =======================
    # DISPLAY RESULTS (Card Style)
    # =======================
    for item in data["results"]:
        filename = item["filename"]
        score = item["score"]
        explanation = item["explanation"]
        preview = item["preview"]
        full_text = item["full_text"]
        safe_preview = html.escape(preview)

        # Prepare keyword HTML
        # NOTE(review): per-keyword span markup was lost; this currently
        # concatenates the raw keywords.
        keywords = explanation.get("keyword_overlap", [])
        keyword_html = ""
        if keywords:
            keyword_html = "".join([f"{kw}" for kw in keywords])

        # Doc Icon (SVG) - Changed stroke to dark blue for visibility on light bg
        # NOTE(review): SVG body was stripped during extraction.
        doc_icon = """"""

        # Main Card Render
        # FIX: `filename` was computed but never shown — the card displayed a
        # literal "(unknown)" placeholder. Escape it since this block is
        # rendered with unsafe_allow_html.
        st.markdown(f"""
{doc_icon} {html.escape(filename)}
match: {score:.4f}

{safe_preview}...

Keyword Overlap:
{keyword_html}
""", unsafe_allow_html=True)

        # Details Expander (Standard Streamlit but styled via global CSS)
        # FIX: label was an f-string ending in a literal "(unknown)" — wire in
        # the actual filename.
        with st.expander(f"View Document Insights: Semantic Overlap, Top Sentences, LLM Reasoning & Full Text for {filename}"):
            overlap_ratio = explanation.get("overlap_ratio", 0)
            sentences = explanation.get("top_sentences", [])
            st.caption(f"Semantic Overlap Ratio: {overlap_ratio:.3f}")

            if sentences:
                st.markdown("**Key Excerpts:**")
                for s in sentences:
                    # Updated quote box for light mode
                    st.markdown(f"""
"{s['sentence']}" (conf: {s['score']:.2f})
""", unsafe_allow_html=True)

            llm_expl = explanation.get("llm_explanation")
            if llm_expl:
                st.markdown("**Why this document?**")
                st.write(llm_expl)

            st.markdown("---")
            st.markdown("**📄 Full Document Content:**")
            # Using code block for better readability of raw text
            st.code(full_text, language="text")

# =======================
# EVALUATION HANDLER
# =======================
if run_eval:
    st.info("Running evaluation... this may take 10–20 seconds...")
    results = run_evaluation(top_k=10)
    st.success("Evaluation Complete!")

    # -----------------------------
    # Summary Metrics
    # -----------------------------
    st.markdown("## Evaluation Summary")
    c1, c2, c3, c4 = st.columns(4)
    with c1:
        st.metric("Accuracy", f"{results['accuracy']}%")
    with c2:
        st.metric("MRR", results["mrr"])
    with c3:
        st.metric("NDCG", results["ndcg"])
    with c4:
        st.metric("Queries", results["total_queries"])

    st.markdown(
        f"**Correct:** {results['correct_count']}   |   "
        f"**Incorrect:** {results['incorrect_count']}"
    )
    st.markdown("---")

    # -----------------------------
    # Incorrect Results
    # -----------------------------
    st.markdown("## Incorrect Fetches ")
    wrong = [d for d in results["details"] if not d["is_correct"]]
    if wrong:
        for item in wrong:
            st.markdown(f"""
Query: {item['query']}
Expected: {item['expected']}
Retrieved: {item['retrieved']}
Rank: {item['rank']}
""", unsafe_allow_html=True)
    else:
        st.success(" No incorrect queries!")
    st.markdown("---")

    # -----------------------------
    # Correct Results
    # -----------------------------
    st.markdown("## Correct Fetches")
    correct_items = [d for d in results["details"] if d["is_correct"]]
    if correct_items:
        for item in correct_items:
            st.markdown(f"""
Query: {item['query']}
Expected: {item['expected']}
Top-K Retrieved: {item['retrieved']}
Rank: {item['rank']}
""", unsafe_allow_html=True)
    else:
        st.info("No correct queries.")
    st.markdown("---")

    # -----------------------------
    # Full Table
    # -----------------------------
    st.markdown("## Full Evaluation Table")
    table_data = []
    for item in results["details"]:
        table_data.append({
            "Query": item["query"],
            "Expected Doc": item["expected"],
            "Retrieved (Top-10)": ", ".join(item["retrieved"]),
            "Correct?": "Yes" if item["is_correct"] else "No",
            "Rank": item["rank"],
        })
    st.dataframe(table_data, use_container_width=True)