XQ
Update description
50cf919
raw
history blame
47.1 kB
"""Streamlit frontend for Dokumentintelligens-system.
Calls the FastAPI backend at http://localhost:8000.
Single-page document search interface with clean sans-serif design.
"""
import datetime
import html
import json
import os
import random
import uuid
import extra_streamlit_components as stx
import streamlit as st
import requests
API_BASE = os.environ.get("API_BASE_URL", "http://localhost:8000")
# Cookie name used to persist the per-browser session ID across page reloads.
_SESSION_COOKIE_NAME = "kuda_session_id"
_SESSION_COOKIE_TTL_DAYS = 30
# ---------------------------------------------------------------------------
# Example questions — drawn from the documents in docs/
# ---------------------------------------------------------------------------
EXAMPLE_QUESTIONS: list[str] = [
"Hvad er reglerne for brug af generativ AI til eksamen på KU?",
"Hvordan håndteres uansøgt afsked begrundet i institutionens forhold?",
"Hvad er de disciplinære foranstaltninger over for studerende?",
"Hvordan skal klager over medarbejdere og ledere behandles?",
"Hvad er retningslinjerne for afholdelse af MUS-samtaler?",
"Hvordan er års- og skemastrukturen organiseret på KU?",
"Hvilke regler gælder for eksamenstilmelding og afmelding?",
"Hvordan skal studerende dokumentere brug af GAI i skriftlige opgaver?",
"Hvad er kommunernes ansvar ved brug af generativ AI?",
"Hvilke principper gælder for akademisk integritet ved brug af AI?",
"Hvornår kan en leder afvise en klage som åbenbart grundløs?",
"Hvad er reglerne for forlænget tid til eksamen?",
]
# ---------------------------------------------------------------------------
# Internationalisation — all UI strings live here
# ---------------------------------------------------------------------------
TEXTS: dict[str, dict[str, str]] = {
"da": {
"page_title": "Dokumentintelligens-system",
"lang_label": "Sprog",
"sidebar_heading": "Om systemet",
"sidebar_body": (
"- **Python + FastAPI** REST-backend\n"
"- **Ustruktureret data** — File-parsing, preprocessing, "
"tre chunking-strategier\n"
"- **Embedding-modeller** — flersproget semantisk "
"vektorrepræsentation\n"
"- **Vektordatabase + hybrid søgning** — Qdrant (semantisk) "
"+ BM25 (leksikalsk)\n"
"- **Reranking** — cross-encoder for præcis relevans\n"
"- **RAG-arkitektur** — LangChain + LangGraph-orkestreret pipeline\n"
"- **LLM-integration** — provider-agnostisk, prompt-styret "
"svargenerering\n"
"- **Evaluering** — RAGAS-baseret kvalitetsmåling\n"
"- **Agent Flows** — LangGraph Plan-and-Execute med værktøjskald og samtalehukommelse\n"
"- [**Kildedokumenter**](https://github.com/Xiiqiing/Dokumentassistent/tree/main/docs)"
" — de dokumenter systemet er indekseret fra"
),
"chunking_label": "Chunking-strategi",
"chunking_help": "Vælg hvordan dokumenterne opdeles i tekststykker.",
"topk_label": "Antal kilder (top_k)",
"topk_help": "Antal dokumentfragmenter der hentes fra søgeindekset.",
"title": "Dokumentintelligens-system",
"title_badge": "",
"subtitle": (
"Et dokumentintelligens-system bygget på en RAG-arkitektur, dækkende file-indlæsning, semantisk chunking, "
"hybrid søgning med reranking "
"og LLM-genererede svar med kildehenvisninger. LLM-laget er provider-agnostisk. "
"To tilstande: en LangGraph Plan-and-Execute-agent (standard) med samtalehukommelse til komplekse forespørgsler, "
"og en foruddefineret pipeline til lette modeller. Søgekvaliteten evalueres med RAGAS. "
'Vidensbasen indeholder <a href="https://github.com/Xiiqiing/Dokumentassistent/tree/main/docs" target="_blank">eksempler på dokumenter</a> om universitetsregler og -forskrifter. Prøv at stille et spørgsmål om den.'
),
"search_label": "Stil et spørgsmål om ... ",
"search_placeholder": "F.eks.: Hvad er reglerne for behandling af personoplysninger?",
"search_button": "Søg",
"example_button": "Tilfældigt eksempel",
"spinner": "Søger i dokumenterne ...",
"status_label": "Tænker ...",
"status_done": "Færdig",
"status_error": "Noget gik galt",
"confidence_label": "Konfidensgrad",
"intent_label": "Intent",
"strategy_label": "Strategi",
"no_answer": "Intet svar modtaget.",
"sources_label": "Kilder",
"page_label": "side",
"no_sources": "Ingen kilder fundet for denne forespørgsel.",
"empty_warning": "Indtast venligst et spørgsmål.",
"err_connection": (
"Kunne ikke oprette forbindelse til API-serveren. "
"Kontroller at backend kører på http://localhost:8000."
),
"err_api": "API-fejl",
"err_rate_limit": "For mange samtidige forespørgsler, eller API-kvoten er midlertidigt opbrugt. Vent venligst et øjeblik, og prøv igen.",
"err_timeout": "Forespørgslen tog for lang tid. Prøv igen.",
"unknown": "ukendt",
"model_heading": "Aktuel model",
"model_llm": "LLM",
"model_embedding": "Embedding",
"model_unavailable": "Kunne ikke hente modelinfo.",
"pipeline_heading": "Pipeline-detaljer",
"pipeline_translation": "Oversættelse",
"pipeline_original": "Original forespørgsel",
"pipeline_translated": "Oversat til dansk",
"pipeline_lang": "Sprog registreret",
"pipeline_no_translation": "Ingen oversættelse nødvendig",
"pipeline_bm25": "BM25-resultater (leksikalsk søgning)",
"pipeline_dense": "Vektorsøgning (semantisk)",
"pipeline_fused": "RRF-fusioneret rækkefølge",
"pipeline_reranked": "Reranking (endelig rækkefølge)",
"pipeline_doc": "Dokument",
"pipeline_score": "Score",
"pipeline_rank": "#",
"pipeline_no_results": "Ingen resultater",
"pipeline_score_change": "Score-ændring",
"pipeline_plan_steps": "Udførelsesplan",
"pipeline_tool_calls": "Værktøjskald",
"synthesize_status": "Syntetiserer endeligt svar ...",
"example_note": "",
},
"en": {
"page_title": "Document Intelligence System",
"lang_label": "Language",
"sidebar_heading": "About the system",
"sidebar_body": (
"- **Python + FastAPI** REST backend\n"
"- **Unstructured data** — File parsing, preprocessing, "
"three chunking strategies\n"
"- **Embedding models** — multilingual semantic vector "
"representations\n"
"- **Vector database + hybrid search** — Qdrant (semantic) "
"+ BM25 (lexical)\n"
"- **Reranking** — cross-encoder for precise relevance\n"
"- **RAG architecture** — LangChain + LangGraph-orchestrated pipeline\n"
"- **LLM integration** — provider-agnostic, prompt-driven "
"answer generation\n"
"- **Evaluation** — RAGAS-based quality measurement\n"
"- **Agent Flows** — LangGraph Plan-and-Execute with tool calling and conversation memory\n"
"- [**Source documents**](https://github.com/Xiiqiing/Dokumentassistent/tree/main/docs)"
" — the documents indexed into the knowledge base"
),
"chunking_label": "Chunking strategy",
"chunking_help": "Choose how documents are split into text chunks.",
"topk_label": "Number of sources (top_k)",
"topk_help": "Number of document fragments retrieved from the search index.",
"title": "Document Intelligence System",
"title_badge": "",
"subtitle": (
"A document intelligence system built on a RAG architecture, covering file ingestion, semantic chunking, "
"hybrid retrieval with reranking, "
"and LLM-generated answers with source citations. The LLM layer is provider-agnostic. "
"Two modes: a LangGraph Plan-and-Execute agent (default) with conversation memory for complex multi-step queries, "
"and a predefined pipeline for lightweight models. "
"Retrieval quality is evaluated with RAGAS. "
'The knowledge base contains <a href="https://github.com/Xiiqiing/Dokumentassistent/tree/main/docs" target="_blank"> example documents</a> of university rules and regulations. Try to ask questions about it.'
),
"search_label": "Ask a question ...",
"search_placeholder": "E.g.: What are the rules for processing personal data?",
"search_button": "Search",
"example_button": "Random question",
"spinner": "Searching documents ...",
"status_label": "Thinking ...",
"status_done": "Done",
"status_error": "Something went wrong",
"confidence_label": "Confidence",
"intent_label": "Intent",
"strategy_label": "Strategy",
"no_answer": "No answer received.",
"sources_label": "Sources",
"page_label": "page",
"no_sources": "No sources found for this query.",
"empty_warning": "Please enter a question.",
"err_connection": (
"Could not connect to the API server. "
"Make sure the backend is running at http://localhost:8000."
),
"err_api": "API error",
"err_rate_limit": "Too many simultaneous requests, or API quota temporarily exhausted. Please wait a moment and try again.",
"err_timeout": "The request took too long. Please try again.",
"unknown": "unknown",
"model_heading": "Current model",
"model_llm": "LLM",
"model_embedding": "Embedding",
"model_unavailable": "Could not fetch model info.",
"pipeline_heading": "Pipeline Details",
"pipeline_translation": "Query Translation",
"pipeline_original": "Original query",
"pipeline_translated": "Translated to Danish",
"pipeline_lang": "Detected language",
"pipeline_no_translation": "No need for translation",
"pipeline_bm25": "BM25 Results (lexical search)",
"pipeline_dense": "Vector Search (semantic)",
"pipeline_fused": "RRF Fused Ranking",
"pipeline_reranked": "Reranked (final ranking)",
"pipeline_doc": "Document",
"pipeline_score": "Score",
"pipeline_rank": "#",
"pipeline_no_results": "No results",
"pipeline_score_change": "Score change",
"pipeline_plan_steps": "Execution Plan",
"pipeline_tool_calls": "Tool Calls",
"synthesize_status": "Synthesizing final answer ...",
"example_note": "",
},
}
# ---------------------------------------------------------------------------
# Page config
# ---------------------------------------------------------------------------
st.set_page_config(
page_title="Dokumentintelligens-system",
page_icon="📄",
layout="centered",
)
st.markdown('<meta name="robots" content="noindex, nofollow">', unsafe_allow_html=True)
# ---------------------------------------------------------------------------
# Per-browser session ID — persisted in a cookie so chat history survives
# page refreshes. Falls back to a freshly generated UUID if the cookie is
# not yet readable (first visit, or before the JS component has initialised).
#
# CookieManager must be instantiated directly on every rerun (it cannot be
# wrapped in @st.cache_resource because its constructor calls a widget
# command). Streamlit treats it as the same widget across reruns thanks to
# the stable `key` argument.
# ---------------------------------------------------------------------------
_cookie_manager = stx.CookieManager(key="kuda_cookie_manager")
_cookies = _cookie_manager.get_all()
# CookieManager loads cookies asynchronously via a JS component. On the very
# first script run after a page load, get_all() returns None because the
# component has not yet reported back. Stop here and wait for the rerun the
# component triggers once it delivers the browser's cookies — otherwise we
# would always see "no cookie" on first render and overwrite any existing
# session_id with a fresh UUID.
if _cookies is None:
st.stop()
_existing_sid = _cookies.get(_SESSION_COOKIE_NAME)
if _existing_sid:
# Cookie present → reuse it so the backend can find prior turns.
st.session_state["session_id"] = _existing_sid
elif "session_id" not in st.session_state:
# No cookie yet → mint a fresh ID and persist it for next reload.
new_sid = str(uuid.uuid4())
st.session_state["session_id"] = new_sid
_cookie_manager.set(
_SESSION_COOKIE_NAME,
new_sid,
expires_at=datetime.datetime.now()
+ datetime.timedelta(days=_SESSION_COOKIE_TTL_DAYS),
key="kuda_set_session_cookie",
)
# ---------------------------------------------------------------------------
# Analytics — Umami Cloud
# ---------------------------------------------------------------------------
# `st.html` injects via React's dangerouslySetInnerHTML, and scripts inserted
# through innerHTML never execute (HTML5 spec). We instead use a tiny iframe
# bootstrap (via components.html) that attaches the real Umami script to the
# parent document, so analytics track the actual Streamlit page URL.
import streamlit.components.v1 as components # noqa: E402
components.html(
"""
<script>
(function () {
var doc = window.parent.document;
if (doc.querySelector('script[data-website-id="cf6c908e-1236-4406-8c02-88aa7c9a0db2"]')) {
return;
}
var s = doc.createElement('script');
s.async = true;
s.defer = true;
s.src = 'https://cloud.umami.is/script.js';
s.setAttribute('data-website-id', 'cf6c908e-1236-4406-8c02-88aa7c9a0db2');
doc.head.appendChild(s);
})();
</script>
""",
height=0,
width=0,
)
# ---------------------------------------------------------------------------
# Custom CSS -- Clean sans-serif design
# ---------------------------------------------------------------------------
st.markdown(
"""
<style>
/* ---------- Global ---------- */
html, body, [class*="css"] {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
color: #333333;
background-color: #FFFFFF;
}
/* Hide default Streamlit branding but keep the sidebar toggle */
#MainMenu, footer {visibility: hidden;}
header[data-testid="stHeader"] {background: transparent;}
/* ---------- Accent line ---------- */
.accent-line {
width: 100%;
height: 4px;
background-color: #901A1E;
margin-bottom: 1.5rem;
}
/* ---------- Title ---------- */
.app-title {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 2.2rem;
font-weight: 700;
color: #901A1E;
margin: 0 0 0.4rem 0;
letter-spacing: -0.02em;
white-space: nowrap;
}
@media (max-width: 640px) {
.app-title {
font-size: clamp(1.3rem, 6vw, 2.2rem);
white-space: nowrap;
}
}
.app-subtitle {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 1.05rem;
color: #666666;
margin: 0 0 2rem 0;
line-height: 1.6;
}
.app-subtitle a {
color: #901A1E;
text-decoration: underline;
}
.app-subtitle a:hover {
color: #6B1315;
}
@media (max-width: 640px) {
.app-subtitle {
font-size: 0.82rem;
line-height: 1.5;
}
}
/* ---------- Sidebar ---------- */
section[data-testid="stSidebar"] {
background-color: #FAFAFA;
border-right: 1px solid #E0E0E0;
}
/* Sidebar collapse button: always visible & KU red */
button[data-testid="stBaseButton-headerNoPadding"] {
opacity: 1 !important;
visibility: visible !important;
color: #901A1E !important;
}
button[data-testid="stBaseButton-headerNoPadding"] svg {
stroke: #901A1E !important;
color: #901A1E !important;
}
button[data-testid="stBaseButton-headerNoPadding"]:hover {
color: #6B1315 !important;
}
button[data-testid="stBaseButton-headerNoPadding"]:hover svg {
stroke: #6B1315 !important;
color: #6B1315 !important;
}
section[data-testid="stSidebar"] > div:first-child {
padding-top: 1rem;
padding-left: 1.2rem;
padding-right: 1.2rem;
}
/* ---------- Shrink main area top gap to align title with sidebar heading ---------- */
.block-container {
padding-top: 0.2rem !important;
}
/* Tighten language selector row so it doesn't push the title down */
div[data-testid="stHorizontalBlock"]:has(div[role="radiogroup"]) {
margin-top: 0 !important;
margin-bottom: 0 !important;
}
div[role="radiogroup"] {
margin-top: 0 !important;
margin-bottom: 0 !important;
}
section[data-testid="stSidebar"] .sidebar-heading {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 1.2rem;
font-weight: 700;
color: #901A1E;
margin-bottom: 0.5rem;
}
section[data-testid="stSidebar"] p,
section[data-testid="stSidebar"] li {
font-size: 0.92rem;
color: #555555;
line-height: 1.55;
}
section[data-testid="stSidebar"] ul {
padding-left: 1.2rem;
margin: 0.4rem 0 0 0;
list-style-position: outside;
}
section[data-testid="stSidebar"] li {
padding-left: 0.2rem;
margin-bottom: 0.35rem;
}
/* ---------- Source card ---------- */
.source-card {
border: 1px solid #CCCCCC;
padding: 1rem 1.2rem;
margin-bottom: 0.75rem;
background-color: #FAFAFA;
}
.source-card-title {
font-weight: 600;
color: #333333;
font-size: 0.95rem;
margin-bottom: 0.3rem;
}
.source-card-text {
font-size: 0.88rem;
color: #555555;
line-height: 1.55;
}
.source-card-meta {
font-size: 0.8rem;
color: #888888;
margin-top: 0.4rem;
}
/* ---------- Result metadata ---------- */
.result-meta {
font-size: 0.88rem;
color: #666666;
margin-bottom: 1.2rem;
padding-bottom: 0.8rem;
border-bottom: 1px solid #E0E0E0;
}
/* ---------- Answer area ---------- */
.answer-block {
font-size: 1.05rem;
line-height: 1.7;
color: #333333;
margin-bottom: 1.5rem;
}
/* ---------- Search form container — equal padding on all sides ---------- */
[data-testid="stForm"] {
padding: 1.5rem !important;
}
/* ---------- Inputs ---------- */
.stTextInput {
margin-bottom: -0.5rem !important;
}
.stTextInput > div > div > input {
border-radius: 0 !important;
border: 1px solid #999999 !important;
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif !important;
}
.stTextInput > div > div > input:focus {
border-color: #901A1E !important;
box-shadow: none !important;
}
/* ---------- Button ---------- */
.stButton > button {
border-radius: 0 !important;
background-color: #901A1E !important;
color: #FFFFFF !important;
border: none !important;
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif !important;
font-size: 0.95rem !important;
padding: 0.5rem 2rem !important;
letter-spacing: 0.02em;
}
.stButton > button:hover {
background-color: #7A1619 !important;
}
.stButton > button:active {
background-color: #611114 !important;
}
/* ---------- Slider ---------- */
.stSlider [data-baseweb="slider"] div[role="slider"] {
background-color: #901A1E !important;
}
/* ---------- Selectbox ---------- */
.stSelectbox > div > div {
border-radius: 0 !important;
}
/* ---------- Language toggle (the only st.radio on the page) ---------- */
/* Collapse vertical space around the toggle row */
[data-testid="stRadio"] {
margin: 0 !important;
padding: 0 !important;
}
[data-testid="stRadio"] [role="radiogroup"] label {
min-height: 0 !important;
padding-top: 0 !important;
padding-bottom: 0 !important;
}
/* Reduce gap between language row and accent line */
[data-testid="stHorizontalBlock"]:first-child {
margin-bottom: -0.8rem !important;
}
/* Hide the "Language" label */
[data-testid="stRadio"] > label {
display: none !important;
}
[data-testid="stRadio"] [role="radiogroup"] {
gap: 0.15rem !important;
justify-content: flex-end;
align-items: center;
}
/* Hide the radio dot circle */
[data-testid="stRadio"] [role="radiogroup"] label > div:first-child {
display: none !important;
}
/* Base style for both options */
[data-testid="stRadio"] [role="radiogroup"] label {
background: none !important;
border: none !important;
padding: 0 !important;
margin: 0 !important;
min-height: 0 !important;
}
[data-testid="stRadio"] [role="radiogroup"] label,
[data-testid="stRadio"] [role="radiogroup"] label * {
font-size: 0.92rem !important;
font-weight: 600 !important;
color: #999999 !important;
cursor: pointer;
line-height: 1.2 !important;
}
/* Active / checked option → KU red */
[data-testid="stRadio"] [role="radiogroup"] label[data-checked="true"],
[data-testid="stRadio"] [role="radiogroup"] label[data-checked="true"] *,
[data-testid="stRadio"] [role="radiogroup"] label:has(input:checked),
[data-testid="stRadio"] [role="radiogroup"] label:has(input:checked) * {
color: #901A1E !important;
}
/* Separator between the two options */
[data-testid="stRadio"] [role="radiogroup"] label:first-child::after {
content: "|";
color: #CCCCCC;
font-weight: 400;
margin-left: 0.5rem;
font-size: 0.92rem;
}
/* ---------- Animated thinking dots on st.status label ---------- */
/* st.status renders as <details data-testid="stExpander"> in Streamlit 1.56;
we scope the animation to the running state by requiring the spinner icon. */
@keyframes thinking-dots {
0% { width: 0; }
100% { width: 1.5em; }
}
details[data-testid="stExpander"]:has([data-testid="stExpanderIconSpinner"])
summary [data-testid="stMarkdownContainer"] p::after {
content: "...";
display: inline-block;
width: 0;
overflow: hidden;
vertical-align: bottom;
white-space: nowrap;
margin-left: 0.15em;
animation: thinking-dots 1.2s steps(4, end) infinite;
}
/* ---------- Expander ---------- */
.streamlit-expanderHeader {
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif !important;
font-size: 1rem !important;
color: #333333 !important;
}
/* ---------- Footer ---------- */
/* Clear transforms on ALL ancestors so position:fixed works */
*:has(> .app-footer),
*:has(.app-footer) {
transform: none !important;
}
.app-footer {
position: fixed;
bottom: 0;
left: 0;
right: 0;
z-index: 1000001;
background-color: #FFFFFF;
text-align: center;
border-top: 1px solid #E0E0E0;
padding: 0.55rem 1.5rem;
font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 0.82rem;
color: #888888;
}
.app-footer a {
color: #555555;
text-decoration: none;
}
.app-footer a:hover {
color: #901A1E;
}
.app-footer svg {
vertical-align: middle;
margin-right: 0.25rem;
}
.app-footer .footer-sep {
margin: 0 0.6rem;
color: #CCCCCC;
}
/* Push main content above the fixed footer */
.block-container {
padding-bottom: 4rem !important;
}
@media (max-width: 640px) {
.app-footer {
padding: 0.45rem 1rem;
font-size: 0.78rem;
}
.app-footer .footer-label {
display: none;
}
.app-footer svg {
margin-right: 0;
}
.app-footer .footer-sep {
margin: 0 0.5rem;
}
.block-container {
padding-bottom: 5rem !important;
}
}
</style>
""",
unsafe_allow_html=True,
)
# ---------------------------------------------------------------------------
# Language selector -- right-aligned toggle styled in KU red
# ---------------------------------------------------------------------------
_col_spacer, _col_lang = st.columns([5, 1.5])
with _col_lang:
lang = st.radio(
"Language",
options=["da", "en"],
format_func=lambda c: "Dansk" if c == "da" else "English",
index=0,
horizontal=True,
label_visibility="collapsed",
)
t = TEXTS[lang]
# ---------------------------------------------------------------------------
# Sidebar
# ---------------------------------------------------------------------------
with st.sidebar:
st.markdown(
f'<div class="sidebar-heading">{t["sidebar_heading"]}</div>',
unsafe_allow_html=True,
)
st.markdown(t["sidebar_body"])
st.markdown("---")
strategy = st.selectbox(
t["chunking_label"],
options=["fixed_size", "recursive", "semantic"],
index=2,
help=t["chunking_help"],
)
top_k = st.slider(
t["topk_label"],
min_value=1,
max_value=20,
value=5,
help=t["topk_help"],
)
st.markdown("---")
try:
_health = requests.get(f"{API_BASE}/health", timeout=5).json()
_llm = _health.get("llm_model", "")
_llm_prov = _health.get("llm_provider", "")
_emb = _health.get("embedding_model", "")
_emb_prov = _health.get("embedding_provider", "")
st.markdown(
f'<div class="sidebar-heading">{t["model_heading"]}</div>',
unsafe_allow_html=True,
)
st.markdown(
f'**{t["model_llm"]}:** {_llm} ({_llm_prov}) \n'
f'**{t["model_embedding"]}:** {_emb} ({_emb_prov})'
)
except Exception:
st.caption(t["model_unavailable"])
# ---------------------------------------------------------------------------
# Main content
# ---------------------------------------------------------------------------
# Accent line
st.markdown('<div class="accent-line"></div>', unsafe_allow_html=True)
# Title block
st.markdown(
f'<div class="app-title">{t["title"]}</div>',
unsafe_allow_html=True,
)
# Subtitle placeholder — filled after we know whether search was clicked
_subtitle_slot = st.empty()
# ---------------------------------------------------------------------------
# Result rendering (extracted so it can be reused for cached results)
# ---------------------------------------------------------------------------
def _render_results(data: dict, t: dict, strategy: str, top_k: int) -> None:
"""Render query results: metadata bar, answer, sources, pipeline details."""
confidence = data.get("confidence", 0.0)
intent = data.get("intent", t["unknown"])
confidence_pct = f"{confidence * 100:.0f}%"
st.markdown(
f'<div class="result-meta">'
f'{t["confidence_label"]}: <strong>{confidence_pct}</strong> &nbsp;&middot;&nbsp; '
f'{t["intent_label"]}: <strong>{intent}</strong> &nbsp;&middot;&nbsp; '
f'{t["strategy_label"]}: <strong>{strategy}</strong> &nbsp;&middot;&nbsp; '
f"top_k: <strong>{top_k}</strong>"
f"</div>",
unsafe_allow_html=True,
)
answer = data.get("answer", t["no_answer"])
st.markdown(answer)
sources = data.get("sources", [])
if sources:
with st.expander(f'{t["sources_label"]} ({len(sources)})', expanded=False):
for src in sources:
doc_name = src.get("document_id", src.get("chunk_id", t["unknown"]))
text = src.get("text", "")
score = src.get("score", 0.0)
retrieval_source = src.get("source", "")
metadata = src.get("metadata", {})
page = metadata.get("page_number", "") if isinstance(metadata, dict) else ""
page_info = f' &middot; {t["page_label"]} {page}' if page else ""
score_display = f"{score:.3f}"
st.markdown(
f'<div class="source-card">'
f'<div class="source-card-title">{html.escape(doc_name)}{page_info}</div>'
f'<div class="source-card-text">{html.escape(text[:500])}</div>'
f'<div class="source-card-meta">'
f"Score: {score_display} &nbsp;&middot;&nbsp; {html.escape(retrieval_source)}"
f"</div>"
f"</div>",
unsafe_allow_html=True,
)
else:
st.info(t["no_sources"])
pd_details = data.get("pipeline_details", {})
if pd_details:
with st.expander(t["pipeline_heading"], expanded=False):
plan_steps = pd_details.get("plan_steps", [])
if plan_steps:
st.markdown(f'**{t["pipeline_plan_steps"]}**')
for i, step_item in enumerate(plan_steps, 1):
st.markdown(f"{i}. {step_item}")
st.markdown("---")
tool_calls = pd_details.get("tool_calls", [])
if tool_calls:
st.markdown(f'**{t["pipeline_tool_calls"]}**')
for tc in tool_calls:
st.markdown(f"- `{tc}`")
st.markdown("---")
if pd_details.get("translated"):
st.markdown(f'**{t["pipeline_translation"]}**')
st.markdown(
f'- {t["pipeline_lang"]}: **{pd_details.get("detected_language", "")}**\n'
f'- {t["pipeline_original"]}: {pd_details.get("original_query", "")}\n'
f'- {t["pipeline_translated"]}: {pd_details.get("retrieval_query", "")}'
)
st.markdown("---")
def _truncate_doc(name: str, max_len: int = 30) -> str:
return name if len(name) <= max_len else name[:max_len - 1] + "\u2026"
def _render_result_table(results: list[dict], label: str) -> None:
st.markdown(f"**{label}**")
if not results:
st.caption(t["pipeline_no_results"])
return
header = f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | {t["pipeline_score"]} |\n|---|---|---|'
rows = "\n".join(
f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {r.get("score", 0):.4f} |'
for i, r in enumerate(results)
)
st.markdown(f"{header}\n{rows}")
_has_retrieval = bool(
pd_details.get("dense_results") or pd_details.get("sparse_results") or pd_details.get("fused_results")
)
if _has_retrieval:
_render_result_table(pd_details.get("sparse_results", []), t["pipeline_bm25"])
st.markdown("---")
_render_result_table(pd_details.get("dense_results", []), t["pipeline_dense"])
st.markdown("---")
_render_result_table(pd_details.get("fused_results", []), t["pipeline_fused"])
st.markdown("---")
reranked = pd_details.get("reranked_results", [])
st.markdown(f'**{t["pipeline_reranked"]}**')
if reranked:
if _has_retrieval:
fused_scores: dict[str, float] = {
r.get("chunk_id", ""): r.get("score", 0.0)
for r in pd_details.get("fused_results", [])
}
header = (
f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | '
f'{t["pipeline_score"]} | {t["pipeline_score_change"]} |\n'
f"|---|---|---|---|"
)
rows_list = []
for i, r in enumerate(reranked):
cid = r.get("chunk_id", "")
new_score = r.get("score", 0.0)
old_score = fused_scores.get(cid)
if old_score is not None:
change = f"RRF {old_score:.4f} -> {new_score:.4f}"
else:
change = "-"
rows_list.append(
f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {new_score:.4f} | {change} |'
)
st.markdown(f"{header}\n" + "\n".join(rows_list))
else:
header = f'| {t["pipeline_rank"]} | {t["pipeline_doc"]} | {t["pipeline_score"]} |\n|---|---|---|'
rows = "\n".join(
f'| {i + 1} | {_truncate_doc(r.get("document_id", ""))} | {r.get("score", 0):.4f} |'
for i, r in enumerate(reranked)
)
st.markdown(f"{header}\n{rows}")
else:
st.caption(t["pipeline_no_results"])
# ---------------------------------------------------------------------------
# Search form
# ---------------------------------------------------------------------------
def _pick_example() -> None:
"""Select a random example question and store it in session state."""
st.session_state.query_input = random.choice(EXAMPLE_QUESTIONS)
with st.form(key="search_form", clear_on_submit=False):
question = st.text_input(
t["search_label"],
key="query_input",
placeholder=t["search_placeholder"],
)
col_search, col_example = st.columns([1, 1])
with col_search:
search_clicked = st.form_submit_button(t["search_button"], use_container_width=True)
with col_example:
st.form_submit_button(t["example_button"], on_click=_pick_example, use_container_width=True)
if t["example_note"]:
st.markdown(
f'<div style="text-align:right;font-size:0.85em;color:gray;">{t["example_note"]}</div>',
unsafe_allow_html=True,
)
# Show subtitle only when no search is active
if not search_clicked and not st.session_state.get("has_searched"):
_subtitle_slot.markdown(
f'<div class="app-subtitle">{t["subtitle"]}</div>',
unsafe_allow_html=True,
)
# ---------------------------------------------------------------------------
# Query logic
# ---------------------------------------------------------------------------
if search_clicked and question.strip():
st.session_state["has_searched"] = True
data: dict = {}
_sse_error: dict | None = None
with st.status(t["status_label"], expanded=True) as _status:
try:
with requests.post(
f"{API_BASE}/query/stream",
json={
"question": question.strip(),
"top_k": top_k,
"strategy": strategy,
"session_id": st.session_state["session_id"],
},
stream=True,
timeout=180,
) as _resp:
_resp.raise_for_status()
for _raw in _resp.iter_lines():
if not _raw:
continue
_line = _raw.decode("utf-8") if isinstance(_raw, bytes) else _raw
if not _line.startswith("data: "):
continue
try:
_event = json.loads(_line[6:])
except json.JSONDecodeError:
continue
_step = _event.get("step", "")
if _step == "detect":
_intent_val = _event.get("intent", "")
_lang_val = _event.get("language", "")
if lang == "da":
st.write(f"Intent: **{_intent_val}** · Sprog: **{_lang_val}**")
else:
st.write(f"Intent: **{_intent_val}** · Language: **{_lang_val}**")
elif _step == "translate":
if _event.get("translated"):
_rq = _event.get("retrieval_query", "")
st.write(
(f"Oversat til dansk: _{_rq}_")
if lang == "da"
else (f"Translated to Danish: _{_rq}_")
)
else:
st.write(
"Ingen oversættelse nødvendig for forespørgslen"
if lang == "da"
else "No translation needed for the query"
)
elif _step == "retrieve":
_dc = _event.get("dense_count", 0)
_sc = _event.get("sparse_count", 0)
st.write(
(f"Fandt **{_dc}** semantiske + **{_sc}** leksikalske kandidater")
if lang == "da"
else (f"Found **{_dc}** semantic + **{_sc}** lexical candidates")
)
elif _step == "rerank":
_rc = _event.get("reranked_count", 0)
_cf = _event.get("confidence", 0.0)
st.write(
(f"Reranket til **{_rc}** resultater · konfidensgrad **{_cf:.0%}**")
if lang == "da"
else (f"Reranked to **{_rc}** results · confidence **{_cf:.0%}**")
)
elif _step == "plan":
_steps = _event.get("steps", [])
st.write(
(f"Plan oprettet med **{len(_steps)}** trin")
if lang == "da"
else (f"Plan created with **{len(_steps)}** steps")
)
for _ps in _steps:
st.write(f" - {_ps}")
elif _step == "execute_step":
_si = _event.get("step_index", 0)
_sd = _event.get("step_desc", "")
st.write(
(f"Trin {_si} udført: _{_sd}_")
if lang == "da"
else (f"Step {_si} executed: _{_sd}_")
)
elif _step == "synthesize":
st.write(t["synthesize_status"])
elif _step == "tool_call":
_tool_name = _event.get("tool", "")
_tool_query = _event.get("query", "")
if _tool_query:
st.write(
(f"Værktøj **{_tool_name}** kaldt: _{_tool_query}_")
if lang == "da"
else (f"Tool **{_tool_name}** called: _{_tool_query}_")
)
else:
st.write(
(f"Værktøj **{_tool_name}** kaldt")
if lang == "da"
else (f"Tool **{_tool_name}** called")
)
elif _step == "tool_result":
_rc = _event.get("result_count", 0)
_tool_name = _event.get("tool", "")
if _tool_name == "list_documents":
# list_documents returns doc list in its text,
# parse count from the tool output or show generic
st.write(
"Dokumentliste hentet"
if lang == "da"
else "Document list retrieved"
)
elif _tool_name == "fetch_document":
st.write(
(f"Hentet dokument (**{_rc}** afsnit)")
if lang == "da"
else (f"Fetched document (**{_rc}** chunks)")
)
else:
st.write(
(f"Fandt **{_rc}** relevante passager")
if lang == "da"
else (f"Found **{_rc}** relevant passages")
)
elif _step == "broaden_query":
_retry = _event.get("retry_count", 1)
_rq = _event.get("retrieval_query", "")
st.write(
(f"Lav konfidensgrad – forsøg {_retry} med udvidet søgning: _{_rq}_")
if lang == "da"
else (f"Low confidence – retry {_retry} with broadened query: _{_rq}_")
)
elif _step == "generate":
st.write(
"Svar genereret"
if lang == "da"
else "Answer generated"
)
elif _step == "rate_limit":
_rl_msg = _event.get("message", "")
st.warning(
f"⏳ {_rl_msg} — vent venligst ..."
if lang == "da"
else f"⏳ {_rl_msg} — please wait ..."
)
elif _step == "done":
data = _event.get("result", {})
_status.update(label=t["status_done"], state="complete", expanded=False)
elif _step == "error":
_sse_error = _event
_status.update(label=t["status_error"], state="error", expanded=True)
break
except requests.ConnectionError:
_status.update(label=t["status_error"], state="error", expanded=True)
st.error(t["err_connection"])
st.stop()
except requests.HTTPError as _exc:
_status.update(label=t["status_error"], state="error", expanded=True)
if _exc.response.status_code == 429:
st.warning(t["err_rate_limit"])
else:
st.error(f'{t["err_api"]}: {_exc.response.status_code} -- {_exc.response.text}')
st.stop()
except requests.Timeout:
_status.update(label=t["status_error"], state="error", expanded=True)
st.error(t["err_timeout"])
st.stop()
if _sse_error is not None:
if _sse_error.get("code") == 429:
st.warning(t["err_rate_limit"])
else:
st.error(f'{t["err_api"]}: {_sse_error.get("message", "")}')
st.stop()
# Cache result in session_state so it survives iOS tombstone / reconnect
st.session_state["last_result"] = data
st.session_state["last_question"] = question.strip()
st.session_state["last_strategy"] = strategy
st.session_state["last_top_k"] = top_k
_render_results(data, t, strategy, top_k)
elif search_clicked:
st.warning(t["empty_warning"])
elif "last_result" in st.session_state:
# Restore cached results after iOS tombstone / reconnect
_render_results(
st.session_state["last_result"],
t,
st.session_state.get("last_strategy", strategy),
st.session_state.get("last_top_k", top_k),
)
# ---------------------------------------------------------------------------
# Footer
# ---------------------------------------------------------------------------
st.markdown(
"""
<div class="app-footer">
<a href="https://github.com/Xiiqiing/Dokumentassistent" target="_blank" rel="noopener noreferrer" aria-label="GitHub repository">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0C5.374 0 0 5.373 0 12c0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23A11.509 11.509 0 0 1 12 5.803c1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576C20.566 21.797 24 17.3 24 12c0-6.627-5.373-12-12-12z"/></svg>
<span class="footer-label">Xiiqiing/Dokumentassistent</span>
</a>
<span class="footer-sep">|</span>
<a href="https://www.linkedin.com/in/xiqing/" target="_blank" rel="noopener noreferrer" aria-label="LinkedIn profile">
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="currentColor"><path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433a2.062 2.062 0 0 1-2.063-2.065 2.063 2.063 0 1 1 2.063 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/></svg>
<span class="footer-label">LinkedIn</span>
</a>
<span class="footer-sep">|</span>
<span>&copy; 2026 Xiqing</span>
</div>
""",
unsafe_allow_html=True,
)