"""Knowledge Value Lab — Streamlit prototype.""" from __future__ import annotations import os import time import anthropic import streamlit as st from dotenv import load_dotenv from datetime import datetime load_dotenv() from kvl import ingestor, scorer, report from kvl.modules import novelty, retrieval, generation, attribution, demand from kvl.config import DIMENSION_META, KVS_CLASSIFICATION, MODELS, SENSITIVITY_COLOR, model_meta # ── Page config ─────────────────────────────────────────────────────────────── st.set_page_config( page_title="Knowledge Value Lab", page_icon="🔬", layout="wide", ) # ── CSS ─────────────────────────────────────────────────────────────────────── st.markdown(""" """, unsafe_allow_html=True) # ── Sidebar ─────────────────────────────────────────────────────────────────── with st.sidebar: st.markdown("## 🔬 Knowledge Value Lab") st.markdown( "KVL measures the **marginal value** of a knowledge document to an AI system " "across five independent dimensions, producing a single weighted **Knowledge Value Score (KVS)**." ) st.divider() st.markdown("### Models Used") for key, m in MODELS.items(): st.markdown( f"{m['display']} \n" f"{m['role']}", unsafe_allow_html=True, ) st.markdown("") st.markdown( "
" "⚠️ Scores are model-relative. " "Knowledge Novelty and Generation Utility reflect this document's value " "to the specific models above. Scores will change when models are updated. " "Always report scores alongside the model names and evaluation date." "
", unsafe_allow_html=True, ) st.divider() st.markdown("### Score Classifications") for threshold, label, desc in KVS_CLASSIFICATION: hi = threshold + 19 if threshold < 81 else 100 st.markdown(f"**{threshold}–{hi}** — {label}") st.caption(desc) st.divider() st.markdown("### Metric Guide") for key, dmeta in DIMENSION_META.items(): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] with st.expander(f"{dmeta['name']} · {int(dmeta['weight']*100)}%"): st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(dmeta["description"]) st.markdown(f"**How measured:** {dmeta['how_measured']}") st.markdown(f"*{dmeta['sensitivity_note']}*") st.markdown(f"**High score:** {dmeta['high_means']}") st.markdown(f"**Low score:** {dmeta['low_means']}") # ── Header ──────────────────────────────────────────────────────────────────── st.title("🔬 Knowledge Value Lab") st.markdown( "**Measuring the Marginal Value of Knowledge Assets for AI Systems** \n" "Upload a Markdown document to receive a quantified Knowledge Value Score across five dimensions." ) st.divider() # ── Cached resources ────────────────────────────────────────────────────────── @st.cache_resource(show_spinner="Loading embedding model (all-MiniLM-L6-v2)...") def load_embedder(): from sentence_transformers import SentenceTransformer return SentenceTransformer("all-MiniLM-L6-v2") @st.cache_resource def load_client(): api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: st.error("ANTHROPIC_API_KEY not found. Add it to your .env file.") st.stop() return anthropic.Anthropic(api_key=api_key) # ── Upload section ──────────────────────────────────────────────────────────── col_upload, col_preview = st.columns([1, 1], gap="large") with col_upload: st.markdown("### Upload Document") uploaded = st.file_uploader( "Choose a Markdown file", type=["md"], help="Upload a .md file to evaluate its knowledge value for AI systems.", label_visibility="collapsed", ) if uploaded: md_text = uploaded.read().decode("utf-8") doc = ingestor.parse(md_text) st.success( f"**{doc.title}** \n" f"{doc.word_count:,} words · {len(doc.sections)} sections · {len(doc.chunks)} chunks" ) run = st.button("▶ Evaluate Knowledge Value", type="primary", use_container_width=True) else: st.info("Drag and drop a `.md` file above, or click to browse.") run = False with col_preview: st.markdown("### Document Preview") if uploaded: preview_text = md_text[:1200] + ("…" if len(md_text) > 1200 else "") st.markdown( f"
" f"{preview_text}
", unsafe_allow_html=True, ) else: st.markdown( "
No document uploaded yet
", unsafe_allow_html=True, ) # ── Evaluation pipeline ─────────────────────────────────────────────────────── if run and uploaded: st.divider() st.markdown("### Evaluation in Progress") client = load_client() embedder = load_embedder() steps = [ "Module A: Knowledge Novelty", "Module B: Retrieval Utility", "Module C: Generation Utility", "Module D: Attribution & Grounding", "Module E: Demand Utility", "Computing Knowledge Value Score", ] progress_bar = st.progress(0) step_statuses = {s: "pending" for s in steps} _sub_msg = [""] # mutable cell so sub_progress can update it step_placeholder = st.empty() def render_steps(elapsed: int | None = None): rows = [] for s, state in step_statuses.items(): is_running = state == "running" icon = {"pending": "○", "running": "⟳", "done": "✓"}[state] fg = {"pending": "#555", "running": "#e8f0fe", "done": "#4caf87"}[state] bg = "background:#0d1f35;" if is_running else "" bold = "font-weight:600;" if is_running else "" sub = ( f"
{_sub_msg[0]}
" if is_running and _sub_msg[0] else "" ) rows.append( f"
" f"{icon}  {s}" f"{sub}
" ) footer = "" if elapsed is not None: footer = ( f"
" f"✓ Evaluation complete in {elapsed}s
" ) step_placeholder.markdown( f"
" + "".join(rows) + footer + "
", unsafe_allow_html=True, ) def sub_progress(msg: str): _sub_msg[0] = msg render_steps() module_results = {} eval_start = datetime.now() t0 = time.time() step_statuses[steps[0]] = "running"; render_steps(); progress_bar.progress(5) module_results["novelty"] = novelty.evaluate(client, doc, progress_cb=sub_progress) step_statuses[steps[0]] = "done"; progress_bar.progress(20) step_statuses[steps[1]] = "running"; render_steps() module_results["retrieval"] = retrieval.evaluate(client, doc, embedder, progress_cb=sub_progress) step_statuses[steps[1]] = "done"; progress_bar.progress(40) step_statuses[steps[2]] = "running"; render_steps() module_results["generation"] = generation.evaluate(client, doc, progress_cb=sub_progress) step_statuses[steps[2]] = "done"; progress_bar.progress(60) step_statuses[steps[3]] = "running"; render_steps() module_results["attribution"] = attribution.evaluate( client, doc, module_results["generation"], embedder, progress_cb=sub_progress ) step_statuses[steps[3]] = "done"; progress_bar.progress(80) step_statuses[steps[4]] = "running"; render_steps() module_results["demand"] = demand.evaluate(client, doc, progress_cb=sub_progress) step_statuses[steps[4]] = "done"; progress_bar.progress(92) step_statuses[steps[5]] = "running"; _sub_msg[0] = "Computing weighted Knowledge Value Score..."; render_steps() dim_scores = {k: module_results[k]["score"] for k in module_results} kvs_result = scorer.compute(dim_scores) step_statuses[steps[5]] = "done"; progress_bar.progress(100) elapsed = round(time.time() - t0) _sub_msg[0] = "" render_steps(elapsed=elapsed) eval_date_str = eval_start.strftime("%Y-%m-%d %H:%M UTC") meta = model_meta(eval_date_str) # ── Results ─────────────────────────────────────────────────────────────── st.divider() st.markdown("## Knowledge Value Report") kvs = kvs_result["kvs"] classification = kvs_result["classification"] color_map = { "Transformational Value": "#ffd166", "High Value": "#06d6a0", "Moderate Value": "#8ab4f8", "Incremental Value": "#f8961e", "Minimal Value": "#ef476f", } badge_color = color_map.get(classification, "#8ab4f8") # KVS hero with model metadata st.markdown( f"""
{kvs}
Knowledge Value Score / 100
{classification}
Evaluated {eval_date_str}
Judge: {MODELS['judge']['display']}  ·  Worker: {MODELS['worker']['display']}  ·  Embeddings: {MODELS['embedder']['display']}
""", unsafe_allow_html=True, ) # Model-relativity warning st.markdown( "
" "⚠️ Score validity: " "Knowledge Novelty and Generation Utility are model-relative — " "they reflect this document's marginal value to the models listed above. " "Scores will change if the underlying models are updated or replaced. " "Always report scores alongside model names and evaluation date." "
", unsafe_allow_html=True, ) # ── Dimension breakdown ─────────────────────────────────────────────────── st.markdown("### Dimension Breakdown") st.caption( "Each dimension is scored 0–100 and weighted by its contribution to the overall KVS. " "The sensitivity badge shows how much the score depends on the specific AI model used." ) dims = [ ("Knowledge Novelty", "novelty", 0.30), ("Retrieval Utility", "retrieval", 0.20), ("Generation Utility", "generation", 0.25), ("Attribution", "attribution", 0.15), ("Demand Utility", "demand", 0.10), ] for label, key, weight in dims: sc = dim_scores[key] contrib = kvs_result["weighted_contributions"][key] filled = round(sc / 100 * 20) bar_str = "█" * filled + "░" * (20 - filled) pct = int(weight * 100) sens = DIMENSION_META[key]["model_sensitivity"] sc_color = SENSITIVITY_COLOR[sens] col1, col2 = st.columns([4, 1]) with col1: st.markdown( f"**{label}**  " f"sensitivity: {sens} \n" f"`{bar_str}`   **{sc}/100** " f" ×{pct}% = {contrib} pts", unsafe_allow_html=True, ) with col2: st.metric(label="score", value=str(sc), label_visibility="collapsed") # ── Detailed analysis expanders ─────────────────────────────────────────── st.markdown("### Detailed Analysis") # Module A dmeta = DIMENSION_META["novelty"] with st.expander(f"Module A — Knowledge Novelty · {dim_scores['novelty']}/100"): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(f"**What this measures:** {dmeta['description']}") st.markdown(f"**How it's measured:** {dmeta['how_measured']}") st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}") st.info(dmeta["sensitivity_note"]) st.divider() st.markdown(f"**Result:** {module_results['novelty']['summary']}") details = module_results["novelty"].get("details", []) if details: st.markdown("**Claim analysis** (🟢 novel · 🟡 partial · 🔴 already known):") for d in details: known_pct = round(d["known_score"] * 100) icon = "🟢" if d["known_score"] < 0.4 else ("🟡" if d["known_score"] < 0.7 else "🔴") st.markdown( f"{icon} **{d['claim'][:130]}** \n" f"*Known to model: {known_pct}% — {d['reason']}*" ) # Module B dmeta = DIMENSION_META["retrieval"] with st.expander(f"Module B — Retrieval Utility · {dim_scores['retrieval']}/100"): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(f"**What this measures:** {dmeta['description']}") st.markdown(f"**How it's measured:** {dmeta['how_measured']}") st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}") st.info(dmeta["sensitivity_note"]) st.divider() st.markdown(f"**Result:** {module_results['retrieval']['summary']}") details = module_results["retrieval"].get("details", []) if details: st.caption( "**Recall@3** — fraction of queries where the correct chunk appears in top 3 results (1.0 = perfect). \n" "**MRR** — Mean Reciprocal Rank; how high the correct chunk ranks on average (1.0 = always first)." ) st.table({ "Query": [d["query"] for d in details], "Recall@3": [f"{d['recall_at_3']:.2f}" for d in details], "MRR": [f"{d['reciprocal_rank']:.2f}" for d in details], }) # Module C dmeta = DIMENSION_META["generation"] with st.expander(f"Module C — Generation Utility · {dim_scores['generation']}/100"): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(f"**What this measures:** {dmeta['description']}") st.markdown(f"**How it's measured:** {dmeta['how_measured']}") st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}") st.info(dmeta["sensitivity_note"]) st.divider() st.markdown(f"**Result:** {module_results['generation']['summary']}") for d in module_results["generation"].get("details", []): st.markdown(f"**Q: {d['question']}**") c1, c2 = st.columns(2) with c1: st.markdown("*Baseline — no document:*") st.markdown(f"> {d['baseline_answer'][:350]}") with c2: st.markdown("*RAG — with document:*") st.markdown(f"> {d['rag_answer'][:350]}") st.caption( f"Improvement: **{d['improvement']}/100**  |  " f"Accuracy: {d['accuracy']}/5  |  " f"Completeness: {d['completeness']}/5  |  " f"Specificity: {d['specificity']}/5 \n{d['reason']}" ) st.divider() # Module D dmeta = DIMENSION_META["attribution"] with st.expander(f"Module D — Attribution & Grounding · {dim_scores['attribution']}/100"): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(f"**What this measures:** {dmeta['description']}") st.markdown(f"**How it's measured:** {dmeta['how_measured']}") st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}") st.info(dmeta["sensitivity_note"]) st.divider() st.markdown(f"**Result:** {module_results['attribution']['summary']}") for d in module_results["attribution"].get("details", []): halluc = "⚠️ Hallucination detected" if d.get("hallucination_detected") else "✓ No hallucination" st.markdown( f"**Q: {d['question'][:110]}** \n" f"Grounding: **{round(d['grounding_fraction']*100)}%**  |  " f"Semantic similarity: {d['semantic_similarity']}  |  {halluc}" ) if d.get("ungrounded_claims"): st.caption("Ungrounded claims: " + "; ".join(d["ungrounded_claims"][:3])) if d.get("reason"): st.caption(d["reason"]) # Module E dmeta = DIMENSION_META["demand"] with st.expander(f"Module E — Demand Utility · {dim_scores['demand']}/100"): sens = dmeta["model_sensitivity"] sc = SENSITIVITY_COLOR[sens] st.markdown( f"" f"Model sensitivity: {sens}", unsafe_allow_html=True, ) st.markdown(f"**What this measures:** {dmeta['description']}") st.markdown(f"**How it's measured:** {dmeta['how_measured']}") st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}") st.info(dmeta["sensitivity_note"]) st.divider() st.markdown(f"**Result:** {module_results['demand']['summary']}") topics = module_results["demand"].get("topics", []) if topics: st.caption( "**Query Freq** — estimated user query frequency for this topic (1 = rare, 10 = very common). \n" "**Priority Domain** — whether this is a high-impact sector (health, climate, food, policy, etc.). \n" "**Unmet Need** — whether existing AI models fall short in covering this topic." ) st.table({ "Topic": [t.get("topic", "") for t in topics], "Query Freq (1-10)": [t.get("query_frequency", "-") for t in topics], "Priority Domain": ["Yes" if t.get("priority_domain") else "No" for t in topics], "Unmet Need": ["Yes" if t.get("unmet_need") else "No" for t in topics], "Rationale": [t.get("rationale", "")[:80] for t in topics], }) # ── Recommendations ─────────────────────────────────────────────────────── st.markdown("### Recommended Actions") for rec in kvs_result["recommendations"]: st.markdown(f"- {rec}") # ── Download ────────────────────────────────────────────────────────────── st.divider() report_md = report.generate(doc.title, kvs_result, module_results, meta) st.download_button( label="⬇ Download Full Report (Markdown)", data=report_md, file_name=f"kvl_report_{doc.title[:40].replace(' ', '_')}.md", mime="text/markdown", use_container_width=True, ) elif not uploaded: st.info("Upload a `.md` file above to begin evaluation.")