Spaces:
Sleeping
Sleeping
"""Knowledge Value Lab — Streamlit prototype."""
from __future__ import annotations

import html
import os
import time
from datetime import datetime, timezone

import anthropic
import streamlit as st
from dotenv import load_dotenv

# Kept ahead of the kvl imports, matching the original statement order.
load_dotenv()

from kvl import ingestor, scorer, report
from kvl.modules import novelty, retrieval, generation, attribution, demand
from kvl.config import DIMENSION_META, KVS_CLASSIFICATION, MODELS, SENSITIVITY_COLOR, model_meta
# ── Page config ───────────────────────────────────────────────────────────────
# Browser-tab title, tab icon and wide layout for the app.
st.set_page_config(
    page_title="Knowledge Value Lab",
    page_icon="🔬",
    layout="wide",
)
# ── CSS ───────────────────────────────────────────────────────────────────────
# Stylesheet for the score hero box, model chips, sensitivity badges and
# warning boxes. The <style> tag stays at column 0 so Markdown treats it as
# raw HTML rather than an indented code block.
_PAGE_CSS = """
<style>
.kvs-box {
    background: linear-gradient(135deg, #1e3a5f 0%, #0d2137 100%);
    border-radius: 12px;
    padding: 28px 36px;
    text-align: center;
    margin-bottom: 8px;
}
.kvs-number { font-size: 64px; font-weight: 800; color: #f0f4ff; line-height: 1; }
.kvs-label { font-size: 16px; color: #8ab4f8; margin-top: 4px; }
.kvs-class { font-size: 22px; font-weight: 600; margin-top: 8px; }
.kvs-meta { font-size: 12px; color: #556; margin-top: 10px; font-family: monospace; }
.model-chip {
    display: inline-block;
    background: #1a2740;
    border: 1px solid #2a4060;
    border-radius: 4px;
    padding: 2px 8px;
    font-size: 11px;
    font-family: monospace;
    color: #8ab4f8;
    margin: 2px;
}
.sens-badge {
    display: inline-block;
    border-radius: 4px;
    padding: 1px 7px;
    font-size: 11px;
    font-weight: 600;
}
.warn-box {
    background: #1a1500;
    border-left: 3px solid #f8961e;
    border-radius: 4px;
    padding: 10px 14px;
    font-size: 13px;
    margin: 8px 0 16px 0;
    color: #fff;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# ── Sidebar ───────────────────────────────────────────────────────────────────
# Static reference panel: what KVL is, which models are configured, the score
# bands, and a per-dimension metric guide driven by DIMENSION_META.
with st.sidebar:
    st.markdown("## 🔬 Knowledge Value Lab")
    st.markdown(
        "KVL measures the **marginal value** of a knowledge document to an AI system "
        "across five independent dimensions, producing a single weighted **Knowledge Value Score (KVS)**."
    )
    st.divider()

    st.markdown("### Models Used")
    for m in MODELS.values():
        # Two trailing spaces before "\n" force a Markdown line break between
        # the model chip and its role caption.
        st.markdown(
            f"<span class='model-chip'>{m['display']}</span>  \n"
            f"<span style='font-size:11px;color:#888;'>{m['role']}</span>",
            unsafe_allow_html=True,
        )
    st.markdown("")
    st.markdown(
        "<div class='warn-box'>"
        "⚠️ <strong>Scores are model-relative.</strong> "
        "Knowledge Novelty and Generation Utility reflect this document's value "
        "to the <em>specific models above</em>. Scores will change when models are updated. "
        "Always report scores alongside the model names and evaluation date."
        "</div>",
        unsafe_allow_html=True,
    )
    st.divider()

    st.markdown("### Score Classifications")
    for threshold, label, desc in KVS_CLASSIFICATION:
        # Upper bound of the band shown next to each threshold.
        # NOTE(review): assumes 20-point bands with the top band starting at
        # 81 or above — confirm against KVS_CLASSIFICATION in kvl.config.
        hi = threshold + 19 if threshold < 81 else 100
        st.markdown(f"**{threshold}–{hi}** — {label}")
        st.caption(desc)
    st.divider()

    st.markdown("### Metric Guide")
    for dmeta in DIMENSION_META.values():
        sens = dmeta["model_sensitivity"]
        sens_color = SENSITIVITY_COLOR[sens]
        # round() rather than int(): int() truncates, so a weight whose
        # float product lands just under a whole number would display 1% low.
        weight_pct = round(dmeta["weight"] * 100)
        with st.expander(f"{dmeta['name']} · {weight_pct}%"):
            # The "22"/"55" suffixes are appended to the colour value to form
            # the translucent badge background and border.
            st.markdown(
                f"<span class='sens-badge' style='background:{sens_color}22;color:{sens_color};"
                f"border:1px solid {sens_color}55;'>"
                f"Model sensitivity: {sens}</span>",
                unsafe_allow_html=True,
            )
            st.markdown(dmeta["description"])
            st.markdown(f"**How measured:** {dmeta['how_measured']}")
            st.markdown(f"*{dmeta['sensitivity_note']}*")
            st.markdown(f"**High score:** {dmeta['high_means']}")
            st.markdown(f"**Low score:** {dmeta['low_means']}")
# ── Header ────────────────────────────────────────────────────────────────────
# Page title and one-paragraph introduction shown above the upload area.
st.title("🔬 Knowledge Value Lab")
st.markdown(
    # Two trailing spaces before "\n" put the tagline and the instruction on
    # separate lines in the rendered Markdown.
    "**Measuring the Marginal Value of Knowledge Assets for AI Systems**  \n"
    "Upload a Markdown document to receive a quantified Knowledge Value Score across five dimensions."
)
st.divider()
# ── Cached resources ──────────────────────────────────────────────────────────
@st.cache_resource
def load_embedder():
    """Return the shared ``all-MiniLM-L6-v2`` sentence-embedding model.

    Wrapped in ``st.cache_resource`` so the model is constructed once and the
    same object is handed back on later script reruns, instead of being
    rebuilt on every evaluation.

    ``sentence_transformers`` is imported inside the function, so the import
    only happens when an embedder is first requested.
    """
    # NOTE(review): cache_resource shares one object between sessions —
    # confirm concurrent use of the model is acceptable for this deployment.
    from sentence_transformers import SentenceTransformer

    return SentenceTransformer("all-MiniLM-L6-v2")
def load_client():
    """Build an Anthropic API client from the ``ANTHROPIC_API_KEY`` variable.

    When the variable is unset or empty, an error is shown in the app and the
    current script run is stopped via ``st.stop()``.
    """
    key = os.environ.get("ANTHROPIC_API_KEY")
    if key is None or key == "":
        st.error("ANTHROPIC_API_KEY not found. Add it to your .env file.")
        st.stop()
    return anthropic.Anthropic(api_key=key)
# ── Upload section ────────────────────────────────────────────────────────────
# Left column: file picker, parsed-document summary and the Evaluate button.
# Leaves `uploaded`, `run` and (when a file is present) `md_text` / `doc`
# defined for the code that follows.
col_upload, col_preview = st.columns([1, 1], gap="large")

with col_upload:
    st.markdown("### Upload Document")
    uploaded = st.file_uploader(
        "Choose a Markdown file",
        type=["md"],
        help="Upload a .md file to evaluate its knowledge value for AI systems.",
        label_visibility="collapsed",
    )
    if uploaded:
        try:
            # getvalue() returns the whole buffer regardless of the current
            # read position; "utf-8-sig" decodes plain UTF-8 and also drops a
            # leading byte-order mark so it does not end up in the text.
            md_text = uploaded.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            st.error(
                "Could not read this file as UTF-8 text. "
                "Re-save it with UTF-8 encoding and upload it again."
            )
            st.stop()
        doc = ingestor.parse(md_text)
        st.success(
            f"**{doc.title}**  \n"
            f"{doc.word_count:,} words · {len(doc.sections)} sections · {len(doc.chunks)} chunks"
        )
        run = st.button("▶ Evaluate Knowledge Value", type="primary", use_container_width=True)
    else:
        st.info("Drag and drop a `.md` file above, or click to browse.")
        run = False
# Right column: the first 1200 characters of the uploaded text, or an empty
# placeholder box when nothing has been uploaded.
with col_preview:
    st.markdown("### Document Preview")
    if uploaded:
        preview_text = md_text[:1200] + ("…" if len(md_text) > 1200 else "")
        # The text comes from a user-supplied file and is placed inside raw
        # HTML, so it is escaped first: any markup in the document is shown
        # literally instead of being interpreted by the browser.
        st.markdown(
            "<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
            "padding:16px;font-size:13px;max-height:280px;overflow:auto;white-space:pre-wrap;color:#fff;'>"
            f"{html.escape(preview_text)}</div>",
            unsafe_allow_html=True,
        )
    else:
        st.markdown(
            "<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
            "padding:40px;text-align:center;color:#555;'>No document uploaded yet</div>",
            unsafe_allow_html=True,
        )
# ── Evaluation pipeline ───────────────────────────────────────────────────────
# Runs on the rerun triggered by the Evaluate button: executes the five scoring
# modules in order, combines their scores into the KVS, then renders the report
# and offers it as a Markdown download.
if run and uploaded:
    st.divider()
    st.markdown("### Evaluation in Progress")

    client = load_client()
    embedder = load_embedder()

    steps = [
        "Module A: Knowledge Novelty",
        "Module B: Retrieval Utility",
        "Module C: Generation Utility",
        "Module D: Attribution & Grounding",
        "Module E: Demand Utility",
        "Computing Knowledge Value Score",
    ]
    progress_bar = st.progress(0)
    step_statuses = {s: "pending" for s in steps}
    _sub_msg = [""]  # mutable cell so sub_progress can update it
    step_placeholder = st.empty()

    def render_steps(elapsed: int | None = None):
        """Redraw the step checklist inside ``step_placeholder``.

        Each step is drawn with an icon and colour for its current state; the
        running step is highlighted and shows the latest sub-progress message.

        Args:
            elapsed: When given, a "complete in N s" footer row is appended.
        """
        icons = {"pending": "○", "running": "⏳", "done": "✓"}
        colors = {"pending": "#555", "running": "#e8f0fe", "done": "#4caf87"}
        rows = []
        for step, state in step_statuses.items():
            is_running = state == "running"
            bg = "background:#0d1f35;" if is_running else ""
            bold = "font-weight:600;" if is_running else ""
            sub = (
                f"<div style='font-size:12px;color:#8ab4f8;margin:3px 0 0 22px;'>{_sub_msg[0]}</div>"
                if is_running and _sub_msg[0]
                else ""
            )
            rows.append(
                f"<div style='padding:7px 16px;{bg}border-bottom:1px solid #1a1a2e;'>"
                f"<span style='font-family:monospace;color:{colors[state]};{bold}'>{icons[state]} {step}</span>"
                f"{sub}</div>"
            )
        footer = ""
        if elapsed is not None:
            footer = (
                "<div style='padding:7px 16px;font-size:12px;color:#4caf87;'>"
                f"✓ Evaluation complete in {elapsed}s</div>"
            )
        step_placeholder.markdown(
            "<div style='border:1px solid #2a2a3a;border-radius:8px;overflow:hidden;'>"
            + "".join(rows)
            + footer
            + "</div>",
            unsafe_allow_html=True,
        )

    def sub_progress(msg: str):
        """Progress callback handed to the modules: store ``msg`` and redraw."""
        _sub_msg[0] = msg
        render_steps()

    module_results = {}
    # Timezone-aware so the "UTC" suffix in the formatted timestamp is true.
    eval_start = datetime.now(timezone.utc)
    # Monotonic clock: the elapsed time cannot be skewed by system clock changes.
    t0 = time.monotonic()

    # (step label, result key, runner, progress-bar value once the step is done)
    # Runners are lambdas so the attribution step can read the generation
    # result, which only exists after the generation step has run.
    module_runs = [
        (steps[0], "novelty",
         lambda: novelty.evaluate(client, doc, progress_cb=sub_progress), 20),
        (steps[1], "retrieval",
         lambda: retrieval.evaluate(client, doc, embedder, progress_cb=sub_progress), 40),
        (steps[2], "generation",
         lambda: generation.evaluate(client, doc, progress_cb=sub_progress), 60),
        (steps[3], "attribution",
         lambda: attribution.evaluate(
             client, doc, module_results["generation"], embedder, progress_cb=sub_progress
         ), 80),
        (steps[4], "demand",
         lambda: demand.evaluate(client, doc, progress_cb=sub_progress), 92),
    ]

    progress_bar.progress(5)
    for step, key, evaluate_module, done_pct in module_runs:
        # Clear the previous module's last message so it is not shown under
        # the step that is just starting.
        _sub_msg[0] = ""
        step_statuses[step] = "running"
        render_steps()
        module_results[key] = evaluate_module()
        step_statuses[step] = "done"
        progress_bar.progress(done_pct)

    step_statuses[steps[5]] = "running"
    _sub_msg[0] = "Computing weighted Knowledge Value Score..."
    render_steps()
    dim_scores = {key: result["score"] for key, result in module_results.items()}
    kvs_result = scorer.compute(dim_scores)
    step_statuses[steps[5]] = "done"
    progress_bar.progress(100)

    elapsed = round(time.monotonic() - t0)
    _sub_msg[0] = ""
    render_steps(elapsed=elapsed)

    eval_date_str = eval_start.strftime("%Y-%m-%d %H:%M UTC")
    meta = model_meta(eval_date_str)

    # ── Results ───────────────────────────────────────────────────────────────
    st.divider()
    st.markdown("## Knowledge Value Report")

    kvs = kvs_result["kvs"]
    classification = kvs_result["classification"]
    color_map = {
        "Transformational Value": "#ffd166",
        "High Value": "#06d6a0",
        "Moderate Value": "#8ab4f8",
        "Incremental Value": "#f8961e",
        "Minimal Value": "#ef476f",
    }
    # Unrecognised classification labels fall back to the "Moderate" blue.
    badge_color = color_map.get(classification, "#8ab4f8")

    # KVS hero with model metadata. Built from adjacent string pieces so no
    # line of the markup carries source-code indentation into the Markdown.
    st.markdown(
        '<div class="kvs-box">'
        f'<div class="kvs-number">{kvs}</div>'
        '<div class="kvs-label">Knowledge Value Score / 100</div>'
        f'<div class="kvs-class" style="color:{badge_color};">{classification}</div>'
        '<div class="kvs-meta">'
        f"Evaluated {eval_date_str}<br>"
        f"Judge: {MODELS['judge']['display']} · "
        f"Worker: {MODELS['worker']['display']} · "
        f"Embeddings: {MODELS['embedder']['display']}"
        "</div>"
        "</div>",
        unsafe_allow_html=True,
    )

    # Model-relativity warning
    st.markdown(
        "<div class='warn-box'>"
        "⚠️ <strong>Score validity:</strong> "
        "Knowledge Novelty and Generation Utility are <strong>model-relative</strong> — "
        "they reflect this document's marginal value to the models listed above. "
        "Scores will change if the underlying models are updated or replaced. "
        "Always report scores alongside model names and evaluation date."
        "</div>",
        unsafe_allow_html=True,
    )

    # ── Dimension breakdown ───────────────────────────────────────────────────
    st.markdown("### Dimension Breakdown")
    st.caption(
        "Each dimension is scored 0–100 and weighted by its contribution to the overall KVS. "
        "The sensitivity badge shows how much the score depends on the specific AI model used."
    )

    bar_width = 20
    dims = [
        ("Knowledge Novelty", "novelty"),
        ("Retrieval Utility", "retrieval"),
        ("Generation Utility", "generation"),
        ("Attribution", "attribution"),
        ("Demand Utility", "demand"),
    ]
    for label, key in dims:
        score = dim_scores[key]
        # Weight comes from DIMENSION_META — the same source the sidebar's
        # Metric Guide displays — so the two views cannot disagree.
        # NOTE(review): confirm scorer.compute applies these same weights.
        pct = round(DIMENSION_META[key]["weight"] * 100)
        contrib = kvs_result["weighted_contributions"][key]
        # Clamp so an out-of-range score cannot produce a malformed bar.
        filled = max(0, min(bar_width, round(score / 100 * bar_width)))
        bar_str = "█" * filled + "░" * (bar_width - filled)
        sens = DIMENSION_META[key]["model_sensitivity"]
        sens_color = SENSITIVITY_COLOR[sens]

        col1, col2 = st.columns([4, 1])
        with col1:
            st.markdown(
                f"**{label}** "
                f"<span class='sens-badge' style='background:{sens_color}22;color:{sens_color};"
                f"border:1px solid {sens_color}55;'>sensitivity: {sens}</span>  \n"
                f"`{bar_str}` **{score}/100** "
                f"<span style='color:#888;font-size:13px;'> ×{pct}% = {contrib} pts</span>",
                unsafe_allow_html=True,
            )
        with col2:
            st.metric(label="score", value=str(score), label_visibility="collapsed")

    # ── Detailed analysis expanders ───────────────────────────────────────────
    st.markdown("### Detailed Analysis")

    def _module_intro(key: str):
        """Render the part shared by every module expander.

        Draws the sensitivity badge, the methodology text taken from
        ``DIMENSION_META[key]``, a divider, and the module's result summary.
        Must be called inside the expander it should draw into.
        """
        dmeta = DIMENSION_META[key]
        sens = dmeta["model_sensitivity"]
        sens_color = SENSITIVITY_COLOR[sens]
        st.markdown(
            f"<span class='sens-badge' style='background:{sens_color}22;color:{sens_color};"
            f"border:1px solid {sens_color}55;'>"
            f"Model sensitivity: {sens}</span>",
            unsafe_allow_html=True,
        )
        st.markdown(f"**What this measures:** {dmeta['description']}")
        st.markdown(f"**How it's measured:** {dmeta['how_measured']}")
        st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}")
        st.info(dmeta["sensitivity_note"])
        st.divider()
        st.markdown(f"**Result:** {module_results[key]['summary']}")

    # Module A
    with st.expander(f"Module A — Knowledge Novelty · {dim_scores['novelty']}/100"):
        _module_intro("novelty")
        details = module_results["novelty"].get("details", [])
        if details:
            st.markdown("**Claim analysis** (🟢 novel · 🟡 partial · 🔴 already known):")
            for d in details:
                known = d["known_score"]
                known_pct = round(known * 100)
                # Below 0.4 counts as novel, below 0.7 as partial, else known.
                icon = "🟢" if known < 0.4 else ("🟡" if known < 0.7 else "🔴")
                st.markdown(
                    f"{icon} **{d['claim'][:130]}**  \n"
                    f"*Known to model: {known_pct}% — {d['reason']}*"
                )

    # Module B
    with st.expander(f"Module B — Retrieval Utility · {dim_scores['retrieval']}/100"):
        _module_intro("retrieval")
        details = module_results["retrieval"].get("details", [])
        if details:
            st.caption(
                "**Recall@3** — fraction of queries where the correct chunk appears in top 3 results (1.0 = perfect).  \n"
                "**MRR** — Mean Reciprocal Rank; how high the correct chunk ranks on average (1.0 = always first)."
            )
            st.table({
                "Query": [d["query"] for d in details],
                "Recall@3": [f"{d['recall_at_3']:.2f}" for d in details],
                "MRR": [f"{d['reciprocal_rank']:.2f}" for d in details],
            })

    # Module C
    with st.expander(f"Module C — Generation Utility · {dim_scores['generation']}/100"):
        _module_intro("generation")
        for d in module_results["generation"].get("details", []):
            st.markdown(f"**Q: {d['question']}**")
            c1, c2 = st.columns(2)
            with c1:
                st.markdown("*Baseline — no document:*")
                st.markdown(f"> {d['baseline_answer'][:350]}")
            with c2:
                st.markdown("*RAG — with document:*")
                st.markdown(f"> {d['rag_answer'][:350]}")
            st.caption(
                f"Improvement: **{d['improvement']}/100** | "
                f"Accuracy: {d['accuracy']}/5 | "
                f"Completeness: {d['completeness']}/5 | "
                f"Specificity: {d['specificity']}/5  \n{d['reason']}"
            )
            st.divider()

    # Module D
    with st.expander(f"Module D — Attribution & Grounding · {dim_scores['attribution']}/100"):
        _module_intro("attribution")
        for d in module_results["attribution"].get("details", []):
            halluc = "⚠️ Hallucination detected" if d.get("hallucination_detected") else "✓ No hallucination"
            st.markdown(
                f"**Q: {d['question'][:110]}**  \n"
                f"Grounding: **{round(d['grounding_fraction'] * 100)}%** | "
                f"Semantic similarity: {d['semantic_similarity']} | {halluc}"
            )
            if d.get("ungrounded_claims"):
                st.caption("Ungrounded claims: " + "; ".join(d["ungrounded_claims"][:3]))
            if d.get("reason"):
                st.caption(d["reason"])

    # Module E
    with st.expander(f"Module E — Demand Utility · {dim_scores['demand']}/100"):
        _module_intro("demand")
        topics = module_results["demand"].get("topics", [])
        if topics:
            st.caption(
                "**Query Freq** — estimated user query frequency for this topic (1 = rare, 10 = very common).  \n"
                "**Priority Domain** — whether this is a high-impact sector (health, climate, food, policy, etc.).  \n"
                "**Unmet Need** — whether existing AI models fall short in covering this topic."
            )
            st.table({
                "Topic": [t.get("topic", "") for t in topics],
                "Query Freq (1-10)": [t.get("query_frequency", "-") for t in topics],
                "Priority Domain": ["Yes" if t.get("priority_domain") else "No" for t in topics],
                "Unmet Need": ["Yes" if t.get("unmet_need") else "No" for t in topics],
                "Rationale": [t.get("rationale", "")[:80] for t in topics],
            })

    # ── Recommendations ───────────────────────────────────────────────────────
    st.markdown("### Recommended Actions")
    for rec in kvs_result["recommendations"]:
        st.markdown(f"- {rec}")

    # ── Download ──────────────────────────────────────────────────────────────
    st.divider()
    report_md = report.generate(doc.title, kvs_result, module_results, meta)
    # The title comes from the uploaded document, so keep only characters that
    # are safe in a file name and fall back to a fixed stem if none remain.
    safe_title = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in doc.title[:40]
    ).strip("_.") or "document"
    st.download_button(
        label="⬇ Download Full Report (Markdown)",
        data=report_md,
        file_name=f"kvl_report_{safe_title}.md",
        mime="text/markdown",
        use_container_width=True,
    )
elif not uploaded:
    st.info("Upload a `.md` file above to begin evaluation.")