"""Knowledge Value Lab — Streamlit prototype."""
from __future__ import annotations
import os
import time
import anthropic
import streamlit as st
from dotenv import load_dotenv
from datetime import datetime
load_dotenv()
from kvl import ingestor, scorer, report
from kvl.modules import novelty, retrieval, generation, attribution, demand
from kvl.config import DIMENSION_META, KVS_CLASSIFICATION, MODELS, SENSITIVITY_COLOR, model_meta
# ── Page config ───────────────────────────────────────────────────────────────
# Set the browser-tab title and icon and switch the page to the wide layout.
st.set_page_config(
page_title="Knowledge Value Lab",
page_icon="🔬",
layout="wide",
)
# ── CSS ───────────────────────────────────────────────────────────────────────
# Injects page-level markup through st.markdown with raw HTML enabled.
# NOTE(review): the payload visible here is a single newline — the stylesheet
# body appears to be missing from this view; confirm against the original file.
st.markdown("""
""", unsafe_allow_html=True)
# ── Sidebar ───────────────────────────────────────────────────────────────────
# Sidebar content, top to bottom: product blurb, the configured models, the
# score classification bands, and one expandable guide entry per dimension.
with st.sidebar:
st.markdown("## 🔬 Knowledge Value Lab")
st.markdown(
"KVL measures the **marginal value** of a knowledge document to an AI system "
"across five independent dimensions, producing a single weighted **Knowledge Value Score (KVS)**."
)
st.divider()
st.markdown("### Models Used")
# One entry per configured model: its display name, then its role on the next line.
for key, m in MODELS.items():
st.markdown(
f"{m['display']} \n"
f"{m['role']}",
unsafe_allow_html=True,
)
st.markdown("")
# Model-relativity caveat shown under the model list.
# NOTE(review): the first and last string fragments of this call are bare
# quote characters split across lines as seen here, which does not parse — the
# wrapper markup appears to have been stripped; restore it before running.
st.markdown(
"
"
"⚠️ Scores are model-relative. "
"Knowledge Novelty and Generation Utility reflect this document's value "
"to the specific models above. Scores will change when models are updated. "
"Always report scores alongside the model names and evaluation date."
"
",
unsafe_allow_html=True,
)
st.divider()
st.markdown("### Score Classifications")
# Each KVS_CLASSIFICATION row unpacks as (threshold, label, description).
for threshold, label, desc in KVS_CLASSIFICATION:
# Upper bound of the displayed band: threshold + 19, or 100 once threshold >= 81.
# NOTE(review): presumably the bands are 20 points wide — verify against
# the thresholds actually defined in KVS_CLASSIFICATION.
hi = threshold + 19 if threshold < 81 else 100
st.markdown(f"**{threshold}–{hi}** — {label}")
st.caption(desc)
st.divider()
st.markdown("### Metric Guide")
# One expander per dimension, titled with its name and its weight as a whole percent.
for key, dmeta in DIMENSION_META.items():
sens = dmeta["model_sensitivity"]
# NOTE(review): sc is looked up but not referenced by the markup visible below.
sc = SENSITIVITY_COLOR[sens]
# int() truncates the float product when building the percent label.
with st.expander(f"{dmeta['name']} · {int(dmeta['weight']*100)}%"):
st.markdown(
f""
f"Model sensitivity: {sens}",
unsafe_allow_html=True,
)
st.markdown(dmeta["description"])
st.markdown(f"**How measured:** {dmeta['how_measured']}")
st.markdown(f"*{dmeta['sensitivity_note']}*")
st.markdown(f"**High score:** {dmeta['high_means']}")
st.markdown(f"**Low score:** {dmeta['low_means']}")
# ── Header ────────────────────────────────────────────────────────────────────
st.title("🔬 Knowledge Value Lab")
# Bold tagline on the first line, a one-sentence usage hint on the second;
# both are sent to Streamlit as a single Markdown string.
_header_parts = (
    "**Measuring the Marginal Value of Knowledge Assets for AI Systems** \n",
    "Upload a Markdown document to receive a quantified Knowledge Value Score across five dimensions.",
)
st.markdown("".join(_header_parts))
st.divider()
# ── Cached resources ──────────────────────────────────────────────────────────
@st.cache_resource(show_spinner="Loading embedding model (all-MiniLM-L6-v2)...")
def load_embedder():
    """Return the sentence-embedding model used by the evaluation modules.

    Wrapped in ``st.cache_resource`` so the model object is created through
    Streamlit's resource cache. The ``sentence_transformers`` import is
    deferred to call time rather than done at module import.
    """
    import sentence_transformers

    model_name = "all-MiniLM-L6-v2"
    return sentence_transformers.SentenceTransformer(model_name)
@st.cache_resource
def load_client():
    """Return an Anthropic API client built from ``ANTHROPIC_API_KEY``.

    When the environment variable is unset or empty, an error is shown on the
    page and ``st.stop()`` is called before any client is constructed.
    """
    key = os.environ.get("ANTHROPIC_API_KEY")
    if not key:
        # Surface the configuration problem in the UI, then halt the script.
        st.error("ANTHROPIC_API_KEY not found. Add it to your .env file.")
        st.stop()
    return anthropic.Anthropic(api_key=key)
# ── Upload section ────────────────────────────────────────────────────────────
# Two equal columns: the uploader on the left, the document preview on the right.
col_upload, col_preview = st.columns([1, 1], gap="large")
with col_upload:
    st.markdown("### Upload Document")
    uploaded = st.file_uploader(
        "Choose a Markdown file",
        type=["md"],
        help="Upload a .md file to evaluate its knowledge value for AI systems.",
        label_visibility="collapsed",
    )
    if uploaded:
        try:
            # getvalue() returns the full buffer regardless of the current
            # stream position, and "utf-8-sig" drops a leading BOM so it does
            # not end up glued to the first heading of the document.
            md_text = uploaded.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            # A file in another encoding would otherwise crash the page with
            # a raw traceback; report it and halt this script run instead.
            st.error(
                "Could not read this file as UTF-8 text. "
                "Re-save it with UTF-8 encoding and upload it again."
            )
            st.stop()
        # md_text and doc are module-level names reused by the preview column
        # and by the evaluation pipeline further down the script.
        doc = ingestor.parse(md_text)
        st.success(
            f"**{doc.title}** \n"
            f"{doc.word_count:,} words · {len(doc.sections)} sections · {len(doc.chunks)} chunks"
        )
        # run holds the button's return value for this script run.
        run = st.button("▶ Evaluate Knowledge Value", type="primary", use_container_width=True)
    else:
        st.info("Drag and drop a `.md` file above, or click to browse.")
        run = False
# Right-hand column: shows the first 1200 characters of the uploaded text, or
# a placeholder message when nothing has been uploaded.
with col_preview:
st.markdown("### Document Preview")
if uploaded:
# Truncate to 1200 characters and append an ellipsis only when text was cut.
preview_text = md_text[:1200] + ("…" if len(md_text) > 1200 else "")
# NOTE(review): preview_text comes straight from the uploaded file and is
# interpolated into markup rendered with unsafe_allow_html=True — consider
# escaping it (e.g. html.escape) before interpolation.
# NOTE(review): the string fragments in both st.markdown calls below are
# split across lines as seen here and do not parse; the wrapper markup
# appears to have been stripped — restore it before running.
st.markdown(
f""
f"{preview_text}
",
unsafe_allow_html=True,
)
else:
st.markdown(
"No document uploaded yet
",
unsafe_allow_html=True,
)
# ── Evaluation pipeline ───────────────────────────────────────────────────────
# Entered only when a file is uploaded and `run` (the evaluate button's return
# value, or False when there is no upload) is truthy.
if run and uploaded:
st.divider()
st.markdown("### Evaluation in Progress")
client = load_client()
embedder = load_embedder()
# Display labels for the six pipeline stages, in execution order. They are
# also the keys of step_statuses below.
steps = [
"Module A: Knowledge Novelty",
"Module B: Retrieval Utility",
"Module C: Generation Utility",
"Module D: Attribution & Grounding",
"Module E: Demand Utility",
"Computing Knowledge Value Score",
]
progress_bar = st.progress(0)
# Stage label -> "pending" | "running" | "done"; render_steps maps these to icons.
step_statuses = {s: "pending" for s in steps}
_sub_msg = [""] # mutable cell so sub_progress can update it
# Single placeholder that render_steps overwrites on every redraw.
step_placeholder = st.empty()
# Redraw the stage checklist into step_placeholder from the current contents
# of step_statuses and _sub_msg.
#   elapsed: when not None, a "✓ Evaluation complete in {elapsed}s" footer is
#            appended after the stage rows.
# NOTE(review): several string fragments below are empty or split across lines
# as seen here and do not parse; fg, bg and bold are computed but not
# referenced by the visible fragments. The surrounding markup appears to have
# been stripped — restore it before running.
def render_steps(elapsed: int | None = None):
rows = []
for s, state in step_statuses.items():
is_running = state == "running"
# Per-state glyph and foreground colour; a KeyError here means an unknown state.
icon = {"pending": "○", "running": "⟳", "done": "✓"}[state]
fg = {"pending": "#555", "running": "#e8f0fe", "done": "#4caf87"}[state]
# Only the running row gets a background and bold weight.
bg = "background:#0d1f35;" if is_running else ""
bold = "font-weight:600;" if is_running else ""
# The sub-step message is shown only under the running row, and only when non-empty.
sub = (
f"{_sub_msg[0]}
"
if is_running and _sub_msg[0] else ""
)
rows.append(
f""
f"{icon} {s}"
f"{sub}
"
)
footer = ""
if elapsed is not None:
footer = (
f""
f"✓ Evaluation complete in {elapsed}s
"
)
# Replace the placeholder's content with all rows plus the optional footer.
step_placeholder.markdown(
f""
+ "".join(rows) + footer +
"
",
unsafe_allow_html=True,
)
def sub_progress(msg: str):
    """Store *msg* as the current sub-step text and redraw the stage list.

    Handed to each module's ``evaluate`` call as ``progress_cb``.
    """
    # _sub_msg is a one-element list used as a mutable cell; replacing its
    # contents in place keeps the same list object that render_steps reads.
    _sub_msg[:] = [msg]
    render_steps()
# Local import: the file's top-level import only brings in `datetime`, and the
# UTC fix below needs `timezone`.
from datetime import timezone

# Results of each module keyed by dimension name; every value is expected to
# carry at least a "score" entry (read when building dim_scores below).
module_results = {}
# Timezone-aware start time. The report label is stamped "UTC", so the clock
# must really be UTC rather than the server's local time.
eval_start = datetime.now(timezone.utc)
# Monotonic clock for the elapsed-seconds figure, so a wall-clock adjustment
# during a long evaluation cannot skew or negate it.
t0 = time.monotonic()

# Module A — Knowledge Novelty
step_statuses[steps[0]] = "running"
render_steps()
progress_bar.progress(5)
module_results["novelty"] = novelty.evaluate(client, doc, progress_cb=sub_progress)
step_statuses[steps[0]] = "done"
progress_bar.progress(20)

# Module B — Retrieval Utility
step_statuses[steps[1]] = "running"
render_steps()
module_results["retrieval"] = retrieval.evaluate(client, doc, embedder, progress_cb=sub_progress)
step_statuses[steps[1]] = "done"
progress_bar.progress(40)

# Module C — Generation Utility
step_statuses[steps[2]] = "running"
render_steps()
module_results["generation"] = generation.evaluate(client, doc, progress_cb=sub_progress)
step_statuses[steps[2]] = "done"
progress_bar.progress(60)

# Module D — Attribution & Grounding; takes Module C's result as an input.
step_statuses[steps[3]] = "running"
render_steps()
module_results["attribution"] = attribution.evaluate(
    client, doc, module_results["generation"], embedder, progress_cb=sub_progress
)
step_statuses[steps[3]] = "done"
progress_bar.progress(80)

# Module E — Demand Utility
step_statuses[steps[4]] = "running"
render_steps()
module_results["demand"] = demand.evaluate(client, doc, progress_cb=sub_progress)
step_statuses[steps[4]] = "done"
progress_bar.progress(92)

# Final stage: fold the five per-dimension scores into the overall KVS.
step_statuses[steps[5]] = "running"
_sub_msg[0] = "Computing weighted Knowledge Value Score..."
render_steps()
dim_scores = {name: result["score"] for name, result in module_results.items()}
kvs_result = scorer.compute(dim_scores)
step_statuses[steps[5]] = "done"
progress_bar.progress(100)

elapsed = round(time.monotonic() - t0)
# Clear the sub-step text so the final redraw shows only the completion footer.
_sub_msg[0] = ""
render_steps(elapsed=elapsed)

eval_date_str = eval_start.strftime("%Y-%m-%d %H:%M UTC")
meta = model_meta(eval_date_str)
# ── Results ───────────────────────────────────────────────────────────────
st.divider()
st.markdown("## Knowledge Value Report")
# Overall score and its classification label, as returned by scorer.compute.
kvs = kvs_result["kvs"]
classification = kvs_result["classification"]
# Badge colour per classification label; unknown labels fall back to "#8ab4f8".
color_map = {
"Transformational Value": "#ffd166",
"High Value": "#06d6a0",
"Moderate Value": "#8ab4f8",
"Incremental Value": "#f8961e",
"Minimal Value": "#ef476f",
}
badge_color = color_map.get(classification, "#8ab4f8")
# KVS hero with model metadata
# NOTE(review): badge_color is not referenced by the markup visible below even
# though raw HTML is enabled — the styling markup appears to have been
# stripped; confirm against the original file.
st.markdown(
f"""
{kvs}
Knowledge Value Score / 100
{classification}
Evaluated {eval_date_str}
Judge: {MODELS['judge']['display']} ·
Worker: {MODELS['worker']['display']} ·
Embeddings: {MODELS['embedder']['display']}
""",
unsafe_allow_html=True,
)
# Model-relativity warning
# NOTE(review): the closing string fragment of this call is a bare quote split
# across two lines as seen here, which does not parse — the wrapper markup
# appears to have been stripped; restore it before running.
st.markdown(
""
"⚠️ Score validity: "
"Knowledge Novelty and Generation Utility are model-relative — "
"they reflect this document's marginal value to the models listed above. "
"Scores will change if the underlying models are updated or replaced. "
"Always report scores alongside model names and evaluation date."
"
",
unsafe_allow_html=True,
)
# ── Dimension breakdown ───────────────────────────────────────────────────
st.markdown("### Dimension Breakdown")
st.caption(
    "Each dimension is scored 0–100 and weighted by its contribution to the overall KVS. "
    "The sensitivity badge shows how much the score depends on the specific AI model used."
)
# (display label, dimension key, weight) for each row of the breakdown.
# NOTE(review): these weights duplicate DIMENSION_META[key]["weight"], which
# the sidebar displays — confirm both agree with what scorer.compute applies.
dims = [
    ("Knowledge Novelty", "novelty", 0.30),
    ("Retrieval Utility", "retrieval", 0.20),
    ("Generation Utility", "generation", 0.25),
    ("Attribution", "attribution", 0.15),
    ("Demand Utility", "demand", 0.10),
]
for label, key, weight in dims:
    sc = dim_scores[key]
    contrib = kvs_result["weighted_contributions"][key]
    # 20-cell text bar. The fill is clamped so a score outside 0–100 cannot
    # produce a bar that is longer or shorter than 20 cells.
    filled = max(0, min(20, round(sc / 100 * 20)))
    bar_str = "█" * filled + "░" * (20 - filled)
    # round() rather than int(): int() truncates binary-float products such
    # as 0.29 * 100 == 28.999999999999996 down to 28.
    pct = round(weight * 100)
    sens = DIMENSION_META[key]["model_sensitivity"]
    # NOTE(review): sc_color is looked up but not referenced by the markup
    # visible below; the lookup is kept so an unknown label still fails here.
    sc_color = SENSITIVITY_COLOR[sens]
    col1, col2 = st.columns([4, 1])
    with col1:
        st.markdown(
            f"**{label}** "
            f"sensitivity: {sens} \n"
            f"`{bar_str}` **{sc}/100** "
            f" ×{pct}% = {contrib} pts",
            unsafe_allow_html=True,
        )
    with col2:
        st.metric(label="score", value=str(sc), label_visibility="collapsed")
# ── Detailed analysis expanders ───────────────────────────────────────────
st.markdown("### Detailed Analysis")


def _render_module_intro(dmeta):
    """Render the explanatory header shared by every module expander.

    Writes, into whichever container is currently active: the model
    sensitivity line, what the dimension measures, how it is measured, the
    models used, the sensitivity note, and a closing divider.

    Args:
        dmeta: One entry of ``DIMENSION_META``; the keys read here are
            ``model_sensitivity``, ``description``, ``how_measured``,
            ``models_used`` and ``sensitivity_note``.
    """
    sens = dmeta["model_sensitivity"]
    # NOTE(review): the colour is looked up but not referenced by the markup
    # visible here; the lookup is kept so an unknown label still fails loudly.
    sc = SENSITIVITY_COLOR[sens]
    st.markdown(
        f"Model sensitivity: {sens}",
        unsafe_allow_html=True,
    )
    st.markdown(f"**What this measures:** {dmeta['description']}")
    st.markdown(f"**How it's measured:** {dmeta['how_measured']}")
    st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}")
    st.info(dmeta["sensitivity_note"])
    st.divider()


# Module A
dmeta = DIMENSION_META["novelty"]
with st.expander(f"Module A — Knowledge Novelty · {dim_scores['novelty']}/100"):
    _render_module_intro(dmeta)
    st.markdown(f"**Result:** {module_results['novelty']['summary']}")
    details = module_results["novelty"].get("details", [])
    if details:
        st.markdown("**Claim analysis** (🟢 novel · 🟡 partial · 🔴 already known):")
        for d in details:
            known_pct = round(d["known_score"] * 100)
            # Traffic light on how much the model already knows:
            # below 0.4 novel, below 0.7 partial, otherwise already known.
            icon = "🟢" if d["known_score"] < 0.4 else ("🟡" if d["known_score"] < 0.7 else "🔴")
            st.markdown(
                f"{icon} **{d['claim'][:130]}** \n"
                f"*Known to model: {known_pct}% — {d['reason']}*"
            )

# Module B
dmeta = DIMENSION_META["retrieval"]
with st.expander(f"Module B — Retrieval Utility · {dim_scores['retrieval']}/100"):
    _render_module_intro(dmeta)
    st.markdown(f"**Result:** {module_results['retrieval']['summary']}")
    details = module_results["retrieval"].get("details", [])
    if details:
        st.caption(
            "**Recall@3** — fraction of queries where the correct chunk appears in top 3 results (1.0 = perfect). \n"
            "**MRR** — Mean Reciprocal Rank; how high the correct chunk ranks on average (1.0 = always first)."
        )
        # Column-oriented table: one row per evaluated query.
        st.table({
            "Query": [d["query"] for d in details],
            "Recall@3": [f"{d['recall_at_3']:.2f}" for d in details],
            "MRR": [f"{d['reciprocal_rank']:.2f}" for d in details],
        })

# Module C
dmeta = DIMENSION_META["generation"]
with st.expander(f"Module C — Generation Utility · {dim_scores['generation']}/100"):
    _render_module_intro(dmeta)
    st.markdown(f"**Result:** {module_results['generation']['summary']}")
    for d in module_results["generation"].get("details", []):
        st.markdown(f"**Q: {d['question']}**")
        # Side-by-side comparison; each answer is cut to 350 characters.
        c1, c2 = st.columns(2)
        with c1:
            st.markdown("*Baseline — no document:*")
            st.markdown(f"> {d['baseline_answer'][:350]}")
        with c2:
            st.markdown("*RAG — with document:*")
            st.markdown(f"> {d['rag_answer'][:350]}")
        st.caption(
            f"Improvement: **{d['improvement']}/100** | "
            f"Accuracy: {d['accuracy']}/5 | "
            f"Completeness: {d['completeness']}/5 | "
            f"Specificity: {d['specificity']}/5 \n{d['reason']}"
        )
        # NOTE(review): divider assumed to separate each question (i.e. to sit
        # inside the loop) — confirm against the original indentation.
        st.divider()

# Module D
dmeta = DIMENSION_META["attribution"]
with st.expander(f"Module D — Attribution & Grounding · {dim_scores['attribution']}/100"):
    _render_module_intro(dmeta)
    st.markdown(f"**Result:** {module_results['attribution']['summary']}")
    for d in module_results["attribution"].get("details", []):
        halluc = "⚠️ Hallucination detected" if d.get("hallucination_detected") else "✓ No hallucination"
        st.markdown(
            f"**Q: {d['question'][:110]}** \n"
            f"Grounding: **{round(d['grounding_fraction']*100)}%** | "
            f"Semantic similarity: {d['semantic_similarity']} | {halluc}"
        )
        # At most the first three ungrounded claims are listed.
        if d.get("ungrounded_claims"):
            st.caption("Ungrounded claims: " + "; ".join(d["ungrounded_claims"][:3]))
        if d.get("reason"):
            st.caption(d["reason"])

# Module E
dmeta = DIMENSION_META["demand"]
with st.expander(f"Module E — Demand Utility · {dim_scores['demand']}/100"):
    _render_module_intro(dmeta)
    st.markdown(f"**Result:** {module_results['demand']['summary']}")
    topics = module_results["demand"].get("topics", [])
    if topics:
        st.caption(
            "**Query Freq** — estimated user query frequency for this topic (1 = rare, 10 = very common). \n"
            "**Priority Domain** — whether this is a high-impact sector (health, climate, food, policy, etc.). \n"
            "**Unmet Need** — whether existing AI models fall short in covering this topic."
        )
        st.table({
            "Topic": [t.get("topic", "") for t in topics],
            "Query Freq (1-10)": [t.get("query_frequency", "-") for t in topics],
            "Priority Domain": ["Yes" if t.get("priority_domain") else "No" for t in topics],
            "Unmet Need": ["Yes" if t.get("unmet_need") else "No" for t in topics],
            # `or ""` also covers a rationale that is present but None, which
            # .get("rationale", "") would pass straight to the slice.
            "Rationale": [(t.get("rationale") or "")[:80] for t in topics],
        })
# ── Recommendations ───────────────────────────────────────────────────────
st.markdown("### Recommended Actions")
# One Markdown bullet per recommendation returned by the scorer.
for action in kvs_result["recommendations"]:
    st.markdown(f"- {action}")

# ── Download ──────────────────────────────────────────────────────────────
st.divider()
report_md = report.generate(doc.title, kvs_result, module_results, meta)
# File name uses the first 40 characters of the title with spaces as underscores.
title_slug = doc.title[:40].replace(' ', '_')
download_name = f"kvl_report_{title_slug}.md"
st.download_button(
    label="⬇ Download Full Report (Markdown)",
    data=report_md,
    file_name=download_name,
    mime="text/markdown",
    use_container_width=True,
)
# Fallback arm of the `if run and uploaded:` chain: nothing has been uploaded,
# so prompt for a file. When a file is uploaded but `run` is falsy, neither
# arm executes.
elif not uploaded:
st.info("Upload a `.md` file above to begin evaluation.")