Spaces:

CGIAR
/

knowledge-value-lab

Sleeping

App Files Files Community

knowledge-value-lab / app.py

feedcomposer

Upload app.py with huggingface_hub

7f6792b verified 28 days ago

Raw

History Blame Contribute Delete

23.4 kB

	"""Knowledge Value Lab — Streamlit prototype."""

	from __future__ import annotations

	import html
	import os
	import time
	from datetime import datetime, timezone

	import anthropic
	import streamlit as st
	from dotenv import load_dotenv

	load_dotenv()

	from kvl import ingestor, scorer, report
	from kvl.modules import novelty, retrieval, generation, attribution, demand
	from kvl.config import DIMENSION_META, KVS_CLASSIFICATION, MODELS, SENSITIVITY_COLOR, model_meta

	# ── Page config ───────────────────────────────────────────────────────────────
	# Browser-tab title/icon and wide layout for the whole app.
	_page_settings = {
	    "page_title": "Knowledge Value Lab",
	    "page_icon": "🔬",
	    "layout": "wide",
	}
	st.set_page_config(**_page_settings)

	# ── CSS ───────────────────────────────────────────────────────────────────────
	# Stylesheet for the custom HTML fragments rendered further down:
	# the KVS hero box (.kvs-*), model chips, sensitivity badges and warning boxes.
	_CUSTOM_CSS = """
	<style>
	.kvs-box {
	background: linear-gradient(135deg, #1e3a5f 0%, #0d2137 100%);
	border-radius: 12px;
	padding: 28px 36px;
	text-align: center;
	margin-bottom: 8px;
	}
	.kvs-number { font-size: 64px; font-weight: 800; color: #f0f4ff; line-height: 1; }
	.kvs-label { font-size: 16px; color: #8ab4f8; margin-top: 4px; }
	.kvs-class { font-size: 22px; font-weight: 600; margin-top: 8px; }
	.kvs-meta { font-size: 12px; color: #556; margin-top: 10px; font-family: monospace; }
	.model-chip {
	display: inline-block;
	background: #1a2740;
	border: 1px solid #2a4060;
	border-radius: 4px;
	padding: 2px 8px;
	font-size: 11px;
	font-family: monospace;
	color: #8ab4f8;
	margin: 2px;
	}
	.sens-badge {
	display: inline-block;
	border-radius: 4px;
	padding: 1px 7px;
	font-size: 11px;
	font-weight: 600;
	}
	.warn-box {
	background: #1a1500;
	border-left: 3px solid #f8961e;
	border-radius: 4px;
	padding: 10px 14px;
	font-size: 13px;
	margin: 8px 0 16px 0;
	color: #fff;
	}
	</style>
	"""
	# unsafe_allow_html is required so the <style> tag is emitted as markup.
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)


	# ── Sidebar ───────────────────────────────────────────────────────────────────
	# Sidebar: static reference material (models in use, score classes, metric guide).
	with st.sidebar:
	    st.markdown("## 🔬 Knowledge Value Lab")
	    st.markdown(
	        "KVL measures the marginal value of a knowledge document to an AI system "
	        "across five independent dimensions, producing a single weighted Knowledge Value Score (KVS)."
	    )

	    st.divider()
	    st.markdown("### Models Used")
	    # Only the model entries are rendered; their dictionary keys are not needed.
	    for m in MODELS.values():
	        st.markdown(
	            f"<span class='model-chip'>{m['display']}</span> \n"
	            f"<span style='font-size:11px;color:#888;'>{m['role']}</span>",
	            unsafe_allow_html=True,
	        )
	        # NOTE(review): the extracted source lost its indentation, so it is unclear
	        # whether this spacer belongs inside the loop or after it — confirm.
	        st.markdown("")

	    st.markdown(
	        "<div class='warn-box'>"
	        "⚠️ <strong>Scores are model-relative.</strong> "
	        "Knowledge Novelty and Generation Utility reflect this document's value "
	        "to the <em>specific models above</em>. Scores will change when models are updated. "
	        "Always report scores alongside the model names and evaluation date."
	        "</div>",
	        unsafe_allow_html=True,
	    )

	    st.divider()
	    st.markdown("### Score Classifications")
	    for threshold, label, desc in KVS_CLASSIFICATION:
	        # Upper bound of the displayed band: 19 above the threshold, capped at 100
	        # for the top band. NOTE(review): presumes 20-point bands — verify against
	        # KVS_CLASSIFICATION in kvl.config.
	        hi = threshold + 19 if threshold < 81 else 100
	        st.markdown(f"{threshold}–{hi} — {label}")
	        st.caption(desc)

	    st.divider()
	    st.markdown("### Metric Guide")
	    for dmeta in DIMENSION_META.values():
	        sens = dmeta["model_sensitivity"]
	        sc = SENSITIVITY_COLOR[sens]
	        # round() rather than int(): float products such as 0.29 * 100 evaluate to
	        # 28.999999999999996, which int() would truncate to 28.
	        weight_pct = round(dmeta["weight"] * 100)
	        with st.expander(f"{dmeta['name']} · {weight_pct}%"):
	            st.markdown(
	                f"<span class='sens-badge' style='background:{sc}22;color:{sc};border:1px solid {sc}55;'>"
	                f"Model sensitivity: {sens}</span>",
	                unsafe_allow_html=True,
	            )
	            st.markdown(dmeta["description"])
	            st.markdown(f"How measured: {dmeta['how_measured']}")
	            st.markdown(f"{dmeta['sensitivity_note']}")
	            st.markdown(f"High score: {dmeta['high_means']}")
	            st.markdown(f"Low score: {dmeta['low_means']}")


	# ── Header ────────────────────────────────────────────────────────────────────
	# Main page title and one-paragraph introduction, followed by a separator.
	_intro_text = (
	    "Measuring the Marginal Value of Knowledge Assets for AI Systems \n"
	    "Upload a Markdown document to receive a quantified Knowledge Value Score across five dimensions."
	)
	st.title("🔬 Knowledge Value Lab")
	st.markdown(_intro_text)
	st.divider()


	# ── Cached resources ──────────────────────────────────────────────────────────
	@st.cache_resource(show_spinner="Loading embedding model (all-MiniLM-L6-v2)...")
	def load_embedder():
	    """Build the ``all-MiniLM-L6-v2`` sentence-transformers model.

	    The function is wrapped in ``st.cache_resource``; the
	    ``sentence_transformers`` import is deferred to call time so the package
	    is only loaded when an embedder is actually requested.
	    """
	    from sentence_transformers import SentenceTransformer

	    model_name = "all-MiniLM-L6-v2"
	    return SentenceTransformer(model_name)


	@st.cache_resource
	def load_client():
	    """Create the Anthropic API client from the ``ANTHROPIC_API_KEY`` variable.

	    When the variable is missing or empty, an error is shown in the app and
	    ``st.stop()`` is called before the client is constructed.
	    """
	    api_key = os.environ.get("ANTHROPIC_API_KEY")
	    key_missing = api_key is None or api_key == ""
	    if key_missing:
	        st.error("ANTHROPIC_API_KEY not found. Add it to your .env file.")
	        st.stop()
	    client = anthropic.Anthropic(api_key=api_key)
	    return client


	# ── Upload section ────────────────────────────────────────────────────────────
	# Two-column layout: uploader on the left, raw-text preview on the right.
	# Module-level names set here and used later: `uploaded`, `md_text`, `doc`, `run`.
	col_upload, col_preview = st.columns([1, 1], gap="large")

	with col_upload:
	    st.markdown("### Upload Document")
	    uploaded = st.file_uploader(
	        "Choose a Markdown file",
	        type=["md"],
	        help="Upload a .md file to evaluate its knowledge value for AI systems.",
	        label_visibility="collapsed",
	    )

	    if uploaded is not None:
	        try:
	            # getvalue() returns the full buffer regardless of the current stream
	            # position (read() would return b"" once the stream has been consumed).
	            # "utf-8-sig" also strips a leading byte-order mark if one is present.
	            md_text = uploaded.getvalue().decode("utf-8-sig")
	        except UnicodeDecodeError:
	            st.error("Could not decode the uploaded file as UTF-8. Please re-save it as UTF-8 and upload it again.")
	            st.stop()
	        doc = ingestor.parse(md_text)
	        st.success(
	            f"{doc.title} \n"
	            f"{doc.word_count:,} words · {len(doc.sections)} sections · {len(doc.chunks)} chunks"
	        )
	        run = st.button("▶ Evaluate Knowledge Value", type="primary", use_container_width=True)
	    else:
	        st.info("Drag and drop a `.md` file above, or click to browse.")
	        run = False

	with col_preview:
	    st.markdown("### Document Preview")
	    if uploaded is not None:
	        # The preview is interpolated into raw HTML (unsafe_allow_html=True), so the
	        # user-supplied text must be escaped; otherwise tags in the document would
	        # be injected into the page. Truncate first so no entity is cut in half.
	        is_truncated = len(md_text) > 1200
	        preview_text = html.escape(md_text[:1200]) + ("…" if is_truncated else "")
	        st.markdown(
	            f"<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
	            f"padding:16px;font-size:13px;max-height:280px;overflow:auto;white-space:pre-wrap;color:#fff;'>"
	            f"{preview_text}</div>",
	            unsafe_allow_html=True,
	        )
	    else:
	        st.markdown(
	            "<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
	            "padding:40px;text-align:center;color:#555;'>No document uploaded yet</div>",
	            unsafe_allow_html=True,
	        )


	# ── Evaluation pipeline ───────────────────────────────────────────────────────
	def _render_module_intro(dmeta: dict) -> None:
	    """Render the header shared by every "Detailed Analysis" expander.

	    Shows the model-sensitivity badge, the dimension's description, how it is
	    measured, the models used and the sensitivity note, then a divider.
	    Must be called inside the expander it should populate.

	    Args:
	        dmeta: One entry of ``DIMENSION_META``.
	    """
	    sens = dmeta["model_sensitivity"]
	    color = SENSITIVITY_COLOR[sens]
	    st.markdown(
	        f"<span class='sens-badge' style='background:{color}22;color:{color};border:1px solid {color}55;'>"
	        f"Model sensitivity: {sens}</span>",
	        unsafe_allow_html=True,
	    )
	    st.markdown(f"What this measures: {dmeta['description']}")
	    st.markdown(f"How it's measured: {dmeta['how_measured']}")
	    st.markdown(f"Models used: {', '.join(dmeta['models_used'])}")
	    st.info(dmeta["sensitivity_note"])
	    st.divider()


	# Runs the five evaluation modules on the uploaded document, then renders the report.
	# NOTE(review): results are not kept in st.session_state, so a rerun in which `run`
	# is False (presumably including the one triggered by the download button) will
	# drop the report — confirm and consider persisting the results.
	if run and uploaded:
	    st.divider()
	    st.markdown("### Evaluation in Progress")

	    client = load_client()
	    embedder = load_embedder()

	    steps = [
	        "Module A: Knowledge Novelty",
	        "Module B: Retrieval Utility",
	        "Module C: Generation Utility",
	        "Module D: Attribution & Grounding",
	        "Module E: Demand Utility",
	        "Computing Knowledge Value Score",
	    ]
	    progress_bar = st.progress(0)
	    step_statuses = {s: "pending" for s in steps}
	    _sub_msg = [""]  # mutable cell so sub_progress can update it
	    step_placeholder = st.empty()

	    def render_steps(elapsed: int | None = None):
	        """Redraw the step checklist inside ``step_placeholder``.

	        Args:
	            elapsed: When given, a "complete in N s" footer is appended.
	        """
	        icons = {"pending": "○", "running": "⟳", "done": "✓"}
	        colors = {"pending": "#555", "running": "#e8f0fe", "done": "#4caf87"}
	        # Progress messages come from the evaluation modules and are placed into
	        # raw HTML, so escape them before interpolation.
	        sub_text = html.escape(_sub_msg[0])

	        rows = []
	        for s, state in step_statuses.items():
	            is_running = state == "running"
	            bg = "background:#0d1f35;" if is_running else ""
	            bold = "font-weight:600;" if is_running else ""
	            # The sub-message is only shown under the step that is currently running.
	            sub = (
	                f"<div style='font-size:12px;color:#8ab4f8;margin:3px 0 0 22px;'>{sub_text}</div>"
	                if is_running and sub_text else ""
	            )
	            rows.append(
	                f"<div style='padding:7px 16px;{bg}border-bottom:1px solid #1a1a2e;'>"
	                f"<span style='font-family:monospace;color:{colors[state]};{bold}'>{icons[state]}  {s}</span>"
	                f"{sub}</div>"
	            )

	        footer = ""
	        if elapsed is not None:
	            footer = (
	                f"<div style='padding:7px 16px;font-size:12px;color:#4caf87;'>"
	                f"✓ Evaluation complete in {elapsed}s</div>"
	            )

	        step_placeholder.markdown(
	            f"<div style='border:1px solid #2a2a3a;border-radius:8px;overflow:hidden;'>"
	            + "".join(rows) + footer +
	            "</div>",
	            unsafe_allow_html=True,
	        )

	    def sub_progress(msg: str):
	        """Progress callback handed to the modules: store ``msg`` and redraw."""
	        _sub_msg[0] = msg
	        render_steps()

	    def start_step(step: str, message: str = "") -> None:
	        """Mark ``step`` as running and redraw.

	        The sub-message is reset so the previous step's last message is not
	        displayed under the new step.
	        """
	        step_statuses[step] = "running"
	        _sub_msg[0] = message
	        render_steps()

	    def finish_step(step: str, percent: int) -> None:
	        """Mark ``step`` as done and advance the progress bar to ``percent``."""
	        step_statuses[step] = "done"
	        progress_bar.progress(percent)

	    module_results = {}
	    # Timezone-aware timestamp: the report labels this time as "UTC", which a naive
	    # datetime.now() (server-local time) would only match on a UTC host.
	    eval_start = datetime.now(timezone.utc)
	    # Monotonic clock for the elapsed time so wall-clock adjustments cannot skew it.
	    t0 = time.monotonic()

	    start_step(steps[0])
	    progress_bar.progress(5)
	    module_results["novelty"] = novelty.evaluate(client, doc, progress_cb=sub_progress)
	    finish_step(steps[0], 20)

	    start_step(steps[1])
	    module_results["retrieval"] = retrieval.evaluate(client, doc, embedder, progress_cb=sub_progress)
	    finish_step(steps[1], 40)

	    start_step(steps[2])
	    module_results["generation"] = generation.evaluate(client, doc, progress_cb=sub_progress)
	    finish_step(steps[2], 60)

	    # Attribution consumes the generation module's result, so it must run after it.
	    start_step(steps[3])
	    module_results["attribution"] = attribution.evaluate(
	        client, doc, module_results["generation"], embedder, progress_cb=sub_progress
	    )
	    finish_step(steps[3], 80)

	    start_step(steps[4])
	    module_results["demand"] = demand.evaluate(client, doc, progress_cb=sub_progress)
	    finish_step(steps[4], 92)

	    start_step(steps[5], "Computing weighted Knowledge Value Score...")
	    dim_scores = {key: result["score"] for key, result in module_results.items()}
	    kvs_result = scorer.compute(dim_scores)
	    finish_step(steps[5], 100)

	    elapsed = round(time.monotonic() - t0)
	    _sub_msg[0] = ""
	    render_steps(elapsed=elapsed)
	    eval_date_str = eval_start.strftime("%Y-%m-%d %H:%M UTC")
	    meta = model_meta(eval_date_str)

	    # ── Results ───────────────────────────────────────────────────────────────
	    st.divider()
	    st.markdown("## Knowledge Value Report")

	    kvs = kvs_result["kvs"]
	    classification = kvs_result["classification"]
	    color_map = {
	        "Transformational Value": "#ffd166",
	        "High Value": "#06d6a0",
	        "Moderate Value": "#8ab4f8",
	        "Incremental Value": "#f8961e",
	        "Minimal Value": "#ef476f",
	    }
	    # Unknown classifications fall back to the "Moderate Value" colour.
	    badge_color = color_map.get(classification, "#8ab4f8")

	    # KVS hero with model metadata
	    st.markdown(
	        f"""<div class="kvs-box">
	<div class="kvs-number">{kvs}</div>
	<div class="kvs-label">Knowledge Value Score / 100</div>
	<div class="kvs-class" style="color:{badge_color};">{classification}</div>
	<div class="kvs-meta">
	Evaluated {eval_date_str}<br>
	Judge: {MODELS['judge']['display']}  ·
	Worker: {MODELS['worker']['display']}  ·
	Embeddings: {MODELS['embedder']['display']}
	</div>
	</div>""",
	        unsafe_allow_html=True,
	    )

	    # Model-relativity warning
	    st.markdown(
	        "<div class='warn-box'>"
	        "⚠️ <strong>Score validity:</strong> "
	        "Knowledge Novelty and Generation Utility are <strong>model-relative</strong> — "
	        "they reflect this document's marginal value to the models listed above. "
	        "Scores will change if the underlying models are updated or replaced. "
	        "Always report scores alongside model names and evaluation date."
	        "</div>",
	        unsafe_allow_html=True,
	    )

	    # ── Dimension breakdown ───────────────────────────────────────────────────
	    st.markdown("### Dimension Breakdown")
	    st.caption(
	        "Each dimension is scored 0–100 and weighted by its contribution to the overall KVS. "
	        "The sensitivity badge shows how much the score depends on the specific AI model used."
	    )

	    # Display label and result key per dimension. Weights are read from
	    # DIMENSION_META (as the sidebar does) instead of being duplicated here.
	    dims = [
	        ("Knowledge Novelty", "novelty"),
	        ("Retrieval Utility", "retrieval"),
	        ("Generation Utility", "generation"),
	        ("Attribution", "attribution"),
	        ("Demand Utility", "demand"),
	    ]

	    for label, key in dims:
	        dim_meta = DIMENSION_META[key]
	        sc = dim_scores[key]
	        contrib = kvs_result["weighted_contributions"][key]
	        # 20-cell text bar; clamp so an out-of-range score cannot distort it.
	        filled = max(0, min(20, round(sc / 100 * 20)))
	        bar_str = "█" * filled + "░" * (20 - filled)
	        # round() avoids float truncation (e.g. int(0.29 * 100) == 28).
	        pct = round(dim_meta["weight"] * 100)
	        sens = dim_meta["model_sensitivity"]
	        sc_color = SENSITIVITY_COLOR[sens]

	        col1, col2 = st.columns([4, 1])
	        with col1:
	            st.markdown(
	                f"{label}  "
	                f"<span class='sens-badge' style='background:{sc_color}22;color:{sc_color};"
	                f"border:1px solid {sc_color}55;'>sensitivity: {sens}</span> \n"
	                f"`{bar_str}`   {sc}/100 "
	                f"<span style='color:#888;font-size:13px;'> ×{pct}% = {contrib} pts</span>",
	                unsafe_allow_html=True,
	            )
	        with col2:
	            st.metric(label="score", value=str(sc), label_visibility="collapsed")

	    # ── Detailed analysis expanders ───────────────────────────────────────────
	    st.markdown("### Detailed Analysis")

	    # Module A
	    with st.expander(f"Module A — Knowledge Novelty · {dim_scores['novelty']}/100"):
	        _render_module_intro(DIMENSION_META["novelty"])
	        st.markdown(f"Result: {module_results['novelty']['summary']}")
	        details = module_results["novelty"].get("details", [])
	        if details:
	            st.markdown("Claim analysis (🟢 novel · 🟡 partial · 🔴 already known):")
	            for d in details:
	                known = d["known_score"]
	                known_pct = round(known * 100)
	                # Below 0.4 counts as novel, below 0.7 as partial, otherwise known.
	                icon = "🟢" if known < 0.4 else ("🟡" if known < 0.7 else "🔴")
	                st.markdown(
	                    f"{icon} {d['claim'][:130]} \n"
	                    f"Known to model: {known_pct}% — {d['reason']}"
	                )

	    # Module B
	    with st.expander(f"Module B — Retrieval Utility · {dim_scores['retrieval']}/100"):
	        _render_module_intro(DIMENSION_META["retrieval"])
	        st.markdown(f"Result: {module_results['retrieval']['summary']}")
	        details = module_results["retrieval"].get("details", [])
	        if details:
	            st.caption(
	                "Recall@3 — fraction of queries where the correct chunk appears in top 3 results (1.0 = perfect). \n"
	                "MRR — Mean Reciprocal Rank; how high the correct chunk ranks on average (1.0 = always first)."
	            )
	            st.table({
	                "Query": [d["query"] for d in details],
	                "Recall@3": [f"{d['recall_at_3']:.2f}" for d in details],
	                "MRR": [f"{d['reciprocal_rank']:.2f}" for d in details],
	            })

	    # Module C
	    with st.expander(f"Module C — Generation Utility · {dim_scores['generation']}/100"):
	        _render_module_intro(DIMENSION_META["generation"])
	        st.markdown(f"Result: {module_results['generation']['summary']}")
	        for d in module_results["generation"].get("details", []):
	            st.markdown(f"Q: {d['question']}")
	            # Side-by-side comparison of the answer without and with the document.
	            c1, c2 = st.columns(2)
	            with c1:
	                st.markdown("Baseline — no document:")
	                st.markdown(f"> {d['baseline_answer'][:350]}")
	            with c2:
	                st.markdown("RAG — with document:")
	                st.markdown(f"> {d['rag_answer'][:350]}")
	            st.caption(
	                f"Improvement: {d['improvement']}/100  |  "
	                f"Accuracy: {d['accuracy']}/5  |  "
	                f"Completeness: {d['completeness']}/5  |  "
	                f"Specificity: {d['specificity']}/5 \n{d['reason']}"
	            )
	            st.divider()

	    # Module D
	    with st.expander(f"Module D — Attribution & Grounding · {dim_scores['attribution']}/100"):
	        _render_module_intro(DIMENSION_META["attribution"])
	        st.markdown(f"Result: {module_results['attribution']['summary']}")
	        for d in module_results["attribution"].get("details", []):
	            halluc = "⚠️ Hallucination detected" if d.get("hallucination_detected") else "✓ No hallucination"
	            # grounding_fraction is multiplied by 100 to be shown as a percentage.
	            st.markdown(
	                f"Q: {d['question'][:110]} \n"
	                f"Grounding: **{round(d['grounding_fraction'] * 100)}%**  |  "
	                f"Semantic similarity: {d['semantic_similarity']}  |  {halluc}"
	            )
	            if d.get("ungrounded_claims"):
	                st.caption("Ungrounded claims: " + "; ".join(d["ungrounded_claims"][:3]))
	            if d.get("reason"):
	                st.caption(d["reason"])

	    # Module E
	    with st.expander(f"Module E — Demand Utility · {dim_scores['demand']}/100"):
	        _render_module_intro(DIMENSION_META["demand"])
	        st.markdown(f"Result: {module_results['demand']['summary']}")
	        topics = module_results["demand"].get("topics", [])
	        if topics:
	            st.caption(
	                "Query Freq — estimated user query frequency for this topic (1 = rare, 10 = very common). \n"
	                "Priority Domain — whether this is a high-impact sector (health, climate, food, policy, etc.). \n"
	                "Unmet Need — whether existing AI models fall short in covering this topic."
	            )
	            st.table({
	                "Topic": [t.get("topic", "") for t in topics],
	                "Query Freq (1-10)": [t.get("query_frequency", "-") for t in topics],
	                "Priority Domain": ["Yes" if t.get("priority_domain") else "No" for t in topics],
	                "Unmet Need": ["Yes" if t.get("unmet_need") else "No" for t in topics],
	                "Rationale": [t.get("rationale", "")[:80] for t in topics],
	            })

	    # ── Recommendations ───────────────────────────────────────────────────────
	    st.markdown("### Recommended Actions")
	    for rec in kvs_result["recommendations"]:
	        st.markdown(f"- {rec}")

	    # ── Download ──────────────────────────────────────────────────────────────
	    st.divider()
	    report_md = report.generate(doc.title, kvs_result, module_results, meta)
	    st.download_button(
	        label="⬇ Download Full Report (Markdown)",
	        data=report_md,
	        file_name=f"kvl_report_{doc.title[:40].replace(' ', '_')}.md",
	        mime="text/markdown",
	        use_container_width=True,
	    )

	elif not uploaded:
	    st.info("Upload a `.md` file above to begin evaluation.")