feedcomposer's picture
Upload app.py with huggingface_hub
7f6792b verified
Raw
History Blame Contribute Delete
23.4 kB
"""Knowledge Value Lab — Streamlit prototype."""
from __future__ import annotations

import html
import os
import time
from datetime import datetime

import anthropic
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
from kvl import ingestor, scorer, report
from kvl.modules import novelty, retrieval, generation, attribution, demand
from kvl.config import DIMENSION_META, KVS_CLASSIFICATION, MODELS, SENSITIVITY_COLOR, model_meta
# ── Page config ───────────────────────────────────────────────────────────────
# Sets the browser-tab title and icon and switches the page to the wide layout.
# The icon is the microscope emoji; this file must be saved and read as UTF-8
# for it (and the other non-ASCII literals below) to survive intact.
st.set_page_config(
    page_title="Knowledge Value Lab",
    page_icon="🔬",
    layout="wide",
)
# ── CSS ───────────────────────────────────────────────────────────────────────
# Stylesheet for the hand-written HTML snippets rendered elsewhere in this
# script: the KVS hero box (.kvs-*), model chips (.model-chip), sensitivity
# badges (.sens-badge) and the warning call-outs (.warn-box).
_CUSTOM_CSS = """
<style>
.kvs-box {
background: linear-gradient(135deg, #1e3a5f 0%, #0d2137 100%);
border-radius: 12px;
padding: 28px 36px;
text-align: center;
margin-bottom: 8px;
}
.kvs-number { font-size: 64px; font-weight: 800; color: #f0f4ff; line-height: 1; }
.kvs-label { font-size: 16px; color: #8ab4f8; margin-top: 4px; }
.kvs-class { font-size: 22px; font-weight: 600; margin-top: 8px; }
.kvs-meta { font-size: 12px; color: #556; margin-top: 10px; font-family: monospace; }
.model-chip {
display: inline-block;
background: #1a2740;
border: 1px solid #2a4060;
border-radius: 4px;
padding: 2px 8px;
font-size: 11px;
font-family: monospace;
color: #8ab4f8;
margin: 2px;
}
.sens-badge {
display: inline-block;
border-radius: 4px;
padding: 1px 7px;
font-size: 11px;
font-weight: 600;
}
.warn-box {
background: #1a1500;
border-left: 3px solid #f8961e;
border-radius: 4px;
padding: 10px 14px;
font-size: 13px;
margin: 8px 0 16px 0;
color: #fff;
}
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ── Sidebar ───────────────────────────────────────────────────────────────────
# Static reference material: what KVL is, which models produce the scores, the
# score classification bands, and a per-dimension metric guide.
with st.sidebar:
    st.markdown("## 🔬 Knowledge Value Lab")
    st.markdown(
        "KVL measures the **marginal value** of a knowledge document to an AI system "
        "across five independent dimensions, producing a single weighted **Knowledge Value Score (KVS)**."
    )
    st.divider()

    st.markdown("### Models Used")
    # Only the per-model metadata is displayed, so iterate the values directly.
    for m in MODELS.values():
        st.markdown(
            # Two trailing spaces + newline = Markdown hard line break, so the
            # role is shown on its own line beneath the chip.
            f"<span class='model-chip'>{m['display']}</span>  \n"
            f"<span style='font-size:11px;color:#888;'>{m['role']}</span>",
            unsafe_allow_html=True,
        )
    st.markdown("")
    st.markdown(
        "<div class='warn-box'>"
        "⚠️ <strong>Scores are model-relative.</strong> "
        "Knowledge Novelty and Generation Utility reflect this document's value "
        "to the <em>specific models above</em>. Scores will change when models are updated. "
        "Always report scores alongside the model names and evaluation date."
        "</div>",
        unsafe_allow_html=True,
    )
    st.divider()

    st.markdown("### Score Classifications")
    for threshold, label, desc in KVS_CLASSIFICATION:
        # NOTE(review): assumes each band is 20 points wide and that the top
        # band starts at 81 — confirm against KVS_CLASSIFICATION in kvl.config.
        hi = threshold + 19 if threshold < 81 else 100
        st.markdown(f"**{threshold}–{hi}** — {label}")
        st.caption(desc)
    st.divider()

    st.markdown("### Metric Guide")
    for dmeta in DIMENSION_META.values():
        sens = dmeta["model_sensitivity"]
        sc = SENSITIVITY_COLOR[sens]
        # round() rather than int(): a weight such as 0.29 multiplies out to
        # 28.999… and int() would display it as 28%.
        with st.expander(f"{dmeta['name']} · {round(dmeta['weight'] * 100)}%"):
            st.markdown(
                f"<span class='sens-badge' style='background:{sc}22;color:{sc};border:1px solid {sc}55;'>"
                f"Model sensitivity: {sens}</span>",
                unsafe_allow_html=True,
            )
            st.markdown(dmeta["description"])
            st.markdown(f"**How measured:** {dmeta['how_measured']}")
            st.markdown(f"*{dmeta['sensitivity_note']}*")
            st.markdown(f"**High score:** {dmeta['high_means']}")
            st.markdown(f"**Low score:** {dmeta['low_means']}")
# ── Header ────────────────────────────────────────────────────────────────────
# Page title and one-line pitch shown above the upload area.
st.title("🔬 Knowledge Value Lab")
st.markdown(
    # Two trailing spaces + newline = Markdown hard line break between the
    # tagline and the instructions.
    "**Measuring the Marginal Value of Knowledge Assets for AI Systems**  \n"
    "Upload a Markdown document to receive a quantified Knowledge Value Score across five dimensions."
)
st.divider()
# ── Cached resources ──────────────────────────────────────────────────────────
@st.cache_resource(show_spinner="Loading embedding model (all-MiniLM-L6-v2)...")
def load_embedder():
    """Return the sentence-embedding model passed to the retrieval and attribution modules.

    ``sentence_transformers`` is imported inside the function, so the import
    cost is only paid when the embedder is actually requested. The function is
    wrapped in ``st.cache_resource``; the spinner text is shown while it runs.
    """
    from sentence_transformers import SentenceTransformer as _Encoder

    model_name = "all-MiniLM-L6-v2"
    return _Encoder(model_name)
@st.cache_resource
def load_client():
    """Return an Anthropic client built from the ``ANTHROPIC_API_KEY`` environment variable.

    If the variable is unset or empty, an error message is shown in the app
    and ``st.stop()`` is called before any client is constructed.
    """
    key = os.environ.get("ANTHROPIC_API_KEY")
    if not key:
        st.error("ANTHROPIC_API_KEY not found. Add it to your .env file.")
        st.stop()
    return anthropic.Anthropic(api_key=key)
# ── Upload section ────────────────────────────────────────────────────────────
# Left column: file picker, parse summary and the "evaluate" button.
# Right column (rendered further down): a preview of the uploaded text.
col_upload, col_preview = st.columns([1, 1], gap="large")

with col_upload:
    st.markdown("### Upload Document")
    uploaded = st.file_uploader(
        "Choose a Markdown file",
        type=["md"],
        help="Upload a .md file to evaluate its knowledge value for AI systems.",
        label_visibility="collapsed",
    )
    if uploaded:
        # getvalue() returns the full buffer regardless of the current read
        # position, unlike read(), which depends on where the cursor was left.
        raw_bytes = uploaded.getvalue()
        try:
            md_text = raw_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # A .md extension does not guarantee UTF-8. Degrade gracefully
            # instead of crashing the whole page with a traceback.
            st.warning(
                "This file is not valid UTF-8; undecodable bytes were replaced. "
                "Re-save the document as UTF-8 for accurate results."
            )
            md_text = raw_bytes.decode("utf-8", errors="replace")

        doc = ingestor.parse(md_text)
        st.success(
            # Two trailing spaces + newline = Markdown hard line break.
            f"**{doc.title}**  \n"
            f"{doc.word_count:,} words · {len(doc.sections)} sections · {len(doc.chunks)} chunks"
        )
        # True only on the rerun triggered by clicking the button.
        run = st.button("▶ Evaluate Knowledge Value", type="primary", use_container_width=True)
    else:
        st.info("Drag and drop a `.md` file above, or click to browse.")
        run = False
with col_preview:
    st.markdown("### Document Preview")
    if uploaded:
        # Show at most the first 1200 characters, with an ellipsis when cut.
        preview_text = md_text[:1200] + ("…" if len(md_text) > 1200 else "")
        # The document is untrusted user input and is injected into raw HTML
        # below, so escape &, < and > first. Otherwise a document containing
        # markup (e.g. "</div>" or "<img onerror=...>") would break the layout
        # or inject arbitrary HTML into the page. Quotes are left alone because
        # the text lands in element content, not in an attribute.
        safe_preview = html.escape(preview_text, quote=False)
        st.markdown(
            f"<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
            f"padding:16px;font-size:13px;max-height:280px;overflow:auto;white-space:pre-wrap;color:#fff;'>"
            f"{safe_preview}</div>",
            unsafe_allow_html=True,
        )
    else:
        # Empty-state placeholder with the same frame as the real preview.
        st.markdown(
            "<div style='background:#0e1117;border:1px solid #2a2a3a;border-radius:8px;"
            "padding:40px;text-align:center;color:#555;'>No document uploaded yet</div>",
            unsafe_allow_html=True,
        )
# ── Evaluation pipeline ───────────────────────────────────────────────────────
# When the button was clicked for an uploaded document: run the five scoring
# modules in order while showing live step status, compute the weighted KVS,
# render the report, and offer it as a Markdown download.


def _render_dimension_intro(dmeta):
    """Render the shared header of one "Detailed Analysis" expander.

    Shows the model-sensitivity badge, description, measurement method, models
    used and sensitivity note of a single ``DIMENSION_META`` entry, then a
    divider separating this boilerplate from the module's actual results.
    """
    sens = dmeta["model_sensitivity"]
    color = SENSITIVITY_COLOR[sens]
    st.markdown(
        f"<span class='sens-badge' style='background:{color}22;color:{color};border:1px solid {color}55;'>"
        f"Model sensitivity: {sens}</span>",
        unsafe_allow_html=True,
    )
    st.markdown(f"**What this measures:** {dmeta['description']}")
    st.markdown(f"**How it's measured:** {dmeta['how_measured']}")
    st.markdown(f"**Models used:** {', '.join(dmeta['models_used'])}")
    st.info(dmeta["sensitivity_note"])
    st.divider()


if run and uploaded:
    st.divider()
    st.markdown("### Evaluation in Progress")
    client = load_client()
    embedder = load_embedder()

    steps = [
        "Module A: Knowledge Novelty",
        "Module B: Retrieval Utility",
        "Module C: Generation Utility",
        "Module D: Attribution & Grounding",
        "Module E: Demand Utility",
        "Computing Knowledge Value Score",
    ]
    progress_bar = st.progress(0)
    step_statuses = {s: "pending" for s in steps}
    _sub_msg = [""]  # mutable cell so sub_progress can update it
    step_placeholder = st.empty()

    # Per-state glyph and text colour for the step list.
    step_icons = {"pending": "○", "running": "⟳", "done": "✓"}
    step_colors = {"pending": "#555", "running": "#e8f0fe", "done": "#4caf87"}

    def render_steps(elapsed: int | None = None) -> None:
        """Redraw the step list in place.

        The running step is highlighted and shows the current sub-message
        beneath it. When ``elapsed`` (whole seconds) is given, a completion
        footer is appended.
        """
        rows = []
        for s, state in step_statuses.items():
            is_running = state == "running"
            icon = step_icons[state]
            fg = step_colors[state]
            bg = "background:#0d1f35;" if is_running else ""
            bold = "font-weight:600;" if is_running else ""
            sub = (
                f"<div style='font-size:12px;color:#8ab4f8;margin:3px 0 0 22px;'>{_sub_msg[0]}</div>"
                if is_running and _sub_msg[0]
                else ""
            )
            rows.append(
                f"<div style='padding:7px 16px;{bg}border-bottom:1px solid #1a1a2e;'>"
                f"<span style='font-family:monospace;color:{fg};{bold}'>{icon}&nbsp;&nbsp;{s}</span>"
                f"{sub}</div>"
            )
        footer = ""
        if elapsed is not None:
            footer = (
                f"<div style='padding:7px 16px;font-size:12px;color:#4caf87;'>"
                f"✓ Evaluation complete in {elapsed}s</div>"
            )
        step_placeholder.markdown(
            "<div style='border:1px solid #2a2a3a;border-radius:8px;overflow:hidden;'>"
            + "".join(rows)
            + footer
            + "</div>",
            unsafe_allow_html=True,
        )

    def sub_progress(msg: str) -> None:
        """Progress callback handed to the modules: show ``msg`` under the running step."""
        _sub_msg[0] = msg
        render_steps()

    def start_step(step: str) -> None:
        """Mark ``step`` as running and redraw."""
        step_statuses[step] = "running"
        # Clear the previous module's last sub-message so it is not displayed
        # under this step until the new module reports its own progress.
        _sub_msg[0] = ""
        render_steps()

    def finish_step(step: str, pct: int) -> None:
        """Mark ``step`` as done and advance the progress bar to ``pct``."""
        step_statuses[step] = "done"
        progress_bar.progress(pct)

    module_results = {}
    # Wall-clock time is kept for the report timestamp; the monotonic clock is
    # used for the duration so system clock adjustments cannot skew it.
    eval_started = time.time()
    t0 = time.monotonic()

    start_step(steps[0])
    progress_bar.progress(5)
    module_results["novelty"] = novelty.evaluate(client, doc, progress_cb=sub_progress)
    finish_step(steps[0], 20)

    start_step(steps[1])
    module_results["retrieval"] = retrieval.evaluate(client, doc, embedder, progress_cb=sub_progress)
    finish_step(steps[1], 40)

    start_step(steps[2])
    module_results["generation"] = generation.evaluate(client, doc, progress_cb=sub_progress)
    finish_step(steps[2], 60)

    # Attribution is evaluated against the generation module's output.
    start_step(steps[3])
    module_results["attribution"] = attribution.evaluate(
        client, doc, module_results["generation"], embedder, progress_cb=sub_progress
    )
    finish_step(steps[3], 80)

    start_step(steps[4])
    module_results["demand"] = demand.evaluate(client, doc, progress_cb=sub_progress)
    finish_step(steps[4], 92)

    step_statuses[steps[5]] = "running"
    _sub_msg[0] = "Computing weighted Knowledge Value Score..."
    render_steps()
    dim_scores = {name: result["score"] for name, result in module_results.items()}
    kvs_result = scorer.compute(dim_scores)
    finish_step(steps[5], 100)

    elapsed = round(time.monotonic() - t0)
    _sub_msg[0] = ""
    render_steps(elapsed=elapsed)

    # The label says "UTC", so format the start time in UTC rather than in the
    # server's local timezone.
    eval_date_str = time.strftime("%Y-%m-%d %H:%M UTC", time.gmtime(eval_started))
    meta = model_meta(eval_date_str)

    # ── Results ───────────────────────────────────────────────────────────────
    st.divider()
    st.markdown("## Knowledge Value Report")
    kvs = kvs_result["kvs"]
    classification = kvs_result["classification"]
    color_map = {
        "Transformational Value": "#ffd166",
        "High Value": "#06d6a0",
        "Moderate Value": "#8ab4f8",
        "Incremental Value": "#f8961e",
        "Minimal Value": "#ef476f",
    }
    # Unknown classification labels fall back to the neutral blue.
    badge_color = color_map.get(classification, "#8ab4f8")

    # KVS hero with model metadata
    st.markdown(
        "<div class='kvs-box'>"
        f"<div class='kvs-number'>{kvs}</div>"
        "<div class='kvs-label'>Knowledge Value Score / 100</div>"
        f"<div class='kvs-class' style='color:{badge_color};'>{classification}</div>"
        "<div class='kvs-meta'>"
        f"Evaluated {eval_date_str}<br>"
        f"Judge: {MODELS['judge']['display']} &nbsp;·&nbsp; "
        f"Worker: {MODELS['worker']['display']} &nbsp;·&nbsp; "
        f"Embeddings: {MODELS['embedder']['display']}"
        "</div>"
        "</div>",
        unsafe_allow_html=True,
    )

    # Model-relativity warning
    st.markdown(
        "<div class='warn-box'>"
        "⚠️ <strong>Score validity:</strong> "
        "Knowledge Novelty and Generation Utility are <strong>model-relative</strong> — "
        "they reflect this document's marginal value to the models listed above. "
        "Scores will change if the underlying models are updated or replaced. "
        "Always report scores alongside model names and evaluation date."
        "</div>",
        unsafe_allow_html=True,
    )

    # ── Dimension breakdown ───────────────────────────────────────────────────
    st.markdown("### Dimension Breakdown")
    st.caption(
        "Each dimension is scored 0–100 and weighted by its contribution to the overall KVS. "
        "The sensitivity badge shows how much the score depends on the specific AI model used."
    )
    # (display label, key into dim_scores / DIMENSION_META, weight shown to the user)
    dims = [
        ("Knowledge Novelty", "novelty", 0.30),
        ("Retrieval Utility", "retrieval", 0.20),
        ("Generation Utility", "generation", 0.25),
        ("Attribution", "attribution", 0.15),
        ("Demand Utility", "demand", 0.10),
    ]
    bar_width = 20
    for label, key, weight in dims:
        score = dim_scores[key]
        contrib = kvs_result["weighted_contributions"][key]
        # Clamp so an out-of-range score cannot produce a bar longer or
        # shorter than bar_width cells.
        filled = max(0, min(bar_width, round(score / 100 * bar_width)))
        bar_str = "█" * filled + "░" * (bar_width - filled)
        # round() rather than int(): float products such as 0.29 * 100 land
        # just below the integer and int() would truncate them.
        pct = round(weight * 100)
        sens = DIMENSION_META[key]["model_sensitivity"]
        sc_color = SENSITIVITY_COLOR[sens]
        col1, col2 = st.columns([4, 1])
        with col1:
            st.markdown(
                f"**{label}** &nbsp;"
                f"<span class='sens-badge' style='background:{sc_color}22;color:{sc_color};"
                f"border:1px solid {sc_color}55;'>sensitivity: {sens}</span>  \n"
                f"`{bar_str}` &nbsp; **{score}/100** "
                f"<span style='color:#888;font-size:13px;'> ×{pct}% = {contrib} pts</span>",
                unsafe_allow_html=True,
            )
        with col2:
            st.metric(label="score", value=str(score), label_visibility="collapsed")

    # ── Detailed analysis expanders ───────────────────────────────────────────
    st.markdown("### Detailed Analysis")

    # Module A
    with st.expander(f"Module A — Knowledge Novelty · {dim_scores['novelty']}/100"):
        _render_dimension_intro(DIMENSION_META["novelty"])
        st.markdown(f"**Result:** {module_results['novelty']['summary']}")
        details = module_results["novelty"].get("details", [])
        if details:
            st.markdown("**Claim analysis** (🟢 novel · 🟡 partial · 🔴 already known):")
            for d in details:
                known = d["known_score"]
                known_pct = round(known * 100)
                # Traffic light: below 0.4 novel, below 0.7 partial, else known.
                if known < 0.4:
                    icon = "🟢"
                elif known < 0.7:
                    icon = "🟡"
                else:
                    icon = "🔴"
                st.markdown(
                    f"{icon} **{d['claim'][:130]}**  \n"
                    f"*Known to model: {known_pct}% — {d['reason']}*"
                )

    # Module B
    with st.expander(f"Module B — Retrieval Utility · {dim_scores['retrieval']}/100"):
        _render_dimension_intro(DIMENSION_META["retrieval"])
        st.markdown(f"**Result:** {module_results['retrieval']['summary']}")
        details = module_results["retrieval"].get("details", [])
        if details:
            st.caption(
                "**Recall@3** — fraction of queries where the correct chunk appears in top 3 results (1.0 = perfect).  \n"
                "**MRR** — Mean Reciprocal Rank; how high the correct chunk ranks on average (1.0 = always first)."
            )
            st.table({
                "Query": [d["query"] for d in details],
                "Recall@3": [f"{d['recall_at_3']:.2f}" for d in details],
                "MRR": [f"{d['reciprocal_rank']:.2f}" for d in details],
            })

    # Module C
    with st.expander(f"Module C — Generation Utility · {dim_scores['generation']}/100"):
        _render_dimension_intro(DIMENSION_META["generation"])
        st.markdown(f"**Result:** {module_results['generation']['summary']}")
        for d in module_results["generation"].get("details", []):
            st.markdown(f"**Q: {d['question']}**")
            # Side-by-side answers, each truncated to 350 characters.
            c1, c2 = st.columns(2)
            with c1:
                st.markdown("*Baseline — no document:*")
                st.markdown(f"> {d['baseline_answer'][:350]}")
            with c2:
                st.markdown("*RAG — with document:*")
                st.markdown(f"> {d['rag_answer'][:350]}")
            st.caption(
                f"Improvement: **{d['improvement']}/100** &nbsp;|&nbsp; "
                f"Accuracy: {d['accuracy']}/5 &nbsp;|&nbsp; "
                f"Completeness: {d['completeness']}/5 &nbsp;|&nbsp; "
                f"Specificity: {d['specificity']}/5  \n{d['reason']}"
            )
            st.divider()

    # Module D
    with st.expander(f"Module D — Attribution & Grounding · {dim_scores['attribution']}/100"):
        _render_dimension_intro(DIMENSION_META["attribution"])
        st.markdown(f"**Result:** {module_results['attribution']['summary']}")
        for d in module_results["attribution"].get("details", []):
            halluc = "⚠️ Hallucination detected" if d.get("hallucination_detected") else "✓ No hallucination"
            st.markdown(
                f"**Q: {d['question'][:110]}**  \n"
                f"Grounding: **{round(d['grounding_fraction'] * 100)}%** &nbsp;|&nbsp; "
                f"Semantic similarity: {d['semantic_similarity']} &nbsp;|&nbsp; {halluc}"
            )
            if d.get("ungrounded_claims"):
                # Show at most the first three ungrounded claims.
                st.caption("Ungrounded claims: " + "; ".join(d["ungrounded_claims"][:3]))
            if d.get("reason"):
                st.caption(d["reason"])

    # Module E
    with st.expander(f"Module E — Demand Utility · {dim_scores['demand']}/100"):
        _render_dimension_intro(DIMENSION_META["demand"])
        st.markdown(f"**Result:** {module_results['demand']['summary']}")
        topics = module_results["demand"].get("topics", [])
        if topics:
            st.caption(
                "**Query Freq** — estimated user query frequency for this topic (1 = rare, 10 = very common).  \n"
                "**Priority Domain** — whether this is a high-impact sector (health, climate, food, policy, etc.).  \n"
                "**Unmet Need** — whether existing AI models fall short in covering this topic."
            )
            st.table({
                "Topic": [t.get("topic", "") for t in topics],
                "Query Freq (1-10)": [t.get("query_frequency", "-") for t in topics],
                "Priority Domain": ["Yes" if t.get("priority_domain") else "No" for t in topics],
                "Unmet Need": ["Yes" if t.get("unmet_need") else "No" for t in topics],
                # `or ""` also covers an explicit None rationale, which a
                # plain .get default would not and which cannot be sliced.
                "Rationale": [(t.get("rationale") or "")[:80] for t in topics],
            })

    # ── Recommendations ───────────────────────────────────────────────────────
    st.markdown("### Recommended Actions")
    for rec in kvs_result["recommendations"]:
        st.markdown(f"- {rec}")

    # ── Download ──────────────────────────────────────────────────────────────
    st.divider()
    report_md = report.generate(doc.title, kvs_result, module_results, meta)
    st.download_button(
        label="⬇ Download Full Report (Markdown)",
        data=report_md,
        # File name: first 40 characters of the title, spaces -> underscores.
        file_name=f"kvl_report_{doc.title[:40].replace(' ', '_')}.md",
        mime="text/markdown",
        use_container_width=True,
    )
elif not uploaded:
    st.info("Upload a `.md` file above to begin evaluation.")