promptstat / ui /data.py
xxixx1028's picture
Deploy PromptStat — UI shell + MiniCPM4.1-8B + 4-LoRA hybrid (Modal)
dc9f530 verified
Raw
History Blame Contribute Delete
5.58 kB
"""The data contract for the card UI — and the STUB provider that fills it with fake data.
THIS IS THE BACKEND ADOPTION SEAM. The UI renders whatever `CardData` it is handed; it does
not care where the numbers came from. To go live, the backend replaces `get_stub_card()` with
a real builder that returns a `CardData`, and feeds `ProcessingFacts` from real progress.
Nothing in `screens/` or `components/` needs to change.
"""
from __future__ import annotations
from dataclasses import dataclass, field
# Canonical axis order. These display names are used everywhere an axis is shown
# (radar labels, bars, accordions).
AXES = ["Focus", "Technique", "Critical", "Interaction", "Input Quality"]
# Tier ramp: one (min_inclusive, letter, hex color) row per tier, listed from the
# highest threshold down. `CardData.tier` walks the rows in this order and returns
# the first one whose minimum the overall score reaches, so the ordering matters.
TIERS = [
(8.5, "S", "#A78BFA"),
(7.0, "A", "#E8B84B"),
(5.5, "B", "#C7CDD6"),
(4.0, "C", "#B08D57"),
(0.0, "D", "#6B7280"),
]
@dataclass
class Axis:
    """A single scored axis of the card.

    Attributes:
        name: Label of the axis (the stub card uses the names listed in `AXES`).
        score: Axis score on a 0-10 scale.
        confidence: One of 'high' | 'medium' | 'low'.
        evidence: Quoted user turns shown in the drilldown accordion.
        tip: One-line improvement hint.
    """

    name: str
    score: float
    confidence: str
    evidence: list[str]
    tip: str
@dataclass
class CriticalBreakdown:
    """Per-category counts behind the Critical axis, shown on the card back.

    Attributes:
        skepticism: Count reported under the "skepticism" label.
        source_req: Count reported under the "source-req" label.
        rebuttal: Count reported under the "rebuttal" label.
        verify: Count reported under the "verify" label.
        re_ask: Count reported under the "re-ask" label.
    """

    skepticism: int
    source_req: int
    rebuttal: int
    verify: int
    re_ask: int

    def as_pairs(self) -> list[tuple[str, int]]:
        """Return the counts as (display label, count) pairs in declaration order.

        Display labels use hyphens where the attribute names use underscores.
        """
        # (display label, attribute name) for each count, in display order.
        label_to_attr = (
            ("skepticism", "skepticism"),
            ("source-req", "source_req"),
            ("rebuttal", "rebuttal"),
            ("verify", "verify"),
            ("re-ask", "re_ask"),
        )
        return [(label, getattr(self, attr)) for label, attr in label_to_attr]
@dataclass
class CardData:
    """Everything the result screen needs to render one card.

    Attributes:
        name: Name shown on the card.
        axes: The scored axes; `overall`, `tier` and `radar_scores` are derived from them.
        critical: Counts behind the Critical axis.
        improvement: Improvement text for the card.
        provenance: Scoring provenance for the honesty tag (see the comment on the field).
        single_conversation: Whether the card was scored from a single pasted conversation.
    """

    name: str
    axes: list[Axis]
    critical: CriticalBreakdown
    improvement: str
    # scoring provenance for the honesty tag: "placeholder" (DummyScorer) | "real-base" (MiniCPM-8B)
    # | "real-lora" (MiniCPM-8B + the locked LoRA hybrid). Set by app.py from the active scorer.
    provenance: str = "placeholder"
    # True when scored from a single pasted conversation (quick demo) vs a full export. The card-design
    # session keys "Sample analysis" framing off this; the result screen shows a scope disclaimer.
    single_conversation: bool = False

    @property
    def overall(self) -> float:
        """Return the mean axis score rounded to one decimal place.

        A card with no axes yields 0.0 rather than raising ZeroDivisionError, so
        `overall` and `tier` stay usable on an empty card.
        """
        if not self.axes:
            return 0.0
        return round(sum(a.score for a in self.axes) / len(self.axes), 1)

    @property
    def tier(self) -> tuple[str, str]:
        """Return (letter, hex_color) for the current overall score.

        `TIERS` is scanned in its listed (high-to-low) order and the first row
        whose minimum is reached wins.
        """
        ovr = self.overall
        for minimum, letter, color in TIERS:
            if ovr >= minimum:
                return letter, color
        # Only reached when the overall is below every threshold (e.g. a negative
        # or NaN mean); fall back to the last row of the ramp.
        return TIERS[-1][1], TIERS[-1][2]

    def radar_scores(self) -> list[float]:
        """Return the axis scores in the order of `axes`."""
        return [a.score for a in self.axes]
@dataclass
class ProcessingFacts:
    """Facts revealed one at a time on the processing screen.

    Every default below is fake stub data. Backend swap: feed real values
    (or stream real progress events) instead of relying on these defaults.

    Attributes:
        export_label: Line describing the detected export.
        turns_total: Total number of turns.
        date_range: Human-readable date span.
        english_scored: Number of English turns that were scored.
        other_unscored: Number of other turns left unscored.
        highlight: A single highlight line.
    """

    export_label: str = "ChatGPT export detected"
    turns_total: int = 1204
    date_range: str = "Mar 2024 – Jun 2026"
    english_scored: int = 1050
    other_unscored: int = 154
    highlight: str = "Most active on Tuesday nights"
# --------------------------------------------------------------------------------------
# STUB DATA — fake, fixed. Replace these two providers to integrate the real backend.
# --------------------------------------------------------------------------------------
def get_stub_card(name: str = "") -> CardData:
    """Build the fixed fake card used while no real backend is wired in.

    The card name is `name` with surrounding whitespace stripped; an empty,
    whitespace-only or falsy `name` becomes "Anonymous".

    Stub scores: Focus 8 / Technique 6 / Critical 9 / Interaction 5 /
    Input Quality 7 -> mean 7.0 -> tier A.
    """
    display_name = (name or "").strip()
    if not display_name:
        display_name = "Anonymous"

    focus = Axis(
        name="Focus",
        score=8,
        confidence="low",
        evidence=[
            '"let\'s keep this thread strictly about the migration script"',
            '"ignore the earlier tangent — back to the failing test"',
        ],
        tip="Open a fresh chat per task to keep each thread on one goal.",
    )
    technique = Axis(
        name="Technique",
        score=6,
        confidence="high",
        evidence=[
            '"act as a senior reviewer and critique this diff"',
            '"give me 3 options, then we pick"',
        ],
        tip="Try few-shot examples for formatting-heavy asks.",
    )
    critical_axis = Axis(
        name="Critical",
        score=9,
        confidence="high",
        evidence=[
            '"that benchmark looks cherry-picked — what\'s the baseline?"',
            '"cite the source before I trust that number"',
        ],
        tip="Keep pushing back; maybe verify claims against a second source.",
    )
    interaction = Axis(
        name="Interaction",
        score=5,
        confidence="medium",
        evidence=['"can you walk me through why, step by step?"'],
        tip="Ask follow-ups that build on the model\'s reasoning, not just redo it.",
    )
    input_quality = Axis(
        name="Input Quality",
        score=7,
        confidence="medium",
        evidence=[
            '"context: Python 3.12, FastAPI, here\'s the failing trace …"',
            '"constraints: no new deps, must stay under 400 lines"',
        ],
        tip="Lead with role + constraints + a concrete example for richer prompts.",
    )

    # Counts shown on the card back for the Critical axis.
    breakdown = CriticalBreakdown(skepticism=14, source_req=6, rebuttal=9, verify=4, re_ask=3)

    return CardData(
        name=display_name,
        # Same order as the canonical AXES list.
        axes=[focus, technique, critical_axis, interaction, input_quality],
        critical=breakdown,
        improvement="Biggest lever: tighten focus — split multi-topic chats into separate threads.",
    )
def get_stub_facts() -> ProcessingFacts:
    """Return the stub processing facts: a `ProcessingFacts` built entirely from its defaults."""
    stub_facts = ProcessingFacts()
    return stub_facts