Spaces:

build-small-hackathon
/

promptstat

Runtime error

App Files Files Community

promptstat / ui /data.py

xxixx1028

Deploy PromptStat — UI shell + MiniCPM4.1-8B + 4-LoRA hybrid (Modal)

dc9f530 verified 14 days ago

Raw

History Blame Contribute Delete

5.58 kB

	"""The data contract for the card UI — and the STUB provider that fills it with fake data.

	THIS IS THE BACKEND ADOPTION SEAM. The UI renders whatever `CardData` it is handed; it does
	not care where the numbers came from. To go live, the backend replaces `get_stub_card()` with
	a real builder that returns a `CardData`, and feeds `ProcessingFacts` from real progress.
	Nothing in `screens/` or `components/` needs to change.
	"""
	from __future__ import annotations

	from dataclasses import dataclass, field

	# The scored axes in canonical order. These display names are reused everywhere:
	# radar labels, bars, and accordions.
	AXES = [
	    "Focus",
	    "Technique",
	    "Critical",
	    "Interaction",
	    "Input Quality",
	]

	# Tier ramp, one (min_inclusive, letter, hex color) row per tier. Rows are listed
	# from the highest threshold down because lookups check them high-to-low.
	TIERS = [
	    (8.5, "S", "#A78BFA"),
	    (7.0, "A", "#E8B84B"),
	    (5.5, "B", "#C7CDD6"),
	    (4.0, "C", "#B08D57"),
	    (0.0, "D", "#6B7280"),
	]


	@dataclass
	class Axis:
	    """One scored axis of a card.

	    `confidence` is one of 'high' | 'medium' | 'low'.
	    """

	    name: str  # axis name, e.g. "Focus"
	    score: float  # 0-10
	    confidence: str  # high | medium | low
	    evidence: list[str]  # quoted user turns shown in the drilldown accordion
	    tip: str  # one-line improvement hint


	@dataclass
	class CriticalBreakdown:
	    """Per-category counts behind the Critical axis, shown on the card back."""

	    skepticism: int
	    source_req: int
	    rebuttal: int
	    verify: int
	    re_ask: int

	    def as_pairs(self) -> list[tuple[str, int]]:
	        """Return the counts as (label, count) pairs in field order.

	        Labels are the hyphenated display forms ("source-req", "re-ask"),
	        not the attribute names.
	        """
	        labels = ("skepticism", "source-req", "rebuttal", "verify", "re-ask")
	        counts = (
	            self.skepticism,
	            self.source_req,
	            self.rebuttal,
	            self.verify,
	            self.re_ask,
	        )
	        return list(zip(labels, counts))


	@dataclass
	class CardData:
	    """Everything the result screen needs to render one card."""

	    name: str
	    axes: list[Axis]
	    critical: CriticalBreakdown
	    improvement: str
	    # Scoring provenance for the honesty tag: "placeholder" (DummyScorer) | "real-base"
	    # (MiniCPM-8B) | "real-lora" (MiniCPM-8B + the locked LoRA hybrid). Set by app.py
	    # from the active scorer.
	    provenance: str = "placeholder"
	    # True when scored from a single pasted conversation (quick demo) vs a full export.
	    # The card-design session keys "Sample analysis" framing off this; the result screen
	    # shows a scope disclaimer.
	    single_conversation: bool = False

	    @property
	    def overall(self) -> float:
	        """Mean of the axis scores, rounded to one decimal place.

	        Returns 0.0 when `axes` is empty instead of raising ZeroDivisionError,
	        so a card without axes can still be rendered.
	        """
	        if not self.axes:
	            return 0.0
	        return round(sum(a.score for a in self.axes) / len(self.axes), 1)

	    @property
	    def tier(self) -> tuple[str, str]:
	        """Return (letter, hex_color) for the current overall."""
	        ovr = self.overall
	        # TIERS is checked in its listed order; the first row whose minimum is met wins.
	        for minimum, letter, color in TIERS:
	            if ovr >= minimum:
	                return letter, color
	        # Overall is below every threshold: fall back to the last row of the ramp.
	        return TIERS[-1][1], TIERS[-1][2]

	    def radar_scores(self) -> list[float]:
	        """Return the axis scores in the same order as `axes`."""
	        return [a.score for a in self.axes]


	@dataclass
	class ProcessingFacts:
	    """Placeholder facts that the processing screen reveals one at a time.

	    Every field defaults to a fixed fake value. Backend swap: supply real values
	    (or stream real progress events) in place of these stubs.
	    """

	    export_label: str = "ChatGPT export detected"
	    # In the stub values, english_scored + other_unscored adds up to turns_total.
	    turns_total: int = 1204
	    date_range: str = "Mar 2024 – Jun 2026"
	    english_scored: int = 1050
	    other_unscored: int = 154
	    highlight: str = "Most active on Tuesday nights"


	# --------------------------------------------------------------------------------------
	# STUB DATA — fake, fixed. Replace these two providers to integrate the real backend.
	# --------------------------------------------------------------------------------------

	def get_stub_card(name: str = "") -> CardData:
	    """Build the fixed placeholder card.

	    Axis scores are Focus 8 / Technique 6 / Critical 9 / Interaction 5 /
	    Input Quality 7, so the mean is 7.0, which lands in tier A.

	    `name` is stripped of surrounding whitespace; a missing or blank name is
	    replaced with "Anonymous".
	    """
	    display_name = (name or "").strip()
	    if not display_name:
	        display_name = "Anonymous"

	    focus = Axis(
	        name="Focus",
	        score=8,
	        confidence="low",
	        evidence=[
	            '"let\'s keep this thread strictly about the migration script"',
	            '"ignore the earlier tangent — back to the failing test"',
	        ],
	        tip="Open a fresh chat per task to keep each thread on one goal.",
	    )
	    technique = Axis(
	        name="Technique",
	        score=6,
	        confidence="high",
	        evidence=[
	            '"act as a senior reviewer and critique this diff"',
	            '"give me 3 options, then we pick"',
	        ],
	        tip="Try few-shot examples for formatting-heavy asks.",
	    )
	    critical_axis = Axis(
	        name="Critical",
	        score=9,
	        confidence="high",
	        evidence=[
	            '"that benchmark looks cherry-picked — what\'s the baseline?"',
	            '"cite the source before I trust that number"',
	        ],
	        tip="Keep pushing back; maybe verify claims against a second source.",
	    )
	    interaction = Axis(
	        name="Interaction",
	        score=5,
	        confidence="medium",
	        evidence=['"can you walk me through why, step by step?"'],
	        tip="Ask follow-ups that build on the model\'s reasoning, not just redo it.",
	    )
	    input_quality = Axis(
	        name="Input Quality",
	        score=7,
	        confidence="medium",
	        evidence=[
	            '"context: Python 3.12, FastAPI, here\'s the failing trace …"',
	            '"constraints: no new deps, must stay under 400 lines"',
	        ],
	        tip="Lead with role + constraints + a concrete example for richer prompts.",
	    )

	    # The list order below is the order in which the axes are stored on the card.
	    return CardData(
	        name=display_name,
	        axes=[focus, technique, critical_axis, interaction, input_quality],
	        critical=CriticalBreakdown(skepticism=14, source_req=6, rebuttal=9, verify=4, re_ask=3),
	        improvement="Biggest lever: tighten focus — split multi-topic chats into separate threads.",
	    )


	def get_stub_facts() -> ProcessingFacts:
	    """Return the placeholder processing facts: a `ProcessingFacts` with all defaults."""
	    stub_facts = ProcessingFacts()
	    return stub_facts