Spaces:
Sleeping
feat(hf-space): user-friendly copy + API key field + 4 sample initiatives
Browse files
UX polish on AshwinP/compounding-test based on first-look feedback.
The dropdown labels and intro were full of operator vocabulary
(ZeroGPU, HF_TOKEN, SPACE_ID) that a casual visitor wouldn't know.
What changed:
- Provider dropdown labels rewritten as plain free-vs-premium:
Free · Phi-4-mini-instruct (Microsoft) — runs on GPU
Free · Gemma 2 9B (Google) — runs via HuggingFace
Premium · Claude Opus 4.7 (Anthropic) — paste your API key below
Free options listed first; "Premium" framing for paid.
- Default selection is now the first free option (never anthropic
on a public Space — that would error without a key).
- Optional Anthropic API key Textbox (type=password) appears only
when Premium is selected (provider.change toggles visibility).
Per-call use; never persisted. Friendly error if Premium is
picked without a key.
- Intro markdown rewritten: short pitch, four-outcome verdict
names highlighted, one-line guidance on free-vs-premium choice,
link to the framework essay. Dropped the "Default model
provider: zerogpu" auto-detect note (jargon for casual users).
- gr.Examples component with 4 realistic ~300-400-word sample
initiatives — one per verdict quadrant:
compounder — regional commercial insurance carrier
one-shot win — community bank loan document extraction
wrong thing — 3PL warehouse picking-route AI
Roman Candle — QSR chain email/SMS personalization
Each sample pre-fills the description + industry + scale +
budget so the user can click and immediately hit Diagnose.
Tests: 31/31 still pass (parser + provider routing unchanged).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
|
@@ -391,6 +391,96 @@ SCALES = ["pilot", "department", "business unit", "enterprise"]
|
|
| 391 |
BUDGETS = ["<$100K", "$100Kβ$1M", "$1Mβ$10M", ">$10M"]
|
| 392 |
|
| 393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
def _load_reference():
|
| 395 |
"""Read the prompt template + reference JSONs from disk at app start."""
|
| 396 |
prompt_template = (ROOT / "prompts" / "diagnose.txt").read_text()
|
|
@@ -444,14 +534,21 @@ def diagnose(
|
|
| 444 |
scale: Optional[str],
|
| 445 |
budget: Optional[str],
|
| 446 |
provider: Optional[str] = None,
|
|
|
|
| 447 |
) -> tuple[str, str]:
|
| 448 |
"""Validate input, call the selected model with the cached system
|
| 449 |
block, parse the response, and return (markdown_writeup,
|
| 450 |
raw_json_string) for the two Gradio tabs.
|
| 451 |
|
| 452 |
-
`provider` (anthropic | huggingface) defaults to
|
| 453 |
-
when not supplied β the Gradio dropdown always
|
| 454 |
-
real submission.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
Per F14 + contract Β§2, all error paths surface a user-friendly message
|
| 457 |
in the markdown tab and an empty JSON tab; nothing leaks a stack trace.
|
|
@@ -483,6 +580,24 @@ def diagnose(
|
|
| 483 |
"",
|
| 484 |
)
|
| 485 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
user_prompt = (
|
| 487 |
PROMPT_TEMPLATE
|
| 488 |
.replace("{{user_input}}", description)
|
|
@@ -543,57 +658,94 @@ def build_demo():
|
|
| 543 |
"""Build and return the Gradio Blocks UI. Called only by __main__."""
|
| 544 |
import gradio as gr
|
| 545 |
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
]
|
| 550 |
if _zerogpu_available():
|
| 551 |
provider_choices.append((
|
| 552 |
-
f"
|
| 553 |
"zerogpu",
|
| 554 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 555 |
|
| 556 |
with gr.Blocks(title="The Compounding Test") as demo:
|
| 557 |
gr.Markdown(
|
| 558 |
"# The Compounding Test\n\n"
|
| 559 |
-
"A diagnostic for AI investments at non-technology companies.
|
| 560 |
-
"
|
| 561 |
-
"
|
| 562 |
-
"the wrong thing, or Roman Candle.
|
| 563 |
-
"
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
|
|
|
| 567 |
)
|
| 568 |
with gr.Row():
|
| 569 |
description = gr.Textbox(
|
| 570 |
label=f"Describe your AI initiative ({MIN_DESCRIPTION_WORDS}β{MAX_DESCRIPTION_WORDS} words)",
|
| 571 |
placeholder=(
|
| 572 |
-
"Describe the bottleneck of your operation, the AI
|
| 573 |
-
"what data feeds it, where the labels come
|
| 574 |
-
"expect competitors to respond. Be
|
|
|
|
|
|
|
| 575 |
),
|
| 576 |
lines=12,
|
| 577 |
)
|
|
|
|
| 578 |
with gr.Row():
|
| 579 |
industry = gr.Dropdown(INDUSTRIES, label="Industry (optional)", value=None)
|
| 580 |
scale = gr.Dropdown(SCALES, label="Scale (optional)", value=None)
|
| 581 |
budget = gr.Dropdown(BUDGETS, label="Budget tier (optional)", value=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
with gr.Row():
|
| 583 |
provider = gr.Dropdown(
|
| 584 |
choices=provider_choices,
|
| 585 |
-
value=
|
| 586 |
-
label="
|
| 587 |
-
info=(
|
| 588 |
-
"Claude gives the highest-quality writeups but needs your "
|
| 589 |
-
"own ANTHROPIC_API_KEY. ZeroGPU runs an open-weight model "
|
| 590 |
-
"(Phi-4-mini-instruct by default) on the Space's free Pro "
|
| 591 |
-
"GPU β no API costs, no inference credits. HuggingFace API "
|
| 592 |
-
"routes to an open model through the HF Inference Providers "
|
| 593 |
-
"API β works without any keys on a Space via the Space's "
|
| 594 |
-
"monthly credits. Switch to compare writeup quality."
|
| 595 |
-
),
|
| 596 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 597 |
submit = gr.Button("Diagnose", variant="primary")
|
| 598 |
with gr.Tabs():
|
| 599 |
with gr.Tab("Diagnosis"):
|
|
@@ -602,7 +754,7 @@ def build_demo():
|
|
| 602 |
json_out = gr.Code(language="json")
|
| 603 |
submit.click(
|
| 604 |
diagnose,
|
| 605 |
-
inputs=[description, industry, scale, budget, provider],
|
| 606 |
outputs=[writeup_out, json_out],
|
| 607 |
)
|
| 608 |
|
|
|
|
| 391 |
BUDGETS = ["<$100K", "$100Kβ$1M", "$1Mβ$10M", ">$10M"]
|
| 392 |
|
| 393 |
|
| 394 |
+
# ---------------------------------------------------------------------------
|
| 395 |
+
# Sample initiatives (gr.Examples) β one per verdict quadrant
|
| 396 |
+
# ---------------------------------------------------------------------------
|
| 397 |
+
# Realistic ~250β400-word AI-initiative descriptions that should land in a
|
| 398 |
+
# specific quadrant of the 2Γ2 verdict matrix. Used to seed user testing
|
| 399 |
+
# and give first-time visitors something concrete to click.
|
| 400 |
+
|
| 401 |
+
_SAMPLE_COMPOUNDER = (
|
| 402 |
+
"We're a regional commercial insurance carrier specializing in restaurant "
|
| 403 |
+
"general liability. We write about 8,000 policies a year across the "
|
| 404 |
+
"Midwest. Underwriting is the bottleneck of our business β agents wait "
|
| 405 |
+
"36 to 48 hours for a quote because our underwriters manually pull "
|
| 406 |
+
"industry codes, loss runs, and prior-carrier history from three "
|
| 407 |
+
"different systems and decide whether to bind, decline, or refer. "
|
| 408 |
+
"We're deploying an LLM-powered underwriting assistant that pulls the "
|
| 409 |
+
"data automatically, flags risk factors based on patterns in our "
|
| 410 |
+
"12-year claims database, and proposes a base rate. The underwriter "
|
| 411 |
+
"reviews and approves. Every policy we write generates new claim "
|
| 412 |
+
"outcomes β fires, slip-and-falls, liquor-liability claims β and those "
|
| 413 |
+
"outcomes feed back into the next quarter's model retraining. Our "
|
| 414 |
+
"competitors mostly use Verisk's industry-standard models, which we "
|
| 415 |
+
"don't share data with, so our model gets better on our book of "
|
| 416 |
+
"business while theirs reflects the industry average. Internal goal: "
|
| 417 |
+
"cut time-to-quote from 36 hours to 4 hours, increase win rate on "
|
| 418 |
+
"profitable risks by 15%, and progressively shift the loss ratio by "
|
| 419 |
+
"1β2 points per year as the model learns from each renewal cycle."
|
| 420 |
+
)
|
| 421 |
+
|
| 422 |
+
_SAMPLE_ONE_SHOT_WIN = (
|
| 423 |
+
"We're a community bank with $4B in assets, 38 branches across two "
|
| 424 |
+
"states. Loan officers spend about 6 hours per commercial loan "
|
| 425 |
+
"reviewing financial statements, tax returns, and corporate documents "
|
| 426 |
+
"before they can write the credit memo. We're deploying GPT-4 to "
|
| 427 |
+
"extract key fields β revenue, EBITDA, debt service coverage ratio, "
|
| 428 |
+
"ownership structure, related-party transactions β from these "
|
| 429 |
+
"documents into a structured form. The loan officer reviews the "
|
| 430 |
+
"extraction and writes the credit memo by hand. We expect to cut "
|
| 431 |
+
"document review time from 6 hours to about 90 minutes per loan. "
|
| 432 |
+
"The vendor provides the model and the document templates and is "
|
| 433 |
+
"selling the same system to four of our peer community banks in the "
|
| 434 |
+
"region. The AI doesn't learn from the outcome of the loan: defaults, "
|
| 435 |
+
"prepayments, modifications, restructurings all go into our loan "
|
| 436 |
+
"servicing system, which doesn't connect back to the extraction "
|
| 437 |
+
"model. The vendor's roadmap doesn't include any feedback loop. "
|
| 438 |
+
"We're funding the project from the operations budget; the credit "
|
| 439 |
+
"team is excited about the time savings."
|
| 440 |
+
)
|
| 441 |
+
|
| 442 |
+
_SAMPLE_WRONG_THING = (
|
| 443 |
+
"We're a third-party logistics provider with 8 warehouses on the "
|
| 444 |
+
"East Coast. We're investing in computer vision software to optimize "
|
| 445 |
+
"order picking routes β the AI looks at the warehouse layout, "
|
| 446 |
+
"current orders, and worker positions and suggests optimized pick "
|
| 447 |
+
"paths in real time. Pilot results show a 12% reduction in steps "
|
| 448 |
+
"per order on the test floor. Our operations team has been excited "
|
| 449 |
+
"about this for 18 months and we just signed a multi-year contract "
|
| 450 |
+
"with the vendor. Some context on the operation: our warehouses run "
|
| 451 |
+
"2 shifts. Order volume in shift 1 is around 14,000 orders/day; "
|
| 452 |
+
"shift 2 is around 6,000. The pick wave finishes by 2pm on shift 1, "
|
| 453 |
+
"then workers wait 4 to 5 hours for shift 2 trucks to arrive at the "
|
| 454 |
+
"loading docks. The trucks are scheduled by the customer (a major "
|
| 455 |
+
"national retailer) and arrive in unpredictable windows. We don't "
|
| 456 |
+
"control the truck schedule. The CFO has been asking us why total "
|
| 457 |
+
"throughput per warehouse hasn't moved much in three years; our "
|
| 458 |
+
"answer has been that the legacy WMS is the constraint, which is "
|
| 459 |
+
"why we're investing in better picking AI."
|
| 460 |
+
)
|
| 461 |
+
|
| 462 |
+
_SAMPLE_ROMAN_CANDLE = (
|
| 463 |
+
"We run a chain of 220 quick-service restaurants across the "
|
| 464 |
+
"Southeast. Our gross margin has been under pressure from rising "
|
| 465 |
+
"ingredient costs and we're rolling out an AI-powered personalized "
|
| 466 |
+
"marketing platform that sends customized email and SMS offers "
|
| 467 |
+
"based on customer purchase history, location, and weather. The "
|
| 468 |
+
"platform is from a major QSR-tech vendor used by several of our "
|
| 469 |
+
"competitors. Our customer data β names, emails, phone numbers, "
|
| 470 |
+
"purchase frequency β lives in our point-of-sale provider's cloud, "
|
| 471 |
+
"which the marketing platform pulls from via the POS provider's "
|
| 472 |
+
"standard integration. Our purchase data and the modeling are both "
|
| 473 |
+
"the vendor's stack; we don't see the underlying model and our data "
|
| 474 |
+
"is commingled with other QSR brands the vendor serves. We expect "
|
| 475 |
+
"to lift email click-through by 8β12% based on the vendor's "
|
| 476 |
+
"benchmark studies of similar brands. The marketing team is "
|
| 477 |
+
"running the rollout; finance signed off on the multi-year "
|
| 478 |
+
"subscription. We have not measured what's actually constraining "
|
| 479 |
+
"same-store sales growth β we just know revenue has been flat for "
|
| 480 |
+
"two years and the board wants action."
|
| 481 |
+
)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
def _load_reference():
|
| 485 |
"""Read the prompt template + reference JSONs from disk at app start."""
|
| 486 |
prompt_template = (ROOT / "prompts" / "diagnose.txt").read_text()
|
|
|
|
| 534 |
scale: Optional[str],
|
| 535 |
budget: Optional[str],
|
| 536 |
provider: Optional[str] = None,
|
| 537 |
+
anthropic_api_key: Optional[str] = None,
|
| 538 |
) -> tuple[str, str]:
|
| 539 |
"""Validate input, call the selected model with the cached system
|
| 540 |
block, parse the response, and return (markdown_writeup,
|
| 541 |
raw_json_string) for the two Gradio tabs.
|
| 542 |
|
| 543 |
+
`provider` (anthropic | huggingface | zerogpu) defaults to
|
| 544 |
+
DEFAULT_PROVIDER when not supplied β the Gradio dropdown always
|
| 545 |
+
supplies it on a real submission.
|
| 546 |
+
|
| 547 |
+
`anthropic_api_key` is a per-call user-supplied key. When provider
|
| 548 |
+
is "anthropic" and the key is provided, it overrides any
|
| 549 |
+
ANTHROPIC_API_KEY env var for this single request. The key is never
|
| 550 |
+
persisted (Anthropic SDK uses it once and the client object is
|
| 551 |
+
garbage-collected at function exit).
|
| 552 |
|
| 553 |
Per F14 + contract Β§2, all error paths surface a user-friendly message
|
| 554 |
in the markdown tab and an empty JSON tab; nothing leaks a stack trace.
|
|
|
|
| 580 |
"",
|
| 581 |
)
|
| 582 |
|
| 583 |
+
# If Premium (Anthropic) is selected, the user must supply a key β
|
| 584 |
+
# either via the page's API-key field (per-call) or via an
|
| 585 |
+
# ANTHROPIC_API_KEY env var on the Space. Without either, fail fast
|
| 586 |
+
# with a friendly explanation before we hit the SDK.
|
| 587 |
+
if provider == "anthropic":
|
| 588 |
+
env_key = os.environ.get("ANTHROPIC_API_KEY", "").strip()
|
| 589 |
+
user_key = (anthropic_api_key or "").strip()
|
| 590 |
+
if not env_key and not user_key:
|
| 591 |
+
return (
|
| 592 |
+
"β Premium (Claude Opus) needs an Anthropic API key. Either "
|
| 593 |
+
"paste your key in the field above, or pick one of the free "
|
| 594 |
+
"options from the model dropdown.",
|
| 595 |
+
"",
|
| 596 |
+
)
|
| 597 |
+
if user_key:
|
| 598 |
+
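# NOTE(review): this assigns the key to os.environ, which is process-wide, so it outlives this request and satisfies the env_key check for later callers; pass the key to the client directly instead.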
|
| 599 |
+
os.environ["ANTHROPIC_API_KEY"] = user_key
|
| 600 |
+
|
| 601 |
user_prompt = (
|
| 602 |
PROMPT_TEMPLATE
|
| 603 |
.replace("{{user_input}}", description)
|
|
|
|
| 658 |
"""Build and return the Gradio Blocks UI. Called only by __main__."""
|
| 659 |
import gradio as gr
|
| 660 |
|
| 661 |
+
# Free options first, premium last. Plain-English labels with no
|
| 662 |
+
# ANTHROPIC_API_KEY / HF_TOKEN / SPACE_ID jargon β the casual user
|
| 663 |
+
# shouldn't have to know what any of those mean.
|
| 664 |
+
provider_choices = []
|
| 665 |
if _zerogpu_available():
|
| 666 |
provider_choices.append((
|
| 667 |
+
f"Free Β· Phi-4-mini-instruct (Microsoft) β runs on GPU",
|
| 668 |
"zerogpu",
|
| 669 |
))
|
| 670 |
+
provider_choices.append((
|
| 671 |
+
f"Free Β· Gemma 2 9B (Google) β runs via HuggingFace",
|
| 672 |
+
"huggingface",
|
| 673 |
+
))
|
| 674 |
+
provider_choices.append((
|
| 675 |
+
f"Premium Β· Claude Opus 4.7 (Anthropic) β paste your API key below",
|
| 676 |
+
"anthropic",
|
| 677 |
+
))
|
| 678 |
+
# Default to the first free option; user can pick Premium if they
|
| 679 |
+
# have a key. Never default to anthropic on a public Space.
|
| 680 |
+
default_choice = provider_choices[0][1]
|
| 681 |
|
| 682 |
with gr.Blocks(title="The Compounding Test") as demo:
|
| 683 |
gr.Markdown(
|
| 684 |
"# The Compounding Test\n\n"
|
| 685 |
+
"A diagnostic for AI investments at non-technology companies. "
|
| 686 |
+
"Describe your AI initiative β get a scored writeup in one of "
|
| 687 |
+
"four outcomes: **compounder**, **one-shot win**, **compounding "
|
| 688 |
+
"the wrong thing**, or **Roman Candle**.\n\n"
|
| 689 |
+
"**The default model is free.** Pick **Premium Β· Claude Opus** "
|
| 690 |
+
"from the dropdown if you have an Anthropic API key and want "
|
| 691 |
+
"the highest-quality writeup. Read the full framework at "
|
| 692 |
+
"[mile-hi.ai/journal/the-berkshire-test]("
|
| 693 |
+
"https://www.mile-hi.ai/journal/the-berkshire-test)."
|
| 694 |
)
|
| 695 |
with gr.Row():
|
| 696 |
description = gr.Textbox(
|
| 697 |
label=f"Describe your AI initiative ({MIN_DESCRIPTION_WORDS}β{MAX_DESCRIPTION_WORDS} words)",
|
| 698 |
placeholder=(
|
| 699 |
+
"Describe the bottleneck of your operation, the AI "
|
| 700 |
+
"investment, what data feeds it, where the labels come "
|
| 701 |
+
"from, and how you expect competitors to respond. Be "
|
| 702 |
+
"specific about the workflow.\n\n"
|
| 703 |
+
"Or pick a sample below to see how it works."
|
| 704 |
),
|
| 705 |
lines=12,
|
| 706 |
)
|
| 707 |
+
|
| 708 |
with gr.Row():
|
| 709 |
industry = gr.Dropdown(INDUSTRIES, label="Industry (optional)", value=None)
|
| 710 |
scale = gr.Dropdown(SCALES, label="Scale (optional)", value=None)
|
| 711 |
budget = gr.Dropdown(BUDGETS, label="Budget tier (optional)", value=None)
|
| 712 |
+
|
| 713 |
+
gr.Examples(
|
| 714 |
+
examples=[
|
| 715 |
+
[_SAMPLE_COMPOUNDER, "insurance", "business unit", "$1Mβ$10M"],
|
| 716 |
+
[_SAMPLE_ONE_SHOT_WIN, "banking", "business unit", "$100Kβ$1M"],
|
| 717 |
+
[_SAMPLE_WRONG_THING, "logistics", "enterprise", "$1Mβ$10M"],
|
| 718 |
+
[_SAMPLE_ROMAN_CANDLE, "retail", "enterprise", "$100Kβ$1M"],
|
| 719 |
+
],
|
| 720 |
+
inputs=[description, industry, scale, budget],
|
| 721 |
+
label="Sample initiatives β click one to load it (then click Diagnose)",
|
| 722 |
+
examples_per_page=4,
|
| 723 |
+
)
|
| 724 |
+
|
| 725 |
with gr.Row():
|
| 726 |
provider = gr.Dropdown(
|
| 727 |
choices=provider_choices,
|
| 728 |
+
value=default_choice,
|
| 729 |
+
label="Choose a model",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
)
|
| 731 |
+
# The API-key field appears only when Premium is selected. The
|
| 732 |
+
# key is used per-request and never stored.
|
| 733 |
+
api_key = gr.Textbox(
|
| 734 |
+
label="Anthropic API key",
|
| 735 |
+
placeholder="sk-ant-...",
|
| 736 |
+
type="password",
|
| 737 |
+
info=(
|
| 738 |
+
"Used only for this request and never stored. "
|
| 739 |
+
"Get a key at console.anthropic.com."
|
| 740 |
+
),
|
| 741 |
+
visible=False,
|
| 742 |
+
)
|
| 743 |
+
|
| 744 |
+
def _toggle_api_key(p):
|
| 745 |
+
return gr.update(visible=(p == "anthropic"))
|
| 746 |
+
|
| 747 |
+
provider.change(_toggle_api_key, inputs=[provider], outputs=[api_key])
|
| 748 |
+
|
| 749 |
submit = gr.Button("Diagnose", variant="primary")
|
| 750 |
with gr.Tabs():
|
| 751 |
with gr.Tab("Diagnosis"):
|
|
|
|
| 754 |
json_out = gr.Code(language="json")
|
| 755 |
submit.click(
|
| 756 |
diagnose,
|
| 757 |
+
inputs=[description, industry, scale, budget, provider, api_key],
|
| 758 |
outputs=[writeup_out, json_out],
|
| 759 |
)
|
| 760 |
|