# Hugging Face Spaces status: Running on Zero
| from __future__ import annotations | |
| import sys | |
| from collections.abc import Callable | |
| from pathlib import Path | |
# ROOT is the directory two levels above this file. Its ``src`` folder is
# pushed to the front of ``sys.path`` so the ``unstuck`` imports that follow
# resolve against the in-repo sources.
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT / "src"))
| from unstuck.model_adapter import ModelAdapter | |
| from unstuck.schema import StepValidationError | |
# Free-text task prompts; ``score`` feeds each one to ``adapter.breakdown``.
SAMPLE_TASKS = [
    "Clean my apartment before a friend visits tonight",
    "Start the first draft of a hackathon demo script",
    "Catch up on overdue email without losing the whole morning",
    "Prepare to call the dentist and book an appointment",
    "Make progress on a bug report that feels too vague to start",
]
# Model identifiers compared by the script entry point; each one is handed to
# ``score`` and, through ``make_generate``, to ``InferenceClient``.
MODELS = [
    "Qwen/Qwen3-4B-Instruct-2507",
    "openbmb/MiniCPM3-4B",
    "nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16",
]
def make_generate(model_id: str) -> Callable[[str], str]:
    """Build a ``prompt -> text`` callable for manual model bake-offs.

    The returned function sends the prompt as a single user message through
    ``huggingface_hub.InferenceClient.chat_completion`` and returns the
    content of the first choice as a string.

    Args:
        model_id: Identifier passed to ``InferenceClient`` to select the model.

    Returns:
        A function that takes a prompt string and returns the reply text.
    """
    # Imported inside the function, so ``huggingface_hub`` is only required
    # once a generator is actually built.
    from huggingface_hub import InferenceClient

    hf_client = InferenceClient(model_id)

    def generate(prompt: str) -> str:
        user_turn = {"role": "user", "content": prompt}
        completion = hf_client.chat_completion(
            messages=[user_turn],
            max_tokens=512,
            temperature=0,
        )
        first_choice = completion.choices[0]
        # NOTE(review): ``str()`` guarantees a ``str`` return, but a ``None``
        # content would come back as the literal text "None".
        return str(first_choice.message.content)

    return generate
def score(model_id: str) -> float:
    """Return the share of ``SAMPLE_TASKS`` that yield validated step JSON.

    A task counts as a pass when ``adapter.breakdown`` returns without raising
    ``StepValidationError``. Any other exception propagates to the caller.

    Args:
        model_id: Model identifier forwarded to ``make_generate``.

    Returns:
        Passing tasks divided by the total number of sample tasks.
    """
    # ``max_repairs=1`` presumably allows one repair attempt per task;
    # TODO confirm against ``ModelAdapter``.
    adapter = ModelAdapter(make_generate(model_id), max_repairs=1)

    passed = 0
    for task in SAMPLE_TASKS:
        try:
            adapter.breakdown(task)
        except StepValidationError:
            # Validation failure: the task simply does not count as a pass.
            pass
        else:
            passed += 1

    return passed / len(SAMPLE_TASKS)
if __name__ == "__main__":
    # The generator is lazy, so each model is scored immediately before its
    # line is printed: one result per model, in ``MODELS`` order.
    report_lines = (f"{model_id}: {score(model_id):.0%}" for model_id in MODELS)
    for report_line in report_lines:
        print(report_line)