Spaces:
Running on Zero
Running on Zero
File size: 4,239 Bytes
"""
The spec-parser brick: messy human text -> the advisor's form fields.
Serves cn0303/fitcheck-spec-parser (Qwen3-1.7B + LoRA, trained in this repo —
see scripts/train_spec_lora.py and the model card for the honest eval). Same
serving pattern as the narrator: lazy load inside @spaces.GPU, loud errors,
no fake fallbacks. Missing info comes back null — the model is specifically
gated against inventing specs.
"""
import json
import re
import sys
from model_brick import _should_load
# Hub repo id used for both the tokenizer and the LoRA adapter weights.
ADAPTER_ID = "cn0303/fitcheck-spec-parser"
# Hub repo id of the base causal LM the adapter is applied on top of.
BASE_ID = "unsloth/Qwen3-1.7B"
# MUST stay in sync with scripts/build_spec_dataset.py (the training prompt).
# Sent verbatim as the system message on every generation; do not reformat it.
SYSTEM_PROMPT = """\
You turn a person's description of their computer into JSON for a hardware checker.
Output ONLY a JSON object with exactly these fields:
{"computer": "Windows laptop"|"Windows desktop"|"Mac"|"Linux PC"|"Mini PC / Raspberry Pi"|null,
"ram_gb": number|null, "provider": "nvidia"|"amd"|"apple"|"intel"|"none"|null,
"gpu": string|null, "vram_gb": number|null}
Rules:
- Extract ONLY what the text states or directly implies. Anything not stated is null. Never guess or invent a spec.
- "provider": "none" ONLY when the text says there is no separate graphics card (e.g. "no GPU", "integrated only"). Graphics simply not mentioned or unknown -> null.
- "gpu" must be a specific model (e.g. "RTX 3060"). A brand or series alone ("geforce", "gtx", "radeon") is NOT a gpu -> set provider, leave gpu null.
- If the text describes two or more different machines or a choice between them, every field is null."""
# Keys that parse_specs() returns; they mirror the field names listed in SYSTEM_PROMPT.
FIELDS = ("computer", "ram_gb", "provider", "gpu", "vram_gb")
# Generation callable; stays None unless the guarded load block below succeeds,
# in which case parse_specs() uses it, otherwise parse_specs() returns an error dict.
_GENERATE = None
# Cache for the tokenizer and model, filled on first generation by _load().
_state = {"tok": None, "model": None}
if _should_load():
    try:
        import spaces
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer

        def _load():
            """Load tokenizer, base model and LoRA adapter, and cache them in ``_state``."""
            from peft import PeftModel

            tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_ID, dtype=torch.bfloat16
            )
            adapted = PeftModel.from_pretrained(base_model, ADAPTER_ID)
            _state["tok"] = tokenizer
            # "model" is written last: _generate() treats a None model as "not loaded yet".
            _state["model"] = adapted.to("cuda").eval()

        @spaces.GPU(duration=120)  # cold path = 3.4GB download + load + generate
        def _generate(text: str) -> str:
            """Run one greedy generation for ``text`` and return only the newly generated text.

            The model is loaded on first use. The prompt tokens are sliced off
            the output before decoding, and the decoded string is stripped.
            """
            if _state["model"] is None:
                _load()
            tokenizer = _state["tok"]
            model = _state["model"]

            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ]
            template_kwargs = {
                "add_generation_prompt": True,
                "return_tensors": "pt",
                "return_dict": True,
            }
            try:
                encoded = tokenizer.apply_chat_template(
                    messages, enable_thinking=False, **template_kwargs
                )
            except TypeError:
                # Retry without enable_thinking when the call rejects that keyword.
                encoded = tokenizer.apply_chat_template(messages, **template_kwargs)
            encoded = encoded.to("cuda")

            prompt_len = encoded["input_ids"].shape[1]
            with torch.no_grad():
                generated = model.generate(
                    **encoded,
                    max_new_tokens=96,
                    do_sample=False,
                    pad_token_id=tokenizer.eos_token_id,
                )
            completion_ids = generated[0][prompt_len:]
            decoded = tokenizer.decode(completion_ids, skip_special_tokens=True)
            return decoded.strip()

        _GENERATE = _generate
    except Exception as e:  # noqa: BLE001
        # Any import/setup failure leaves _GENERATE as None; report it loudly on stderr.
        print(f"[FitCheck] spec parser unavailable: {e!r}", file=sys.stderr, flush=True)
def parse_specs(text: str) -> dict:
    """Returns the parsed fields, or {error} — never invented content.

    Args:
        text: Free-form description of a computer. ``None`` and
            whitespace-only input are treated as empty. Input longer than
            4000 characters is truncated before being sent to the model.

    Returns:
        A dict with exactly the keys in ``FIELDS`` (a key absent from the
        model output maps to ``None``), or a dict with a single ``"error"``
        key describing why no result could be produced. This function does
        not raise for model or parsing failures.
    """
    text = (text or "").strip()
    if not text:
        return {"error": "Nothing to parse — paste or type a description first."}
    if len(text) > 4000:
        text = text[:4000]
    if _GENERATE is None:
        return {"error": "The spec parser model isn't loaded in this environment."}
    try:
        raw = _GENERATE(text)
    except Exception as e:  # noqa: BLE001
        return {"error": f"Spec parser failed: {e}"}
    m = re.search(r"\{.*\}", raw, re.DOTALL)
    if not m:
        return {"error": f"The parser didn't return JSON. Raw output: {raw[:200]}"}
    candidate = m.group(0)
    try:
        # The greedy match runs from the first "{" to the LAST "}", so it can
        # include trailing text or a repeated object. raw_decode parses only
        # the first JSON document and ignores what follows, so a valid
        # leading object is no longer rejected as malformed.
        obj, _end = json.JSONDecoder().raw_decode(candidate)
    except json.JSONDecodeError:
        return {"error": f"The parser returned malformed JSON: {candidate[:200]}"}
    # candidate starts with "{", so a successful decode is always a dict.
    return {f: obj.get(f) for f in FIELDS}
|