"""Smoke test: request one puppet line from a hosted chat model and check
whether the reply can be parsed as a JSON object."""

import json
import os

from huggingface_hub import InferenceClient

# Model used when HF_API_MODEL_ID is not set in the environment.
DEFAULT_HF_API_MODEL = "Qwen/Qwen3-4B-Instruct-2507:nscale"


def get_hf_token() -> str | None:
    """Return the API token from the environment.

    ``HF_TOKEN`` is preferred; ``HUGGINGFACEHUB_API_TOKEN`` is the fallback.
    Returns ``None`` when neither variable is set to a non-empty value.
    """
    return os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")


def _parse_braced_object(candidate: str) -> dict | None:
    """Decode the outermost ``{...}`` span of *candidate* as a JSON object.

    When no such span exists, the whole string is handed to the decoder.
    Returns ``None`` if decoding fails or the decoded value is not a dict.
    """
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start != -1 and end > start:
        candidate = candidate[start : end + 1]
    try:
        decoded = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    return decoded if isinstance(decoded, dict) else None


def extract_json_object(text: str) -> dict | None:
    """Extract a JSON object from model output.

    Surrounding whitespace, Markdown code fences, and text outside the
    outermost pair of braces are tolerated.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded object, or ``None`` when no JSON object can be decoded.
    """
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return _parse_braced_object(cleaned)

    unfenced = cleaned.strip("`")
    _, newline, after_first_line = unfenced.partition("\n")

    candidates = []
    if newline:
        # Usual shape: the fence line only carries a language tag ("```json"),
        # so the payload starts on the following line.
        candidates.append(after_first_line)
    # Fallback: the JSON opens on the fence line itself ("```{ ..."), in which
    # case dropping the first line would discard the opening brace.
    candidates.append(unfenced)

    for candidate in candidates:
        parsed = _parse_braced_object(candidate)
        if parsed is not None:
            return parsed
    return None


def main() -> None:
    """Send one chat-completion request and report whether the reply parses.

    Prints the raw model output followed by a line stating whether JSON
    parsing succeeded.

    Raises:
        RuntimeError: If no API token is configured in the environment.
    """
    token = get_hf_token()
    if not token:
        raise RuntimeError("HF_TOKEN is not configured")

    model_id = os.getenv("HF_API_MODEL_ID", DEFAULT_HF_API_MODEL)
    client = InferenceClient(token=token)
    completion = client.chat.completions.create(
        model=model_id,
        messages=[
            {
                "role": "system",
                "content": "You are a puppet actor in a short absurd theater scene. Return valid JSON only.",
            },
            {
                "role": "user",
                "content": (
                    "Scene: A moon library where the sun is overdue.\n"
                    "Actor: Mina Moonbutton\n"
                    "Goal: recover the missing sun\n"
                    "Style: stern, poetic, tiny\n"
                    "Recent transcript: none\n"
                    "Write one short puppet line under 25 words.\n\n"
                    "Return JSON with keys: line, emotion, gesture, stage_effect, tool_request."
                ),
            },
        ],
        max_tokens=120,
        temperature=0.75,
        top_p=0.9,
    )

    # The message content may be None; normalise to an empty string.
    text = completion.choices[0].message.content or ""
    parsed = extract_json_object(text)

    print("Raw model output:")
    print(text)
    print(f"JSON parsing succeeded: {parsed is not None}")


if __name__ == "__main__":
    main()