"""Manual smoke check for ``OmniClient`` configured for ``llama_cpp_server``.

Run as a script. It builds a ``Settings`` object from the command-line
arguments, prints the client's health status, sends one plain chat request
and one JSON chat request, and prints the results.

Arguments:
    --model-path  (required) path to the model file.
    --model-name  optional model name; defaults to the last component of
                  ``--model-path``.
    --base-url    server base URL (default ``http://127.0.0.1:8080/v1``).
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Optional, Sequence

# Make the parent of this script's directory importable so that the
# project-local ``config`` and ``llm`` imports below resolve when the file is
# executed directly. This must run before those imports.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from config import Settings  # noqa: E402
from llm.omni_client import OmniClient, _is_reasoning_model  # noqa: E402


def _parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options (``sys.argv[1:]`` when *argv* is None)."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", required=True)
    parser.add_argument("--model-name", default="")
    parser.add_argument("--base-url", default="http://127.0.0.1:8080/v1")
    return parser.parse_args(argv)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the smoke check and print each result to stdout.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.

    Any exception from the health check or the plain chat request propagates.
    A failure of the JSON chat step is caught and reported as a ``JSON ERR``
    line instead, so the process exit status does not reflect it.
    """
    args = _parse_args(argv)

    model_path = Path(args.model_path)
    # An empty --model-name falls back to the final path component.
    model_name = args.model_name or model_path.name

    settings = Settings(
        llm_provider="llama_cpp_server",
        llamacpp_model_path=model_path,
        llm_model=model_name,
        llamacpp_base_url=args.base_url,
        witness_chat_tts=False,
    )
    client = OmniClient(settings)

    print("model:", model_name)
    # NOTE(review): the label implies "/no_think" is injected for models this
    # helper flags -- confirm against llm.omni_client.
    print(
        "reasoning-gated (/no_think injected):",
        _is_reasoning_model(model_name, model_path),
    )
    print("health ready:", client.health().get("ready"))

    reply = client.chat(
        "You are a terse witness in an English detective game. Answer in one short English sentence.",
        "What color was the folder the suspect carried?",
        task="interview",
        temperature=0.2,
    )
    print("INTERVIEW:", repr(reply.text))

    # Deliberately broad: this is the script's top-level reporting boundary,
    # and any failure of the JSON step (request, parsing or serialisation)
    # should be shown rather than abort the run with a traceback.
    try:
        payload = client.json_chat(
            "You output only a JSON object with keys: summary (string), confidence (number 0-1).",
            "Summarize this sighting: a man in a grey raincoat boarded a bus at 9pm carrying a blue folder.",
            task="witness",
        )
        # Truncate so an unexpectedly large response stays readable.
        print("JSON OK:", json.dumps(payload)[:300])
    except Exception as exc:
        print("JSON ERR:", type(exc).__name__, exc)


if __name__ == "__main__":
    main()