"""
KnowForge-0.6B inference — standalone, no extra deps beyond transformers.
CLI: python inference.py "ZELPH RULES: ... Question: ..."
API: from inference import ask; result = ask("ZELPH RULES: ...")
"""
import re
import sys
from pathlib import Path
from typing import Optional
_MODEL_DIR = Path(__file__).parent
_pipeline = None # lazy singleton
def _load_pipeline():
global _pipeline
if _pipeline is not None:
return _pipeline
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
tokenizer = AutoTokenizer.from_pretrained(str(_MODEL_DIR), trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
str(_MODEL_DIR),
dtype=torch.float16,
device_map="auto",
trust_remote_code=True,
)
model.eval()
_pipeline = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
)
return _pipeline
def _split_think(raw: str) -> tuple[str, str]:
"""Split ... from final answer. Returns (answer, reasoning).
Handles truncated output where was never generated.
"""
closed_match = re.search(r"(.*?)", raw, re.DOTALL)
if closed_match:
reasoning = closed_match.group(1).strip()
answer = re.sub(r".*?", "", raw, flags=re.DOTALL).strip()
return answer, reasoning
# Unclosed block — generation was cut off during reasoning
open_match = re.search(r"(.*)", raw, re.DOTALL)
if open_match:
return "", open_match.group(1).strip()
return raw.strip(), ""
def ask(
prompt: str,
max_new_tokens: int = 512,
temperature: float = 0.0,
do_sample: bool = False,
) -> dict:
"""
Run a KnowForge prompt through the model.
Args:
prompt: Full user-turn text, e.g. "ZELPH RULES: ... Question: ..."
max_new_tokens: Max tokens to generate.
temperature: Sampling temperature (ignored when do_sample=False).
do_sample: Enable sampling; set True + temperature>0 for stochastic output.
Returns:
{
"answer": str — text after (or full output if no think block),
"reasoning": str — text inside ..., empty string if absent,
}
"""
pipe = _load_pipeline()
messages = [
{
"role": "system",
"content": (
"You are given rules for a fictional system that does NOT exist in the "
"real world. Reason STRICTLY from the rules provided. Do NOT use any "
"outside knowledge. Show your reasoning inside ... tags "
"before giving your final answer."
),
},
{"role": "user", "content": prompt},
]
gen_kwargs: dict = {
"max_new_tokens": max_new_tokens,
"do_sample": do_sample,
"pad_token_id": pipe.tokenizer.eos_token_id,
}
if do_sample:
gen_kwargs["temperature"] = temperature
outputs = pipe(messages, **gen_kwargs)
raw = outputs[0]["generated_text"][-1]["content"]
answer, reasoning = _split_think(raw)
return {"answer": answer, "reasoning": reasoning}
def _main():
if len(sys.argv) < 2:
print("Usage: python inference.py \"ZELPH RULES: ... Question: ...\"")
sys.exit(1)
prompt = " ".join(sys.argv[1:])
result = ask(prompt)
print("Answer:", result["answer"])
if result["reasoning"]:
print("\nReasoning:")
print(result["reasoning"])
if __name__ == "__main__":
_main()