Spaces:

jdsb06
/

meta-r2

Sleeping

github-actions[bot]

Deploy Space snapshot

ddbc1ba about 1 month ago

9 kB

	"""
	LifeStack Model — Inference Script
	===================================
	Usage:
	python scripts/inference.py --model ./lifestack_model
	python scripts/inference.py --model ./lifestack_model --scenario "My car broke down and I have a meeting in 2 hours"
	python scripts/inference.py --model ./lifestack_model --interactive
	"""

	import argparse
	import json
	import re
	import torch
	from transformers import AutoTokenizer

	# ── Load model ────────────────────────────────────────────────────────────────

	def load_model(model_dir: str):
	"""Load the LoRA adapter on top of base Qwen2.5-1.5B.
	Tries Unsloth first (2x faster), falls back to standard PEFT.
	"""
	try:
	from unsloth import FastLanguageModel
	print("Loading with Unsloth (fast)...")
	model, tokenizer = FastLanguageModel.from_pretrained(
	model_name=model_dir,
	max_seq_length=2048,
	load_in_4bit=True,
	)
	FastLanguageModel.for_inference(model)
	print("✅ Loaded via Unsloth")
	return model, tokenizer

	except ImportError:
	print("Unsloth not installed — using standard PEFT (slower)...")
	from transformers import AutoModelForCausalLM
	from peft import PeftModel

	base_model_name = "Qwen/Qwen2.5-1.5B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_dir)

	base = AutoModelForCausalLM.from_pretrained(
	base_model_name,
	torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
	device_map="auto",
	)
	model = PeftModel.from_pretrained(base, model_dir)
	model.eval()
	print("✅ Loaded via PEFT")
	return model, tokenizer


	# ── Build prompt ───────────────────────────────────────────────────────────────

	SYSTEM = (
	"You are LifeStack, an AI life-management agent. "
	"Given a real-life crisis, respond with a single optimal action as valid JSON.\n\n"
	"Required JSON format:\n"
	'{"action_type": "negotiate\|communicate\|delegate\|spend\|reschedule\|rest\|deprioritize\|execute", '
	'"target_domain": "career\|finances\|relationships\|physical_health\|mental_wellbeing\|time\|transport_crisis\|flight_crisis\|code_merge_crisis", '
	'"metric_changes": {"domain.submetric": delta_value}, '
	'"resource_cost": {"time": hours, "money": dollars, "energy": units}, '
	'"reasoning": "brief explanation of why this is the best action"}'
	)

	def build_prompt(scenario: str) -> str:
	return (
	f"<\|im_start\|>system\n{SYSTEM}<\|im_end\|>\n"
	f"<\|im_start\|>user\n"
	f"CRISIS: {scenario}\n\n"
	f"Respond with ONLY valid JSON — no markdown, no explanation outside the JSON.<\|im_end\|>\n"
	f"<\|im_start\|>assistant\n"
	)


	class JsonCompleteStopping(torch.nn.Module):
	"""Stop once the first complete JSON object closes."""

	def __init__(self, tokenizer, prompt_len: int):
	super().__init__()
	self.tokenizer = tokenizer
	self.prompt_len = prompt_len

	def __call__(self, input_ids: torch.LongTensor, scores, **kwargs) -> bool:
	text = self.tokenizer.decode(input_ids[0][self.prompt_len:], skip_special_tokens=True)
	depth = 0
	entered = False
	for ch in text:
	if ch == "{":
	depth += 1
	entered = True
	elif ch == "}":
	depth -= 1
	if entered and depth == 0:
	return True
	return False


	def extract_json_payload(completion: str) -> dict:
	"""Parse the first complete JSON object, ignoring trailing text."""
	decoder = json.JSONDecoder()
	for match in re.finditer(r"\{", completion.strip()):
	try:
	obj, _ = decoder.raw_decode(completion[match.start():].strip())
	except json.JSONDecodeError:
	continue
	if isinstance(obj, dict):
	return obj
	return {"raw_output": completion, "parse_error": "no valid JSON object found"}


	# ── Inference ──────────────────────────────────────────────────────────────────

	def resolve(model, tokenizer, scenario: str, temperature: float = 0.3) -> dict:
	prompt = build_prompt(scenario)
	device = next(model.parameters()).device

	inputs = tokenizer(prompt, return_tensors="pt").to(device)
	prompt_len = inputs["input_ids"].shape[1]
	pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id

	with torch.no_grad():
	output_ids = model.generate(
	**inputs,
	max_new_tokens=160,
	temperature=temperature,
	do_sample=temperature > 0,
	pad_token_id=pad_token_id,
	eos_token_id=tokenizer.eos_token_id,
	stopping_criteria=[JsonCompleteStopping(tokenizer, prompt_len)],
	)

	# Decode only the new tokens (not the prompt)
	completion = tokenizer.decode(
	output_ids[0][prompt_len:],
	skip_special_tokens=True
	).strip()

	return extract_json_payload(completion)


	# ── Built-in scenarios for quick demo ─────────────────────────────────────────

	DEMO_SCENARIOS = [
	"My flight got cancelled and my card got declined at the rebooking desk. I have a client presentation tomorrow morning.",
	"My car broke down on the highway. The repair will take 3 days and I have no other transport.",
	"I haven't slept properly in 2 weeks. My productivity is shot and my partner says I'm distant.",
	"A surprise tax audit letter arrived. I owe $4,000 I don't have liquid.",
	"My boss just dropped a 12-hour task on me at 5PM Friday and said it's due Monday morning.",
	"The morning train is delayed 90 minutes and I have a 9AM client meeting I can't miss.",
	"I've been double-booked every weekend for a month and I can't say no to anyone.",
	"A critical git merge broke the staging environment 2 hours before a demo.",
	]


	# ── Main ───────────────────────────────────────────────────────────────────────

	def main():
	parser = argparse.ArgumentParser(description="LifeStack Inference")
	parser.add_argument("--model", type=str, default="./lifestack_model",
	help="Path to the downloaded/unzipped model directory")
	parser.add_argument("--scenario", type=str, default=None,
	help="Describe your crisis (quoted string)")
	parser.add_argument("--interactive", action="store_true",
	help="Interactive REPL — type your own crises")
	parser.add_argument("--demo", action="store_true",
	help="Run all 8 built-in demo scenarios")
	parser.add_argument("--temperature", type=float, default=0.3,
	help="Generation temperature (default 0.3 = focused)")
	args = parser.parse_args()

	print(f"\n{'='*60}")
	print(" LifeStack — AI Life Management Agent")
	print(f"{'='*60}\n")

	model, tokenizer = load_model(args.model)
	print()

	if args.demo:
	for i, scenario in enumerate(DEMO_SCENARIOS, 1):
	print(f"[{i}/8] {scenario[:80]}...")
	action = resolve(model, tokenizer, scenario, args.temperature)
	print(json.dumps(action, indent=2))
	print()

	elif args.interactive:
	print("Interactive mode — type your crisis and press Enter.")
	print("Type 'quit' to exit.\n")
	while True:
	try:
	scenario = input("Crisis > ").strip()
	except (EOFError, KeyboardInterrupt):
	break
	if scenario.lower() in ("quit", "exit", "q"):
	break
	if not scenario:
	continue
	action = resolve(model, tokenizer, scenario, args.temperature)
	print("\nLifeStack Action:")
	print(json.dumps(action, indent=2))
	print()

	elif args.scenario:
	action = resolve(model, tokenizer, args.scenario, args.temperature)
	print("LifeStack Action:")
	print(json.dumps(action, indent=2))

	else:
	# Default: run 1 demo scenario
	scenario = DEMO_SCENARIOS[0]
	print(f"Demo scenario: {scenario}\n")
	action = resolve(model, tokenizer, scenario, args.temperature)
	print("LifeStack Action:")
	print(json.dumps(action, indent=2))


	if __name__ == "__main__":
	main()