"""
Interactive REPL for testing a trained physics problem-solving model.
"""

import argparse
import json
import re
from pathlib import Path

import torch
import yaml

from qwen2_model import Transformer
from tokenizer import Tokenizer
from generation_utils import generate
from tokenizer_wrapper import decode_token_ids


SYSTEM_MESSAGE = (
    "You are a helpful physics tutor. You first think about the reasoning process "
    "in your mind and then provide the user with the answer."
)
USER_TEMPLATE = (
    "{question}\n"
    "Show your reasoning in <think> </think> tags. "
    "Then provide your final answer in <answer> </answer> tags."
)
RESPONSE_PROMPT = "Let me solve this step by step.\n<think>"
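# RESPONSE_PROMPT ends with an unclosed <think> tag on purpose: generation
# starts inside the reasoning block, and print_response() below recovers the
# <think>/<answer> sections with regexes.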


def load_model_and_tokenizer(config_path, checkpoint_path=None):
    """Load the model and tokenizer from a config file and an optional checkpoint."""
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)

    pretrained_model_path = Path(config["model"]["pretrained_model_path"])
    device = torch.device(config["model"]["device"])

    dtype_map = {
        "bfloat16": torch.bfloat16,
        "float16": torch.float16,
        "float32": torch.float32,
    }
    dtype = dtype_map.get(config["model"]["dtype"], torch.bfloat16)

    tokenizer = Tokenizer(str(pretrained_model_path / "tokenizer.json"))
    model = Transformer.from_pretrained(pretrained_model_path, device=device)

    if checkpoint_path:
        print(f"Loading checkpoint from {checkpoint_path}...")
        checkpoint = torch.load(checkpoint_path, map_location=device)

        # Training checkpoints wrap the weights in a dict with metadata;
        # bare state dicts are loaded as-is.
        if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
            state_dict = checkpoint["model_state_dict"]
            print(f"Loaded checkpoint from step {checkpoint.get('step', 'unknown')}")
        else:
            state_dict = checkpoint

        model.load_state_dict(state_dict)
        print("Checkpoint loaded successfully!")

    model.eval()

    return model, tokenizer, device, dtype, config
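
# For reference, a minimal config this loader can consume. Field names are
# taken from the lookups above; the values are illustrative, not from the repo:
#
#   model:
#     pretrained_model_path: /path/to/qwen2-base
#     device: cuda
#     dtype: bfloat16
#   training:
#     max_gen_len: 512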


def generate_response(model, tokenizer, question, device, dtype,
                      max_gen_len=512, temperature=0.7, top_p=0.9):
    """Generate a response for a given physics question."""
    user_message = USER_TEMPLATE.format(question=question)
    # Render the chat messages in the model's template and append
    # RESPONSE_PROMPT so decoding starts mid-<think>.
    prefix = tokenizer.encode_chat_with_response_prompt(
        [
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": user_message},
        ],
        RESPONSE_PROMPT,
    )

    tokens = tokenizer.tokenize(prefix)
    prefix_token_ids = tokens.ids

    print("\nGenerating response...")
    with torch.inference_mode():
        generated_token_ids, is_finished = generate(
            model=model,
            tokenizer=tokenizer,
            prompt_token_ids=prefix_token_ids,
            max_gen_len=max_gen_len,
            temperature=temperature,
            top_p=top_p,
            device=device,
            dtype=dtype,
        )

    generated_text = decode_token_ids(tokenizer, generated_token_ids)

    # Return the full transcript (prompt + completion) so callers can parse the
    # <think>/<answer> tags, plus a flag for whether generation finished before
    # hitting max_gen_len.
    return prefix + generated_text, is_finished


def extract_answer(text):
    """Extract the answer from <answer> tags."""
    answer_match = re.search(r"<answer>(.*?)</answer>", text, re.DOTALL)
    if answer_match:
        return answer_match.group(1).strip()
    return None
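
# e.g. extract_answer("<answer> 42 J </answer>") -> "42 J". The match is
# non-greedy and, via re.DOTALL, spans newlines inside the tags.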


def print_response(full_text):
    """Pretty print the model's response."""
    think_match = re.search(r"<think>(.*?)</think>", full_text, re.DOTALL)
    answer_match = re.search(r"<answer>(.*?)</answer>", full_text, re.DOTALL)

    print("\n" + "=" * 80)

    if think_match:
        print("\n🤔 REASONING:")
        print("-" * 80)
        print(think_match.group(1).strip())

    if answer_match:
        print("\n✅ ANSWER:")
        print("-" * 80)
        print(answer_match.group(1).strip())
    else:
        print("\n⚠️ WARNING: No answer tags found in response")
        print("\nFull response:")
        print("-" * 80)
        print(full_text)

    print("=" * 80 + "\n")


def interactive_mode(model, tokenizer, device, dtype, config):
    """Run interactive REPL mode."""
    print("\n" + "=" * 80)
    print("Physics Problem Solver - Interactive Mode")
    print("=" * 80)
    print("\nCommands:")
    print(" - Type your physics question and press Enter")
    print(" - Type 'quit' or 'exit' to exit")
    print(" - Type 'config' to change generation parameters")
    print(" - Type 'example' to see example questions")
    print("=" * 80 + "\n")

    # Session-level generation settings; the 'config' command edits them below.
    max_gen_len = config["training"].get("max_gen_len", 512)
    temperature = 0.7
    top_p = 0.9

    while True:
        try:
            user_input = input("\n📝 Enter physics question (or command): ").strip()

            if not user_input:
                continue

            if user_input.lower() in ['quit', 'exit', 'q']:
                print("\nGoodbye! 👋")
                break

            if user_input.lower() == 'example':
                print("\nExample questions:")
                print(" 1. A ball is thrown upward with velocity 20 m/s. What is its maximum height?")
                print(" 2. Calculate the force needed to accelerate a 5 kg object at 3 m/s²")
                print(" 3. What is the wavelength of light with frequency 5×10¹⁴ Hz?")
                print(" 4. A 2 kg block slides down a 30° incline. What is its acceleration?")
                continue

            if user_input.lower() == 'config':
                print("\nCurrent settings:")
                print(f" max_gen_len: {max_gen_len}")
                print(f" temperature: {temperature}")
                print(f" top_p: {top_p}")

                try:
                    new_max_len = input(f"\nNew max_gen_len [{max_gen_len}]: ").strip()
                    if new_max_len:
                        max_gen_len = int(new_max_len)

                    new_temp = input(f"New temperature [{temperature}]: ").strip()
                    if new_temp:
                        temperature = float(new_temp)

                    new_top_p = input(f"New top_p [{top_p}]: ").strip()
                    if new_top_p:
                        top_p = float(new_top_p)

                    print("\n✓ Configuration updated!")
                except ValueError:
                    print("\n✗ Invalid input. Configuration unchanged.")
                continue

            full_text, is_finished = generate_response(
                model=model,
                tokenizer=tokenizer,
                question=user_input,
                device=device,
                dtype=dtype,
                max_gen_len=max_gen_len,
                temperature=temperature,
                top_p=top_p,
            )

            print_response(full_text)

            if not is_finished:
                print("⚠️ Note: Response was truncated (reached max_gen_len)")

        except KeyboardInterrupt:
            print("\n\nInterrupted. Type 'quit' to exit.\n")
            continue
        except Exception as e:
            print(f"\n✗ Error: {e}\n")
            continue


def batch_inference_mode(model, tokenizer, device, dtype, config, questions_file, output_file):
    """Run batch inference on a file of questions (one question per line)."""
    print(f"\nRunning batch inference on {questions_file}...")

    max_gen_len = config["training"].get("max_gen_len", 512)

    # One question per line; blank lines are skipped.
    with open(questions_file, 'r') as f:
        questions = [line.strip() for line in f if line.strip()]

    print(f"Found {len(questions)} questions")

    results = []
    for i, question in enumerate(questions, 1):
        print(f"\n[{i}/{len(questions)}] Processing: {question[:60]}...")

        full_text, is_finished = generate_response(
            model=model,
            tokenizer=tokenizer,
            question=question,
            device=device,
            dtype=dtype,
            max_gen_len=max_gen_len,
            temperature=0.7,
            top_p=0.9,
        )

        answer = extract_answer(full_text)

        results.append({
            'question': question,
            'full_response': full_text,
            'answer': answer,
            'is_finished': is_finished,
        })

    with open(output_file, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Results saved to {output_file}")


def main():
    parser = argparse.ArgumentParser(description="Interactive inference for physics problem solver")
    parser.add_argument("--config", type=str, required=True, help="Path to config YAML file")
    parser.add_argument("--checkpoint", type=str, help="Path to model checkpoint (optional)")
    parser.add_argument("--batch", action="store_true", help="Run batch inference mode")
    parser.add_argument("--questions", type=str, help="Path to questions file (for batch mode)")
    parser.add_argument("--output", type=str, default="results.json", help="Output file (for batch mode)")

    args = parser.parse_args()

    # Validate argument combinations before paying the model-loading cost.
    if args.batch and not args.questions:
        parser.error("--questions file required for batch mode")

    print("Loading model and tokenizer...")
    model, tokenizer, device, dtype, config = load_model_and_tokenizer(
        args.config,
        args.checkpoint,
    )
    print("✓ Model loaded successfully!\n")

    if args.batch:
        batch_inference_mode(model, tokenizer, device, dtype, config, args.questions, args.output)
    else:
        interactive_mode(model, tokenizer, device, dtype, config)


if __name__ == "__main__":
    main()