"""Test generation with Loop Attention (use_cache=False).""" |
|
|
|
|
|
import sys |
|
|
import torch |
|
|
sys.path.insert(0, '/content') |
|
|
from modeling_qwen_loop import Qwen3LoopForCausalLM |
|
|
from transformers import AutoTokenizer |
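
# Local paths to the base model and the trained gate-projection checkpoint.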
MODEL_PATH = "/content/Qwen3-0.6B"
GATE_PATH = "/content/Qwen3-0.6B-looped/checkpoints/gate_projections_epoch_3.pt"

print("\n1. Loading model...")
model = Qwen3LoopForCausalLM.from_pretrained(MODEL_PATH)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print("2. Loading trained gates...")
gate_state = torch.load(GATE_PATH, map_location=device)
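
# Copy each trained gate tensor into the matching per-layer attention gate
# projection. Checkpoint keys are assumed to be dotted paths of the form
# "layers.<idx>. ... .<weight|bias>"; the index/name parsing below relies
# on that layout.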
for key, value in gate_state.items():
    parts = key.split('.')
    layer_idx = int(parts[1])
    param_name = parts[-1]
    if param_name == 'weight':
        model.model.layers[layer_idx].self_attn.gate.weight.data = value.to(device)
    elif param_name == 'bias':
        model.model.layers[layer_idx].self_attn.gate.bias.data = value.to(device)
print(" Gates loaded!")

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
tokenizer.pad_token = tokenizer.eos_token  # ensure a pad token is defined for generation

model.eval()
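
# Varied prompts (factual recall, code, open-ended text) to spot-check
# generation quality with the trained gates.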
prompts = [
    "The capital of France is",
    "def fibonacci(n):",
    "In the year 2050,",
    "The quick brown fox",
    "Explain quantum computing in simple terms:",
]
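
# Generate without the KV cache (use_cache=False): each new token recomputes
# the full forward pass, which is how the Loop Attention path is exercised
# here (per the module docstring).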
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        out = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,  # explicit mask, since pad == eos
            max_new_tokens=50,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            use_cache=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    text = tokenizer.decode(out[0], skip_special_tokens=True)
    print(f"\nPrompt: {prompt}")
    print(f"Output: {text}")