files for inference

ea2eee0 verified 27 days ago

4.71 kB

	"""
	Interactive chat with your GPT-2 model — Notebook version
	Run Cell 1 and Cell 2 in order, then use the chat widget.
	You can copy the code into colab cells. ensure you rename to your model path
	"""

	# ── Cell 1: Setup & load model ────────────────────────────────────────────────
	import torch
	import tiktoken
	import ipywidgets as widgets
	from IPython.display import display, HTML
	from Architecture import LanguageModel
	from GeneratorHF import HFTextGenerator

	CHECKPOINT_PATH = "/content/drive/MyDrive/GPT2-small/refinedGPT2small.pth" #on google colab
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	def load_model(checkpoint_path, device):
	print(f"Loading model from {checkpoint_path}...")
	ckpt = torch.load(checkpoint_path, map_location=device, weights_only=False)
	config = ckpt["config"]
	model = LanguageModel(config)
	state_dict = ckpt["model_state_dict"]

	cleaned = {}
	for k, v in state_dict.items():
	for prefix in ["module.", "_orig_mod."]:
	while k.startswith(prefix):
	k = k[len(prefix):]
	cleaned[k] = v

	model.load_state_dict(cleaned)
	model.to(device)
	model.eval()

	params = sum(p.numel() for p in model.parameters()) / 1e6
	val_loss = ckpt.get("loss", "?")
	print(f" {params:.1f}M params \| val_loss {val_loss} \| device {device}")
	return model, config

	model, config = load_model(CHECKPOINT_PATH, DEVICE)
	enc = tiktoken.get_encoding("gpt2")
	gen = HFTextGenerator(model, enc, DEVICE, context_size=config["context_length"])
	print("Ready.")


	# ── Cell 2: Chat widget ───────────────────────────────────────────────────────

	# ── Sampling controls ─────────────────────────────────────────────────────────
	temp_slider = widgets.FloatSlider(
	value=0.8, min=0.1, max=2.0, step=0.05,
	description="Temperature",
	style={"description_width": "90px"},
	layout=widgets.Layout(width="400px")
	)
	topk_slider = widgets.IntSlider(
	value=40, min=1, max=200, step=1,
	description="Top-k",
	style={"description_width": "90px"},
	layout=widgets.Layout(width="400px")
	)
	tokens_slider = widgets.IntSlider(
	value=150, min=10, max=500, step=10,
	description="Max tokens",
	style={"description_width": "90px"},
	layout=widgets.Layout(width="400px")
	)

	# ── Input & buttons ───────────────────────────────────────────────────────────
	prompt_box = widgets.Textarea(
	placeholder="Type your prompt here...",
	layout=widgets.Layout(width="700px", height="70px")
	)
	generate_btn = widgets.Button(
	description="Generate",
	button_style="primary",
	layout=widgets.Layout(width="120px")
	)
	clear_btn = widgets.Button(
	description="Clear",
	button_style="warning",
	layout=widgets.Layout(width="80px")
	)

	# ── Output area ───────────────────────────────────────────────────────────────
	output_area = widgets.Output()

	def on_generate(b):
	prompt = prompt_box.value.strip()
	if not prompt:
	return

	with output_area:
	print(f"\nYOU > {prompt}")
	print("MODEL > ", end="", flush=True)

	continuation = gen.generate(
	prompt,
	max_new_tokens=tokens_slider.value,
	temperature=temp_slider.value,
	top_k=topk_slider.value,
	)
	print(continuation)
	print("-" * 70)

	def on_clear(b):
	output_area.clear_output()
	prompt_box.value = ""

	generate_btn.on_click(on_generate)
	clear_btn.on_click(on_clear)

	# ── Layout ────────────────────────────────────────────────────────────────────
	controls = widgets.VBox([
	widgets.HTML("<b>Sampling settings</b>"),
	temp_slider,
	topk_slider,
	tokens_slider,
	])
	buttons = widgets.HBox([generate_btn, clear_btn])
	ui = widgets.VBox([
	widgets.HTML("<h3>GPT-2 Small — Chat</h3>"),
	controls,
	widgets.HTML("<br><b>Prompt</b>"),
	prompt_box,
	buttons,
	widgets.HTML("<br><b>Output</b>"),
	output_area,
	])

	display(ui)