#!/usr/bin/env python3
"""
Test with EXACT training format to see if model generates correctly
"""

import json
import re
import sys
from pathlib import Path

# The inference helpers live in scripts/inference next to this file; the path
# must be extended before `inference_codellama` can be imported.
sys.path.insert(0, str(Path(__file__).parent / "scripts" / "inference"))

from inference_codellama import load_local_model
import torch
from transformers import AutoTokenizer


def _load_first_sample(dataset_path):
    """Return the first record of the JSONL file at *dataset_path*.

    Raises:
        ValueError: if the first line is empty or is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(dataset_path, "r", encoding="utf-8") as f:
        first_line = f.readline()
    if not first_line.strip():
        raise ValueError(f"Training dataset has no first record: {dataset_path}")
    return json.loads(first_line)


def _analyze_generated_text(text):
    """Return ``(has_module, has_endmodule, has_verilog)`` flags for *text*.

    Keywords are matched as whole words, case-insensitively, so that
    ``endmodule`` is not also counted as an occurrence of ``module``.
    """
    has_module = re.search(r"\bmodule\b", text, re.IGNORECASE) is not None
    has_endmodule = re.search(r"\bendmodule\b", text, re.IGNORECASE) is not None
    has_verilog = "verilog" in text.lower() or "```" in text
    return has_module, has_endmodule, has_verilog


def main():
    """Generate from the first training sample and report whether it is code.

    Reads the first record of the training split, loads the model through
    ``load_local_model``, prompts it with ``instruction + EOS``, decodes only
    the newly generated tokens, and prints the output followed by a keyword
    based analysis (``module`` / ``endmodule`` / ``verilog``).
    """
    script_dir = Path(__file__).parent
    model_path = script_dir / "training-outputs" / "codellama-fifo-v1"
    base_model_path = script_dir / "models" / "base-models" / "CodeLlama-7B-Instruct"
    train_dataset = script_dir / "datasets" / "processed" / "split" / "train.jsonl"

    print("=" * 80)
    print("🧪 TESTING WITH EXACT TRAINING FORMAT")
    print("=" * 80)

    # Load sample
    sample = _load_first_sample(train_dataset)
    instruction = sample["instruction"]
    expected_response = sample["response"]

    print(f"\n📝 Instruction ({len(instruction)} chars):")
    print(instruction[:300] + "...")
    print(f"\n🎯 Expected Response ({len(expected_response)} chars):")
    print(expected_response[:300] + "...")

    # Load model
    print("\n📦 Loading model...")
    model, tokenizer = load_local_model(
        str(model_path),
        # The base model path is only passed when it exists on disk.
        str(base_model_path) if base_model_path.exists() else None,
    )

    # A tokenizer without an EOS token must not inject the text "None".
    eos_token = tokenizer.eos_token or ""

    # EXACT training format: instruction + EOS (model continues)
    prompt = f"{instruction}{eos_token}"

    print("\n🔍 Prompt format (EXACT training format):")
    print(" Format: instruction + EOS")
    print(f" Length: {len(prompt)} chars")
    print()

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=1536,
    ).to(model.device)

    print(f"📊 Tokenized: {inputs['input_ids'].shape[1]} tokens")
    print("\n🤖 Generating with greedy decoding...")
    print("=" * 80)

    # Compare against None: a pad token id of 0 is valid and must be kept.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            # Greedy decoding; no temperature is passed because it is only
            # used by sampling-based generation.
            do_sample=False,
            repetition_penalty=1.2,
            pad_token_id=pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only new tokens
    input_length = inputs['input_ids'].shape[1]
    generated_ids = outputs[0][input_length:]
    # Special tokens are kept while decoding so a trailing EOS can be
    # stripped explicitly below.
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)

    if eos_token and generated_text.endswith(eos_token):
        generated_text = generated_text[:-len(eos_token)].rstrip()

    print("\n" + "=" * 80)
    print("✅ GENERATED OUTPUT:")
    print("=" * 80)
    print(generated_text)
    print("=" * 80)

    # Check if it's code
    has_module, has_endmodule, has_verilog = _analyze_generated_text(generated_text)

    print("\n📊 Analysis:")
    print(f" Contains 'module': {has_module}")
    print(f" Contains 'endmodule': {has_endmodule}")
    print(f" Contains 'verilog': {has_verilog}")
    print(f" Length: {len(generated_text)} chars")

    if has_module and has_endmodule:
        print(" ✅ STATUS: Generated Verilog code!")
    elif has_module or has_endmodule:
        # Only one of the two keywords appeared: the module is incomplete.
        print(" ⚠️ STATUS: Partial code")
    else:
        print(" ❌ STATUS: Not generating code")


if __name__ == "__main__":
    main()