File size: 6,739 Bytes
e465de3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
#!/usr/bin/env python3
"""
Test inference on a single training sample with exact training format
"""
import json
import sys
from pathlib import Path
# Add scripts to path
sys.path.insert(0, str(Path(__file__).parent / "scripts" / "inference"))
from inference_codellama import load_local_model
import torch
def generate_with_exact_format(model, tokenizer, instruction, max_new_tokens=800, temperature=0.1):
    """Generate a completion using the exact training prompt format.

    Training concatenated ``instruction + EOS + response + EOS``, so at
    inference time we feed ``instruction + EOS`` and let the model continue
    with the response portion.

    Args:
        model: causal LM exposing ``.generate`` and ``.device``.
        tokenizer: matching tokenizer; must define ``eos_token``.
        instruction: prompt text (EOS is appended here, not by the caller).
        max_new_tokens: generation budget for the response.
        temperature: 0 means greedy decoding; >0 enables nucleus sampling.

    Returns:
        The newly generated text with a trailing EOS token stripped.
    """
    # Exact training format: instruction + EOS (model generates the response).
    prompt = f"{instruction}{tokenizer.eos_token}"
    print("\n📝 Prompt Format (matching training):")
    print(f" Length: {len(prompt)} chars")
    print(f" First 200 chars: {prompt[:200]}...")
    print()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1536).to(model.device)
    print("📊 Tokenized:")
    print(f" Input tokens: {inputs['input_ids'].shape[1]}")
    print()
    print("🤖 Generating...")
    print("=" * 80)
    do_sample = temperature > 0
    # BUG FIX: pad_token_id == 0 is a valid token id; the original truthiness
    # check (`if tokenizer.pad_token_id`) silently replaced it with EOS.
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=do_sample,
            top_p=0.9 if do_sample else None,
            repetition_penalty=1.2,  # higher to prevent repetition loops
            pad_token_id=pad_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens (everything after the prompt).
    generated_ids = outputs[0][inputs['input_ids'].shape[1]:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)
    # Strip a trailing EOS token if the model emitted one.
    if generated_text.endswith(tokenizer.eos_token):
        generated_text = generated_text[:-len(tokenizer.eos_token)].rstrip()
    return generated_text
def extract_code_from_response(text):
    """Pull Verilog source out of a markdown-fenced response.

    Preference order: a ```verilog fenced block first, then any generic
    ``` fenced block. If no fence closes properly, the whole text is
    returned stripped of surrounding whitespace.
    """
    if not text:
        return text
    # An explicit ```verilog fence takes priority over a generic one.
    marker = '```verilog'
    if marker in text:
        body_start = text.index(marker) + len(marker)
        body_end = text.find('```', body_start)
        if body_end != -1:
            return text[body_start:body_end].strip()
    # Fall back to the first generic code fence.
    fence = text.find('```')
    if fence != -1:
        newline = text.find('\n', fence)
        # No newline after the opener: body begins right after the backticks.
        body_start = fence + 3 if newline == -1 else newline + 1
        body_end = text.find('```', body_start)
        if body_end != -1:
            return text[body_start:body_end].strip()
    # No usable fence found: hand back the raw (stripped) text.
    return text.strip()
def main():
    """Load the first training sample and test inference at several temperatures.

    Restores string literals that were corrupted by mojibake / split across
    lines in the original (the ✅/⚠️/❌ status prints were broken mid-literal,
    which is a syntax error as written).
    """
    # Paths are resolved relative to this script's directory.
    script_dir = Path(__file__).parent
    model_path = script_dir / "training-outputs" / "codellama-fifo-v1"
    base_model_path = script_dir / "models" / "base-models" / "CodeLlama-7B-Instruct"
    train_dataset = script_dir / "datasets" / "processed" / "split" / "train.jsonl"
    print("=" * 80)
    print("🧪 TESTING SINGLE TRAINING SAMPLE (EXACT TRAINING FORMAT)")
    print("=" * 80)
    print(f"Model: {model_path}")
    print(f"Base: {base_model_path}")
    print("=" * 80)
    # Load only the first JSONL record — this is a single-sample smoke test.
    print("\n📝 Loading training sample #1...")
    with open(train_dataset, 'r') as f:
        first_line = f.readline()
        sample = json.loads(first_line)
    instruction = sample.get("instruction", "")
    expected_response = sample.get("response", "")
    expected_code = extract_code_from_response(expected_response)
    print(f"\n📝 Instruction ({len(instruction)} chars):")
    print("-" * 80)
    print(instruction)
    print("-" * 80)
    print(f"\n🎯 Expected Response ({len(expected_response)} chars):")
    print("-" * 80)
    print(expected_response[:500] + "..." if len(expected_response) > 500 else expected_response)
    print("-" * 80)
    # Load the fine-tuned model (falls back to adapter-only if no base model).
    print("\n📦 Loading model...")
    model, tokenizer = load_local_model(
        str(model_path),
        str(base_model_path) if base_model_path.exists() else None,
        use_quantization=None,
        merge_weights=False
    )
    print("✅ Model loaded!\n")
    # Sweep a few low temperatures to see how stable the output is.
    temperatures = [0.1, 0.2, 0.3]
    for temp in temperatures:
        print("\n" + "=" * 80)
        print(f"🔥 TESTING WITH TEMPERATURE: {temp}")
        print("=" * 80)
        try:
            generated_response = generate_with_exact_format(
                model,
                tokenizer,
                instruction,
                max_new_tokens=800,
                temperature=temp
            )
            generated_code = extract_code_from_response(generated_response)
            print("\n" + "=" * 80)
            print(f"✅ GENERATED OUTPUT (Temperature {temp}):")
            print("=" * 80)
            print(generated_response)
            print("=" * 80)
            print("\n📊 Statistics:")
            print(f" Full response length: {len(generated_response)} chars")
            print(f" Extracted code length: {len(generated_code)} chars")
            print(f" Expected code length: {len(expected_code)} chars")
            # Heuristic quality checks: does the output look like Verilog?
            has_module = "module" in generated_response.lower()
            has_endmodule = "endmodule" in generated_response.lower()
            has_verilog_code = "```verilog" in generated_response or ("module" in generated_response and "input" in generated_response)
            print("\n✅ Code Quality Check:")
            print(f" Contains 'module': {has_module}")
            print(f" Contains 'endmodule': {has_endmodule}")
            print(f" Looks like Verilog code: {has_verilog_code}")
            if has_verilog_code and has_endmodule:
                print(" ✅ STATUS: Generated Verilog code!")
            elif has_module:
                print(" ⚠️ STATUS: Partial code (missing endmodule or full implementation)")
            else:
                print(" ❌ STATUS: Not generating code (generating text instead)")
        except Exception as e:
            # Best-effort sweep: report the failure for this temperature and continue.
            print(f"❌ Error with temperature {temp}: {e}")
            import traceback
            traceback.print_exc()
if __name__ == "__main__":
main()
|