File size: 3,482 Bytes
a2b3989
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""
Test with EXACT training format to see if model generates correctly
"""

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "scripts" / "inference"))

from inference_codellama import load_local_model
import torch
from transformers import AutoTokenizer

def _preview(text: str, limit: int = 300) -> str:
    """Return the first *limit* chars of *text*, with '...' only if truncated."""
    return text[:limit] + "..." if len(text) > limit else text


def main() -> None:
    """Smoke-test generation using the exact instruction+EOS prompt format
    that the model was fine-tuned on.

    Loads the first sample from the training split, prompts the fine-tuned
    model with ``instruction + EOS`` (greedy decoding), prints the generated
    continuation, and reports a rough heuristic check for whether the output
    looks like Verilog (presence of 'module'/'endmodule').

    Side effects only (file reads, model load, stdout); returns nothing.
    """
    script_dir = Path(__file__).parent
    model_path = script_dir / "training-outputs" / "codellama-fifo-v1"
    base_model_path = script_dir / "models" / "base-models" / "CodeLlama-7B-Instruct"
    train_dataset = script_dir / "datasets" / "processed" / "split" / "train.jsonl"

    print("=" * 80)
    print("🧪 TESTING WITH EXACT TRAINING FORMAT")
    print("=" * 80)

    # Load the first training sample (one JSON object per line).
    with open(train_dataset, 'r') as f:
        sample = json.loads(f.readline())

    instruction = sample["instruction"]
    expected_response = sample["response"]

    print(f"\n📝 Instruction ({len(instruction)} chars):")
    print(_preview(instruction))

    print(f"\n🎯 Expected Response ({len(expected_response)} chars):")
    print(_preview(expected_response))

    # Load model (base model path is optional; helper handles None).
    print("\n📦 Loading model...")
    model, tokenizer = load_local_model(
        str(model_path),
        str(base_model_path) if base_model_path.exists() else None
    )

    # EXACT training format: instruction + EOS (model continues after EOS).
    prompt = f"{instruction}{tokenizer.eos_token}"

    print(f"\n🔍 Prompt format (EXACT training format):")
    print(f"   Format: instruction + EOS")
    print(f"   Length: {len(prompt)} chars")
    print()

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1536).to(model.device)

    print(f"📊 Tokenized: {inputs['input_ids'].shape[1]} tokens")
    # Greedy decoding: temperature would be ignored when do_sample=False,
    # so it is intentionally not passed (avoids a transformers warning).
    print(f"\n🤖 Generating with greedy decoding...")
    print("=" * 80)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=1000,
            do_sample=False,  # Greedy decoding
            repetition_penalty=1.2,
            # NOTE: compare against None explicitly — a pad_token_id of 0 is
            # valid but falsy, and must not be replaced by the EOS id.
            pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (strip the prompt prefix).
    input_length = inputs['input_ids'].shape[1]
    generated_ids = outputs[0][input_length:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=False)

    # Drop the trailing EOS marker, if the model emitted one.
    if generated_text.endswith(tokenizer.eos_token):
        generated_text = generated_text[:-len(tokenizer.eos_token)].rstrip()

    print("\n" + "=" * 80)
    print("✅ GENERATED OUTPUT:")
    print("=" * 80)
    print(generated_text)
    print("=" * 80)

    # Heuristic check: does the output look like Verilog code?
    has_module = "module" in generated_text.lower()
    has_endmodule = "endmodule" in generated_text.lower()
    has_verilog = "verilog" in generated_text.lower() or "```" in generated_text

    print(f"\n📊 Analysis:")
    print(f"   Contains 'module': {has_module}")
    print(f"   Contains 'endmodule': {has_endmodule}")
    print(f"   Contains 'verilog': {has_verilog}")
    print(f"   Length: {len(generated_text)} chars")

    if has_module and has_endmodule:
        print(f"   ✅ STATUS: Generated Verilog code!")
    elif has_module:
        print(f"   ⚠️  STATUS: Partial code")
    else:
        print(f"   ❌ STATUS: Not generating code")

# Script entry point: run the generation smoke test only when executed
# directly (not when imported as a module).
if __name__ == "__main__":
    main()