tefoteknik committed on
Commit
03cd164
·
verified ·
1 Parent(s): 12b75a1

Upload generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. generate.py +94 -0
generate.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from src.models.agiformer import AGIFORMER
3
+ import os
4
+
5
def generate_text(model_path, prompt_text, max_new_tokens=200):
    """Autoregressively generate text from a trained AGIFORMER checkpoint.

    Args:
        model_path: Path to a saved state_dict (.pth). When 'best_model.pth'
            is requested but missing, falls back to 'last_model.pth'.
        prompt_text: Seed string; encoded to UTF-8 bytes for the byte-level model.
        max_new_tokens: Upper bound on generated bytes. Effectively rounded
            down to a multiple of PATCH_SIZE, since the model emits whole patches.

    Returns:
        None. The generated continuation is streamed to stdout.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model config -- must match the values used during training.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4

    print(f"Loading model from {model_path} on {DEVICE}...")

    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        dropout=0.1,  # unused in eval mode, but the constructor may require it
    ).to(DEVICE)

    # Resolve the checkpoint, falling back from best -> last when possible.
    if not os.path.exists(model_path):
        print(f"Warning: Model file {model_path} not found.")
        if model_path == "best_model.pth" and os.path.exists("last_model.pth"):
            print("Falling back to 'last_model.pth'...")
            model_path = "last_model.pth"
        else:
            print("Error: No model file found.")
            return

    # map_location covers the CPU-only case in one call; harmless on CUDA.
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    # Encode the prompt as UTF-8 bytes (values 0-255). ord() would exceed
    # the byte vocabulary for any non-ASCII character.
    input_bytes = list(prompt_text.encode('utf-8'))

    print(f"Prompt: {prompt_text}")
    print("-" * 40)
    print(prompt_text, end='', flush=True)

    # Generation loop: the model predicts whole patches. Feed the running
    # byte sequence, take the last predicted patch, append, and repeat.
    with torch.no_grad():
        generated = input_bytes[:]

        for _ in range(max_new_tokens // PATCH_SIZE):
            curr_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(DEVICE)

            # With no target_bytes argument, the model runs in inference
            # mode and returns byte indices: (1, N_Patches, PATCH_SIZE).
            # NOTE(review): assumes the encoder tolerates context lengths
            # not divisible by PATCH_SIZE -- confirm against agiformer.py.
            pred_bytes = model(curr_tensor)

            # Append the last predicted patch to the running sequence.
            last_patch = pred_bytes[0, -1, :].cpu().tolist()
            generated.extend(last_patch)

            # Stream output, masking non-printable bytes.
            text_chunk = "".join(chr(b) if 32 <= b <= 126 else "?" for b in last_patch)
            print(text_chunk, end='', flush=True)

    print("\n" + "-" * 40)
93
if __name__ == "__main__":
    # Script entry point: generate a continuation from the best checkpoint.
    default_checkpoint = "best_model.pth"
    default_prompt = "The history of artificial intelligence "
    generate_text(default_checkpoint, default_prompt)