tefoteknik committed on
Commit
7fd7bd3
·
verified ·
1 Parent(s): 310140f

Upload generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. generate.py +33 -66
generate.py CHANGED
@@ -1,94 +1,61 @@
1
  import torch
2
  from src.models.agiformer import AGIFORMER
3
  import os
 
4
 
5
def generate_text(model_path, prompt_text, max_new_tokens=200):
    """Autoregressively generate text from a trained AGIFORMER checkpoint.

    Args:
        model_path: Path to a ``.pth`` state-dict file. If ``best_model.pth``
            is missing, falls back to ``last_model.pth`` when that exists.
        prompt_text: Seed string; its UTF-8 bytes form the initial context.
        max_new_tokens: Upper bound on generated bytes, rounded down to a
            whole number of PATCH_SIZE patches.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model config — must match the values used at training time.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4

    print(f"Loading model from {model_path} on {DEVICE}...")

    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        dropout=0.1,  # irrelevant under eval(), but the constructor may require it
    ).to(DEVICE)

    if not os.path.exists(model_path):
        print(f"Warning: Model file {model_path} not found.")
        if model_path == "best_model.pth" and os.path.exists("last_model.pth"):
            print("Falling back to 'last_model.pth'...")
            model_path = "last_model.pth"
        else:
            print("Error: No model file found.")
            return

    # map_location=DEVICE handles both the CPU and CUDA cases, so there is
    # no need to branch on torch.cuda.is_available() here.
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    # Encode the prompt as raw UTF-8 bytes so every value fits in 0..255
    # (ord() alone can exceed 255 for non-ASCII characters, which would be
    # out of range for a 256-way byte vocabulary).
    input_bytes = list(prompt_text.encode("utf-8"))

    print(f"Prompt: {prompt_text}")
    print("-" * 40)
    print(prompt_text, end='', flush=True)

    with torch.no_grad():
        generated = input_bytes[:]
        for _ in range(max_new_tokens // PATCH_SIZE):
            curr_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(DEVICE)

            # NOTE(review): the context length is not padded to a multiple of
            # PATCH_SIZE here — this assumes the encoder tolerates ragged
            # lengths; confirm against AGIFORMER's patching logic.

            # No target_bytes argument -> inference mode: per the comments in
            # the original, the model returns byte indices of shape
            # (1, n_patches, PATCH_SIZE), not logits.
            pred_bytes = model(curr_tensor)

            # Take the newest predicted patch and append it to the context.
            last_patch = pred_bytes[0, -1, :].cpu().tolist()
            generated.extend(last_patch)

            # Stream printable ASCII; everything else becomes '?'.
            text_chunk = "".join(chr(b) if 32 <= b <= 126 else "?" for b in last_patch)
            print(text_chunk, end='', flush=True)

    print("\n" + "-" * 40)


if __name__ == "__main__":
    generate_text("best_model.pth", "The history of artificial intelligence ")
 
1
  import torch
2
  from src.models.agiformer import AGIFORMER
3
  import os
4
+ import sys
5
 
6
def generate_text(model_path, prompt_text, max_new_tokens=200, temperature=0.8):
    """Stream text generated by a trained AGIFORMER byte-patch model.

    Args:
        model_path: Path to a ``.pth`` state-dict checkpoint.
        prompt_text: Seed string; its UTF-8 bytes become the initial context,
            right-padded with spaces to a multiple of PATCH_SIZE.
        max_new_tokens: Upper bound on generated bytes, rounded down to a
            whole number of PATCH_SIZE patches.
        temperature: Sampling temperature forwarded to the model's forward
            pass.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Config — must match the values used at training time.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4

    print(f"Loading {model_path} (Temp={temperature})...")
    model = AGIFORMER(d_model=D_MODEL, n_layers=N_LAYERS, patch_size=PATCH_SIZE).to(DEVICE)

    if not os.path.exists(model_path):
        print("Model not found.")
        return

    # map_location handles loading a CUDA-saved checkpoint on CPU and vice versa.
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.eval()

    # Encode as raw UTF-8 bytes so every value fits in 0..255 (ord() alone
    # can exceed 255 for non-ASCII prompts), then pad with spaces (32) so
    # the context length is a whole number of patches.
    input_bytes = list(prompt_text.encode("utf-8"))
    pad_len = (PATCH_SIZE - (len(input_bytes) % PATCH_SIZE)) % PATCH_SIZE
    if pad_len > 0:
        input_bytes.extend([32] * pad_len)

    print(f"Prompt: '{prompt_text}'")
    print("-" * 50)
    print(prompt_text, end='', flush=True)

    generated = input_bytes[:]

    with torch.no_grad():
        for _ in range(max_new_tokens // PATCH_SIZE):
            context = generated[-1024:]  # Keep context manageable
            curr_tensor = torch.tensor(context, dtype=torch.long).unsqueeze(0).to(DEVICE)

            # Pass Temperature; with no target bytes the model runs in
            # inference mode and returns sampled byte indices of shape
            # (1, n_patches, PATCH_SIZE).
            pred_patches = model(curr_tensor, temperature=temperature)

            # Take the newest predicted patch and append it to the context.
            last_patch = pred_patches[0, -1, :].cpu().tolist()
            generated.extend(last_patch)

            # Stream printable ASCII plus tab (9) and newline (10); other
            # bytes are silently dropped, as in the original empty-else.
            decoded_str = "".join(
                chr(b) for b in last_patch if 32 <= b <= 126 or b in (9, 10)
            )
            print(decoded_str, end='', flush=True)

    print("\n" + "-" * 50)
58
 
59
if __name__ == "__main__":
    # A generic English prompt: checks whether the model generalizes
    # beyond its XML-heavy training data.
    generate_text("best_model.pth", "The history of ", temperature=0.7)