tefoteknik committed on
Commit
7fd7bd3
·
verified ·
1 Parent(s): 310140f

Upload generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. generate.py +33 -66
generate.py CHANGED
@@ -1,94 +1,61 @@
1
  import torch
2
  from src.models.agiformer import AGIFORMER
3
  import os
 
4
 
5
def generate_text(model_path, prompt_text, max_new_tokens=200):
    """Autoregressively generate text from a trained AGIFORMER checkpoint.

    Args:
        model_path: Path to a ``.pth`` state-dict file. If ``best_model.pth``
            is missing, falls back to ``last_model.pth`` when that exists.
        prompt_text: Seed string; its UTF-8 bytes form the initial context.
        max_new_tokens: Upper bound on generated bytes, rounded down to a
            whole number of PATCH_SIZE patches.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model config — must match the values used at training time.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4

    print(f"Loading model from {model_path} on {DEVICE}...")

    model = AGIFORMER(
        d_model=D_MODEL,
        n_layers=N_LAYERS,
        patch_size=PATCH_SIZE,
        dropout=0.1,  # irrelevant under eval(), but the constructor may require it
    ).to(DEVICE)

    if not os.path.exists(model_path):
        print(f"Warning: Model file {model_path} not found.")
        if model_path == "best_model.pth" and os.path.exists("last_model.pth"):
            print("Falling back to 'last_model.pth'...")
            model_path = "last_model.pth"
        else:
            print("Error: No model file found.")
            return

    # map_location=DEVICE handles both the CPU and CUDA cases, so there is
    # no need to branch on torch.cuda.is_available() here.
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    model.eval()

    # Encode the prompt as raw UTF-8 bytes so every value fits in 0..255
    # (ord() alone can exceed 255 for non-ASCII characters, which would be
    # out of range for a 256-way byte vocabulary).
    input_bytes = list(prompt_text.encode("utf-8"))

    print(f"Prompt: {prompt_text}")
    print("-" * 40)
    print(prompt_text, end='', flush=True)

    with torch.no_grad():
        generated = input_bytes[:]
        for _ in range(max_new_tokens // PATCH_SIZE):
            curr_tensor = torch.tensor(generated, dtype=torch.long).unsqueeze(0).to(DEVICE)

            # NOTE(review): the context length is not padded to a multiple of
            # PATCH_SIZE here — this assumes the encoder tolerates ragged
            # lengths; confirm against AGIFORMER's patching logic.

            # No target_bytes argument -> inference mode: per the comments in
            # the original, the model returns byte indices of shape
            # (1, n_patches, PATCH_SIZE), not logits.
            pred_bytes = model(curr_tensor)

            # Take the newest predicted patch and append it to the context.
            last_patch = pred_bytes[0, -1, :].cpu().tolist()
            generated.extend(last_patch)

            # Stream printable ASCII; everything else becomes '?'.
            text_chunk = "".join(chr(b) if 32 <= b <= 126 else "?" for b in last_patch)
            print(text_chunk, end='', flush=True)

    print("\n" + "-" * 40)


if __name__ == "__main__":
    generate_text("best_model.pth", "The history of artificial intelligence ")
 
1
  import torch
2
  from src.models.agiformer import AGIFORMER
3
  import os
4
+ import sys
5
 
6
def generate_text(model_path, prompt_text, max_new_tokens=200, temperature=0.8):
    """Stream text generated by a trained AGIFORMER byte-patch model.

    Args:
        model_path: Path to a ``.pth`` state-dict checkpoint.
        prompt_text: Seed string; its UTF-8 bytes become the initial context,
            right-padded with spaces to a multiple of PATCH_SIZE.
        max_new_tokens: Upper bound on generated bytes, rounded down to a
            whole number of PATCH_SIZE patches.
        temperature: Sampling temperature forwarded to the model's forward
            pass.
    """
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Config — must match the values used at training time.
    D_MODEL = 512
    N_LAYERS = 6
    PATCH_SIZE = 4

    print(f"Loading {model_path} (Temp={temperature})...")
    model = AGIFORMER(d_model=D_MODEL, n_layers=N_LAYERS, patch_size=PATCH_SIZE).to(DEVICE)

    if not os.path.exists(model_path):
        print("Model not found.")
        return

    # map_location handles loading a CUDA-saved checkpoint on CPU and vice versa.
    state_dict = torch.load(model_path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.eval()

    # Encode as raw UTF-8 bytes so every value fits in 0..255 (ord() alone
    # can exceed 255 for non-ASCII prompts), then pad with spaces (32) so
    # the context length is a whole number of patches.
    input_bytes = list(prompt_text.encode("utf-8"))
    pad_len = (PATCH_SIZE - (len(input_bytes) % PATCH_SIZE)) % PATCH_SIZE
    if pad_len > 0:
        input_bytes.extend([32] * pad_len)

    print(f"Prompt: '{prompt_text}'")
    print("-" * 50)
    print(prompt_text, end='', flush=True)

    generated = input_bytes[:]

    with torch.no_grad():
        for _ in range(max_new_tokens // PATCH_SIZE):
            context = generated[-1024:]  # Keep context manageable
            curr_tensor = torch.tensor(context, dtype=torch.long).unsqueeze(0).to(DEVICE)

            # Pass Temperature; with no target bytes the model runs in
            # inference mode and returns sampled byte indices of shape
            # (1, n_patches, PATCH_SIZE).
            pred_patches = model(curr_tensor, temperature=temperature)

            # Take the newest predicted patch and append it to the context.
            last_patch = pred_patches[0, -1, :].cpu().tolist()
            generated.extend(last_patch)

            # Stream printable ASCII plus tab (9) and newline (10); other
            # bytes are silently dropped, as in the original empty-else.
            decoded_str = "".join(
                chr(b) for b in last_patch if 32 <= b <= 126 or b in (9, 10)
            )
            print(decoded_str, end='', flush=True)

    print("\n" + "-" * 50)
58
 
59
if __name__ == "__main__":
    # A generic English prompt: checks whether the model generalizes
    # beyond its XML-heavy training data.
    generate_text("best_model.pth", "The history of ", temperature=0.7)