tefoteknik
/

agiformer

Model card | Files and versions

xet

Community

tefoteknik committed on Nov 22, 2025

Commit

c31993e

verified ·

1 Parent(s): 74e89c5

Update AGIFORMER with Turkish benchmark

Browse files

Files changed (1) hide show

generate.py +28 -12

generate.py CHANGED Viewed

@@ -22,7 +22,9 @@ def generate_text(model_path, prompt_text, max_new_tokens=200, temperature=0.8):
     model.load_state_dict(state_dict)
     model.eval()
-    input_bytes = [ord(c) for c in prompt_text]
     pad_len = (PATCH_SIZE - (len(input_bytes) % PATCH_SIZE)) % PATCH_SIZE
     if pad_len > 0:
         input_bytes.extend([32] * pad_len)
@@ -44,18 +46,32 @@ def generate_text(model_path, prompt_text, max_new_tokens=200, temperature=0.8):
             last_patch = pred_patches[0, -1, :].cpu().tolist()
             generated.extend(last_patch)
-            decoded_str = ""
-            for b in last_patch:
-                if 32 <= b <= 126 or b == 10 or b == 9:
-                    decoded_str += chr(b)
-                else:
-                    # Simple representation for non-printables
-                    pass
-            print(decoded_str, end='', flush=True)
     print("\n" + "-" * 50)
 if __name__ == "__main__":
-    # Test with a generic English prompt to see if it generalizes beyond XML
-    generate_text("best_model.pth", "The history of ", temperature=0.7)

     model.load_state_dict(state_dict)
     model.eval()
+    # Encode prompt to UTF-8 bytes
+    input_bytes = list(prompt_text.encode('utf-8'))
     pad_len = (PATCH_SIZE - (len(input_bytes) % PATCH_SIZE)) % PATCH_SIZE
     if pad_len > 0:
         input_bytes.extend([32] * pad_len)
             last_patch = pred_patches[0, -1, :].cpu().tolist()
             generated.extend(last_patch)
+            # Real-time decoding for display is tricky with multi-byte chars
+            # We'll just collect and decode at the end or try best effort
+            pass
     print("\n" + "-" * 50)
+    try:
+        full_text = bytes(generated).decode('utf-8', errors='replace')
+        # Print only the new part
+        print(full_text[len(prompt_text):])
+    except:
+        print("\n[Decoding Error]")
 if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description='Generate text with AGIFORMER')
+    parser.add_argument('--prompt', type=str, default="The history of ", help='Text prompt to start generation')
+    parser.add_argument('--temp', type=float, default=0.7, help='Sampling temperature')
+    parser.add_argument('--model', type=str, default="best_model.pth", help='Path to model checkpoint')
+    args = parser.parse_args()
+    # Check if user meant to use the Turkish model but it's named differently
+    model_path = args.model
+    if not os.path.exists(model_path) and os.path.exists("best_model_turkish.pth"):
+        print(f"Note: '{model_path}' not found, using 'best_model_turkish.pth' instead.")
+        model_path = "best_model_turkish.pth"
+    generate_text(model_path, args.prompt, temperature=args.temp)