Spaces:

OpenTransformer
/

ar-vs-sat

Sleeping

App Files Files Community

OpenTransformer committed on Jan 20

Commit

e310437

verified ·

1 Parent(s): 032c630

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +118 -0

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import gradio as gr
+import torch
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
+# Load model once
+print("Loading GPT-2...")
+model = GPT2LMHeadModel.from_pretrained('gpt2').eval()
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+# Move to GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+print(f"Running on {device}")
+def ar_generate(prompt, n_tokens=50):
+    """Standard AR generation - 1 token at a time"""
+    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
+    generated = []
+    for _ in range(n_tokens):
+        with torch.no_grad():
+            outputs = model(input_ids)
+            next_logits = outputs.logits[:, -1, :]
+            next_token = torch.argmax(next_logits, dim=-1)
+            generated.append(next_token.item())
+            input_ids = torch.cat([input_ids, next_token.unsqueeze(0)], dim=1)
+    return tokenizer.decode(generated)
+def forced_sat_generate(prompt, n_tokens=50, block_size=2):
+    """
+    FORCED SAT: Predict 2 tokens at once from AR model
+    Token 1: from position -1 (current)
+    Token 2: from position -2 (stale context)
+    """
+    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
+    generated = []
+    for _ in range(n_tokens // block_size):
+        with torch.no_grad():
+            outputs = model(input_ids)
+            # Token 1: current position
+            logits1 = outputs.logits[:, -1, :]
+            # Token 2: previous position (stale)
+            logits2 = outputs.logits[:, -2, :] if input_ids.shape[1] > 1 else logits1
+            token1 = torch.argmax(logits1, dim=-1)
+            token2 = torch.argmax(logits2, dim=-1)
+            generated.extend([token1.item(), token2.item()])
+            input_ids = torch.cat([
+                input_ids,
+                token1.unsqueeze(0),
+                token2.unsqueeze(0)
+            ], dim=1)
+    return tokenizer.decode(generated)
+def compare(prompt, n_tokens):
+    n_tokens = int(n_tokens)
+    ar_output = ar_generate(prompt, n_tokens)
+    sat_output = forced_sat_generate(prompt, n_tokens)
+    return ar_output, sat_output
+# Gradio interface
+with gr.Blocks(title="AR vs Forced SAT") as demo:
+    gr.Markdown("""
+    # AR vs Forced SAT Comparison
+    **Can AR models be forced to output 2 tokens at once?**
+    - **AR (Autoregressive):** Standard 1-token-at-a-time generation
+    - **Forced SAT:** Outputs 2 tokens per step using stale context for token 2
+    Forced SAT runs ~2x faster but produces degraded output because AR hidden states
+    don't encode multi-token futures. Joint AR+SAT training is required for quality SAT inference.
+    Model: GPT-2 (124M params)
+    """)
+    with gr.Row():
+        prompt = gr.Textbox(label="Prompt", value="The scientist discovered that", lines=2)
+        n_tokens = gr.Slider(minimum=10, maximum=100, value=40, step=10, label="Tokens to generate")
+    btn = gr.Button("Generate", variant="primary")
+    with gr.Row():
+        ar_output = gr.Textbox(label="AR Output (baseline)", lines=5)
+        sat_output = gr.Textbox(label="Forced SAT v1 (2x speed, degraded)", lines=5)
+    btn.click(compare, inputs=[prompt, n_tokens], outputs=[ar_output, sat_output])
+    gr.Examples(
+        examples=[
+            ["The quick brown fox", 40],
+            ["In the beginning", 40],
+            ["Once upon a time", 40],
+            ["Machine learning is", 40],
+            ["The president announced that", 40],
+        ],
+        inputs=[prompt, n_tokens],
+    )
+    gr.Markdown("""
+    ---
+    **Why Forced SAT fails:** AR hidden states at position N only encode "next token N+1".
+    There's no representation for token N+2. Forcing 2-token output uses stale context,
+    creating alternating good/bad tokens.
+    **Solution:** Train AR+SAT jointly from scratch so representations encode multiple future tokens.
+    See: [AGILLM-3](https://huggingface.co/OpenTransformer/AGILLM-3-large) | [Experiment Code](https://huggingface.co/OpenTransformer/sat-retrofit-experiment)
+    *OpenTransformers Ltd - Scott Bisset*
+    """)
+demo.launch()