LatentDFAReconstructorLOGGING

Sleeping

App Files Community

Phoenix21 committed on Jan 5

Commit

68d7012

verified ·

1 Parent(s): 1c6672e

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -25

app.py CHANGED Viewed

@@ -2,12 +2,23 @@ import torch
 import gradio as gr
 import networkx as nx
 import matplotlib.pyplot as plt
 from transformers import GPT2Model, GPT2Tokenizer
 from sklearn.cluster import KMeans
-# 1. Load a real small model
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model_name = "gpt2" # 124M parameters
 tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 model = GPT2Model.from_pretrained(model_name).to(device)
@@ -15,55 +26,87 @@ def get_hidden_state(sequence_str):
     inputs = tokenizer(sequence_str, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model(**inputs, output_hidden_states=True)
-    # Use the last hidden state of the last token
     return outputs.hidden_states[-1][0, -1, :].cpu().numpy()
 def analyze_dfa(input_text):
-    """
-    Simulates a 'State Probe'.
-    Input: 'Right, Up, Left'
-    Logic: Generates a graph showing how the model's internal representation
-    changes with each move.
-    """
     moves = [m.strip() for m in input_text.split(",")]
     history = ""
     states_vectors = []
-    # Track the "path" through the model's internal space
-    for move in moves:
         history += f" Move {move}."
         vec = get_hidden_state(history)
         states_vectors.append(vec)
-    # Clustering: Vafa's Compression metric
-    # We cluster activations to see which moves the model thinks are 'equivalent'
     num_clusters = min(len(moves), 4)
     kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(states_vectors)
     labels = kmeans.labels_
-    # Build the DFA Graph
     G = nx.DiGraph()
     for i in range(len(moves)-1):
         u, v = f"S{labels[i]}", f"S{labels[i+1]}"
         G.add_edge(u, v, label=moves[i+1])
-    # Draw the DFA
     plt.figure(figsize=(6, 4))
     pos = nx.spring_layout(G)
     nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=2000)
     edge_labels = nx.get_edge_attributes(G, 'label')
     nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
-    plt.savefig("dfa_plot.png")
-    return "dfa_plot.png", f"Found {num_clusters} distinct internal states."
-# 3. Gradio Interface
-demo = gr.Interface(
-    fn=analyze_dfa,
-    inputs=gr.Textbox(placeholder="Enter moves separated by commas, e.g.: Right, Up, Left, Down"),
-    outputs=[gr.Image(label="Extracted Model DFA"), gr.Text(label="Analysis")],
-    title="World Model DFA Extractor",
-    description="This tool probes GPT-2's internal activations to see if it treats different move sequences as the same 'State'."
-)
 demo.launch()

 import gradio as gr
 import networkx as nx
 import matplotlib.pyplot as plt
+import logging
+import io
 from transformers import GPT2Model, GPT2Tokenizer
 from sklearn.cluster import KMeans
+import lightning as L  # Using Lightning for structural logging
+# 1. Setup Logging Buffer
+log_capture = io.StringIO()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("DFA_Probe")
+handler = logging.StreamHandler(log_capture)
+handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
+logger.addHandler(handler)
+# 2. Model & Tokenizer Initialization
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model_name = "gpt2"
 tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 model = GPT2Model.from_pretrained(model_name).to(device)
     inputs = tokenizer(sequence_str, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model(**inputs, output_hidden_states=True)
     return outputs.hidden_states[-1][0, -1, :].cpu().numpy()
 def analyze_dfa(input_text):
     """Probe GPT-2 along a comma-separated move sequence and draw a state graph.

     Each move is appended to a growing history string, the hidden state for
     that history is fetched with get_hidden_state(), and the collected
     vectors are grouped with KMeans. Every cluster becomes a node
     "S<label>", and each consecutive pair of steps becomes a directed edge
     labelled with the move that was taken.

     Args:
         input_text: Moves separated by commas, e.g. "Right, Up, Left".
             Blank entries (and a None input) are ignored.

     Returns:
         A (plot_path, summary, logs) tuple: the path of the saved PNG (None
         when no move was supplied), a one-line summary, and the log text
         captured during this call.
     """
     # Clear logs for a fresh run
     log_capture.truncate(0)
     log_capture.seek(0)
     logger.info("🚀 Starting analysis for input: '%s'", input_text)

     # Drop blank entries so "", "Right,,Left" or a trailing comma do not
     # turn into empty " Move ." probes.
     moves = [m.strip() for m in (input_text or "").split(",")]
     moves = [m for m in moves if m]
     if not moves:
         logger.warning("No moves found in input; nothing to analyze.")
         return None, "No moves provided.", log_capture.getvalue()

     history = ""
     states_vectors = []
     # Probing loop
     for step, move in enumerate(moves, start=1):
         history += f" Move {move}."
         logger.info(
             "Processing Step %d: Extracting activations for history '%s'",
             step,
             history,
         )
         states_vectors.append(get_hidden_state(history))

     # Clustering (The World Model logic)
     logger.info("🧠 Running KMeans clustering to find equivalent latent states...")
     num_clusters = min(len(moves), 4)
     kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(states_vectors)
     labels = kmeans.labels_
     logger.info("📊 State mapping completed: %s", labels)

     # Build and Draw DFA
     state_names = [f"S{label}" for label in labels]
     G = nx.DiGraph()
     # Register every state up front: a single-move input has no transitions,
     # and without this the plot would come out empty.
     G.add_nodes_from(state_names)
     # A DiGraph keeps one edge per (u, v) pair, so collect all moves seen on
     # a transition instead of letting later moves overwrite earlier labels.
     transitions = {}
     for u, v, move in zip(state_names, state_names[1:], moves[1:]):
         seen = transitions.setdefault((u, v), [])
         if move not in seen:
             seen.append(move)
     for (u, v), seen in transitions.items():
         G.add_edge(u, v, label=", ".join(seen))

     plot_path = "dfa_plot.png"
     fig = plt.figure(figsize=(6, 4))
     try:
         pos = nx.spring_layout(G)
         nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=2000)
         edge_labels = nx.get_edge_attributes(G, 'label')
         nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
         fig.savefig(plot_path)
     finally:
         # Always release the figure, even if drawing fails, so repeated
         # requests do not accumulate open matplotlib figures.
         plt.close(fig)
     logger.info("✅ Analysis finished. DFA plot generated.")
     return plot_path, f"Found {num_clusters} distinct internal states.", log_capture.getvalue()
+# 3. Custom Gradio UI with Log View
+with gr.Blocks(title="World Model DFA Extractor") as demo:
+    gr.Markdown("# World Model DFA Extractor")
+    gr.Markdown("Probing GPT-2 activations to visualize internal state logic.")
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_box = gr.Textbox(
+                label="Input Moves",
+                placeholder="Right, Left, Right, Left",
+                lines=2
+            )
+            submit_btn = gr.Button("Submit", variant="primary")
+            clear_btn = gr.Button("Clear")
+        with gr.Column(scale=2):
+            output_img = gr.Image(label="Extracted Model DFA")
+            analysis_text = gr.Textbox(label="Result Summary")
+    with gr.Row():
+        # Dedicated Log Box
+        log_box = gr.Textbox(
+            label="System & Probe Logs",
+            interactive=False,
+            lines=10,
+            max_lines=15,
+            autoscroll=True
+        )
+    submit_btn.click(
+        fn=analyze_dfa,
+        inputs=input_box,
+        outputs=[output_img, analysis_text, log_box]
+    )
+    clear_btn.click(lambda: [None, "", ""], None, [output_img, analysis_text, log_box])
 demo.launch()