LatentDFAReconstructorLOGGING1

Sleeping

App Files Community

Phoenix21 committed on Jan 5

Commit

8113b74

verified ·

1 Parent(s): 1c8cef7

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -59

app.py CHANGED Viewed

@@ -4,11 +4,12 @@ import networkx as nx
 import matplotlib.pyplot as plt
 import logging
 import io
 from transformers import GPT2Model, GPT2Tokenizer
 from sklearn.cluster import KMeans
-import lightning as L  # Using Lightning for structural logging
-# 1. Setup Logging Buffer
 log_capture = io.StringIO()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("DFA_Probe")
@@ -16,7 +17,7 @@ handler = logging.StreamHandler(log_capture)
 handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
 logger.addHandler(handler)
-# 2. Model & Tokenizer Initialization
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "gpt2"
 tokenizer = GPT2Tokenizer.from_pretrained(model_name)
@@ -29,84 +30,61 @@ def get_hidden_state(sequence_str):
     return outputs.hidden_states[-1][0, -1, :].cpu().numpy()
 def analyze_dfa(input_text):
-    # Clear logs for a fresh run
     log_capture.truncate(0)
     log_capture.seek(0)
-    logger.info(f"🚀 Starting analysis for input: '{input_text}'")
     moves = [m.strip() for m in input_text.split(",")]
     history = ""
     states_vectors = []
-    # Probing loop
     for i, move in enumerate(moves):
         history += f" Move {move}."
-        logger.info(f"Processing Step {i+1}: Extracting activations for history '{history}'")
         vec = get_hidden_state(history)
         states_vectors.append(vec)
-    # Clustering (The World Model logic)
-    logger.info(f"🧠 Running KMeans clustering to find equivalent latent states...")
     num_clusters = min(len(moves), 4)
     kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(states_vectors)
-    labels = kmeans.labels_
-    logger.info(f"📊 State mapping completed: {labels}")
-    # Build and Draw DFA
-    G = nx.DiGraph()
     for i in range(len(moves)-1):
-        u, v = f"S{labels[i]}", f"S{labels[i+1]}"
-        G.add_edge(u, v, label=moves[i+1])
-    plt.figure(figsize=(6, 4))
-    pos = nx.spring_layout(G)
-    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=2000)
-    edge_labels = nx.get_edge_attributes(G, 'label')
-    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
-    plot_path = "dfa_plot.png"
     plt.savefig(plot_path)
     plt.close()
-    logger.info("✅ Analysis finished. DFA plot generated.")
-    return plot_path, f"Found {num_clusters} distinct internal states.", log_capture.getvalue()
-# 3. Custom Gradio UI with Log View
-with gr.Blocks(title="World Model DFA Extractor") as demo:
-    gr.Markdown("# World Model DFA Extractor")
-    gr.Markdown("Probing GPT-2 activations to visualize internal state logic.")
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_box = gr.Textbox(
-                label="Input Moves",
-                placeholder="Right, Left, Right, Left",
-                lines=2
-            )
-            submit_btn = gr.Button("Submit", variant="primary")
-            clear_btn = gr.Button("Clear")
-        with gr.Column(scale=2):
-            output_img = gr.Image(label="Extracted Model DFA")
-            analysis_text = gr.Textbox(label="Result Summary")
     with gr.Row():
-        # Dedicated Log Box
-        log_box = gr.Textbox(
-            label="System & Probe Logs",
-            interactive=False,
-            lines=10,
-            max_lines=15,
-            autoscroll=True
-        )
-    submit_btn.click(
-        fn=analyze_dfa,
-        inputs=input_box,
-        outputs=[output_img, analysis_text, log_box]
-    )
-    clear_btn.click(lambda: [None, "", ""], None, [output_img, analysis_text, log_box])
 demo.launch()

 import matplotlib.pyplot as plt
 import logging
 import io
+import numpy as np
 from transformers import GPT2Model, GPT2Tokenizer
 from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+# Setup Logging
 log_capture = io.StringIO()
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("DFA_Probe")
 handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
 logger.addHandler(handler)
+# Load GPT-2
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "gpt2"
 tokenizer = GPT2Tokenizer.from_pretrained(model_name)
     return outputs.hidden_states[-1][0, -1, :].cpu().numpy()
 def analyze_dfa(input_text):
     log_capture.truncate(0)
     log_capture.seek(0)
     moves = [m.strip() for m in input_text.split(",")]
     history = ""
     states_vectors = []
     for i, move in enumerate(moves):
         history += f" Move {move}."
         vec = get_hidden_state(history)
         states_vectors.append(vec)
+    # --- 1. KMeans Graph (Unsupervised State Map) ---
     num_clusters = min(len(moves), 4)
     kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(states_vectors)
+    km_labels = kmeans.labels_
+    G_km = nx.DiGraph()
     for i in range(len(moves)-1):
+        G_km.add_edge(f"S{km_labels[i]}", f"S{km_labels[i+1]}", label=moves[i+1])
+    plt.figure(figsize=(12, 5))
+    plt.subplot(1, 2, 1)
+    pos_km = nx.spring_layout(G_km)
+    nx.draw(G_km, pos_km, with_labels=True, node_color='lightblue', node_size=1500)
+    nx.draw_networkx_edge_labels(G_km, pos_km, edge_labels=nx.get_edge_attributes(G_km, 'label'))
+    plt.title("KMeans DFA (State-Based)")
+    # --- 2. Linear Probe / PCA (Geometric Map) ---
+    logger.info("📐 Running Linear Probe (PCA) to find the 'Spatial Axis'...")
+    pca = PCA(n_components=2)
+    coords = pca.fit_transform(states_vectors)
+    plt.subplot(1, 2, 2)
+    plt.scatter(coords[:, 0], coords[:, 1], c=range(len(moves)), cmap='viridis', s=100)
+    for i, move in enumerate(moves):
+        plt.annotate(f"{i}:{move}", (coords[i, 0], coords[i, 1]))
+    plt.plot(coords[:, 0], coords[:, 1], 'r--', alpha=0.3) # Path line
+    plt.title("Linear Probe (Spatial Projection)")
+    plot_path = "comparison_plot.png"
     plt.savefig(plot_path)
     plt.close()
+    return plot_path, f"KMeans Labels: {km_labels}", log_capture.getvalue()
+# Launching with dual display
+with gr.Blocks() as demo:
+    gr.Markdown("# KMeans vs. Linear Probe Analysis")
+    input_box = gr.Textbox(label="Moves (Right, Left...)")
+    submit_btn = gr.Button("Compare")
     with gr.Row():
+        output_img = gr.Image(label="KMeans (Left) vs Linear PCA (Right)")
+        analysis_text = gr.Textbox(label="Mapping Results")
+    log_box = gr.Textbox(label="Probe Logs", lines=5)
+    submit_btn.click(analyze_dfa, input_box, [output_img, analysis_text, log_box])
 demo.launch()