Phoenix21 commited on
Commit
3a924ef
·
verified ·
1 Parent(s): 1c6672e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -53
app.py CHANGED
@@ -1,69 +1,68 @@
1
  import torch
2
  import gradio as gr
 
 
 
3
  import networkx as nx
4
  import matplotlib.pyplot as plt
5
- from transformers import GPT2Model, GPT2Tokenizer
6
- from sklearn.cluster import KMeans
7
 
8
# 1. Load a real small model
# Pick GPU when available; all tensors/models below are moved to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "gpt2" # 124M parameters
# Loaded once at module import so every Gradio request reuses the same weights.
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2Model.from_pretrained(model_name).to(device)
13
 
14
def get_hidden_state(sequence_str):
    """Return the final-layer activation vector for the last token of *sequence_str*.

    Encodes the string with the module-level tokenizer, runs the model
    without gradient tracking, and extracts the last hidden layer at the
    final token position as a NumPy array.
    """
    encoded = tokenizer(sequence_str, return_tensors="pt").to(device)
    with torch.no_grad():
        result = model(**encoded, output_hidden_states=True)
    # hidden_states[-1] is the last layer; [0, -1, :] selects batch 0, last token.
    final_layer = result.hidden_states[-1]
    return final_layer[0, -1, :].cpu().numpy()
20
-
21
def analyze_dfa(input_text):
    """
    Simulates a 'State Probe'.

    Input: 'Right, Up, Left'
    Logic: Generates a graph showing how the model's internal representation
    changes with each move.

    Returns:
        (image_path, message): path to the rendered DFA plot and a summary
        string. image_path is None when no moves were supplied.
    """
    # Drop empty entries so trailing commas / blank input can't reach KMeans.
    moves = [m.strip() for m in input_text.split(",") if m.strip()]
    if not moves:
        # Guard: KMeans would raise with n_clusters=0 on empty input.
        return None, "Please enter at least one move (comma-separated)."

    history = ""
    states_vectors = []

    # Track the "path" through the model's internal space
    for move in moves:
        history += f" Move {move}."
        states_vectors.append(get_hidden_state(history))

    # Clustering: Vafa's Compression metric
    # We cluster activations to see which moves the model thinks are 'equivalent'
    num_clusters = min(len(moves), 4)
    kmeans = KMeans(n_clusters=num_clusters, n_init=10).fit(states_vectors)
    labels = kmeans.labels_

    # Build the DFA Graph: consecutive cluster ids define transitions,
    # labelled with the move that caused each transition.
    G = nx.DiGraph()
    for i in range(len(moves) - 1):
        u, v = f"S{labels[i]}", f"S{labels[i + 1]}"
        G.add_edge(u, v, label=moves[i + 1])

    # Draw the DFA
    plt.figure(figsize=(6, 4))
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=2000)
    edge_labels = nx.get_edge_attributes(G, 'label')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    plt.savefig("dfa_plot.png")
    plt.close()  # release the figure; repeated Gradio calls otherwise leak memory

    return "dfa_plot.png", f"Found {num_clusters} distinct internal states."
59
 
60
# 3. Gradio Interface
# Build the input/output widgets first, then wire them into the Interface.
move_box = gr.Textbox(
    placeholder="Enter moves separated by commas, e.g.: Right, Up, Left, Down"
)
dfa_image = gr.Image(label="Extracted Model DFA")
analysis_text = gr.Text(label="Analysis")

demo = gr.Interface(
    fn=analyze_dfa,
    inputs=move_box,
    outputs=[dfa_image, analysis_text],
    title="World Model DFA Extractor",
    description=(
        "This tool probes GPT-2's internal activations to see if it treats "
        "different move sequences as the same 'State'."
    ),
)

demo.launch()
 
1
  import torch
2
  import gradio as gr
3
+ from transformers import AutoModel, AutoTokenizer
4
+ from datasets import load_dataset
5
+ from sklearn.cluster import KMeans
6
  import networkx as nx
7
  import matplotlib.pyplot as plt
 
 
8
 
9
# 1. Configuration for Models & Datasets
# Model checkpoints selectable in the UI; all are small causal LMs loadable
# via AutoModel/AutoTokenizer.
MODELS = ["gpt2", "distilgpt2", "qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
# NOTE(review): these ids are passed straight to datasets.load_dataset().
# "wikitext" requires an explicit config name (e.g. "wikitext-2-raw-v1") and
# "tinystories" may not resolve under that bare id on the Hub — verify both.
DATASETS = ["wikitext", "tinystories", "ag_news"]
 
 
12
 
13
def analyze_world_model(model_name, dataset_name, num_samples=20):
    """
    Probe a language model's hidden states over streamed dataset samples and
    reconstruct a DFA-style transition graph from clustered activations.

    Args:
        model_name: Hugging Face model id loadable via AutoModel/AutoTokenizer.
        dataset_name: Hugging Face dataset id whose examples carry a 'text'
            field (assumed for all entries in DATASETS — TODO confirm).
        num_samples: number of streamed examples to probe (default 20).

    Returns:
        (image_path, message): path to the rendered DFA plot and a summary
        string. image_path is None when no usable text samples were found.
    """
    # Load Model & Tokenizer
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name).to(device)

    # Load Dataset. Some Hub datasets require an explicit config name;
    # bare load_dataset("wikitext", ...) raises without one.
    config_name = {"wikitext": "wikitext-2-raw-v1"}.get(dataset_name)
    ds = load_dataset(dataset_name, config_name, split='train', streaming=True).take(num_samples)

    all_hidden_states = []

    # Step A: The Probe (Keplerian Observation)
    for example in ds:
        text = example.get('text', '')[:100]  # Use a snippet
        if not text.strip():
            # wikitext streams many blank lines; a zero-length token sequence
            # would break the [0, -1, :] indexing below.
            continue
        inputs = tokenizer(text, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)
        # Take the last hidden state of the sequence
        state = outputs.hidden_states[-1][0, -1, :].cpu().numpy()
        all_hidden_states.append(state)

    if not all_hidden_states:
        # Guard: KMeans would crash on an empty sample set.
        return None, f"No usable text samples found in '{dataset_name}'."

    # Step B: Myhill-Nerode Clustering (Newtonian Recovery)
    # We cluster to find 'Equivalence Classes' (Internal States)
    n_clusters = min(len(all_hidden_states), 5)
    kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(all_hidden_states)
    state_assignments = kmeans.labels_

    # Step C: DFA Reconstruction — consecutive samples define transitions.
    G = nx.DiGraph()
    for i in range(len(state_assignments) - 1):
        u, v = f"S{state_assignments[i]}", f"S{state_assignments[i+1]}"
        G.add_edge(u, v, label=f"Next_{i}")

    # Draw the DFA
    plt.figure(figsize=(8, 6))
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='orange', node_size=3000, font_weight='bold')
    plt.savefig("dfa_output.png")
    plt.close()  # release the figure; repeated Gradio calls otherwise leak memory

    return "dfa_output.png", f"Model '{model_name}' reduced this dataset into {n_clusters} distinct internal states."
 
55
 
56
# 3. Gradio UI
# Blocks layout: two dropdowns side by side, a trigger button, then the
# image + text outputs that analyze_world_model returns as a pair.
with gr.Blocks() as demo:
    gr.Markdown("# The Universal Newtonian Probe")
    with gr.Row():
        m_drop = gr.Dropdown(choices=MODELS, label="Select Model", value="gpt2")
        d_drop = gr.Dropdown(choices=DATASETS, label="Select Dataset", value="wikitext")
    btn = gr.Button("Analyze Coherence")
    out_img = gr.Image(label="Extracted DFA")
    out_txt = gr.Textbox(label="Analysis Result")

    # Wire the button: (model, dataset) in -> (image path, summary text) out.
    btn.click(analyze_world_model, inputs=[m_drop, d_drop], outputs=[out_img, out_txt])

demo.launch()