Phoenix21 committed on
Commit
69fe038
·
verified ·
1 Parent(s): a9a2740

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -34
app.py CHANGED
@@ -9,11 +9,6 @@ import collections
9
  import os
10
  import google.generativeai as genai
11
 
12
- # Hugging Face Secrets access
13
- api_key = os.environ.get("GEMINI_API_KEY")
14
- if api_key:
15
- genai.configure(api_key=api_key)
16
-
17
  # 1. Models & Datasets Configs
18
  MODELS = ["gpt2", "distilgpt2", "qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
19
  DATASET_CONFIGS = {
@@ -22,7 +17,15 @@ DATASET_CONFIGS = {
22
  "AG News": ("ag_news", None)
23
  }
24
 
25
- def analyze_world_model(model_name, dataset_key, num_samples=25):
 
 
 
 
 
 
 
 
26
  device = "cuda" if torch.cuda.is_available() else "cpu"
27
  dataset_name, config_name = DATASET_CONFIGS[dataset_key]
28
 
@@ -46,7 +49,6 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
46
  inputs = tokenizer(text, return_tensors="pt").to(device)
47
  with torch.no_grad():
48
  outputs = model(**inputs, output_hidden_states=True)
49
- # We take the middle-to-late layer where semantic 'World Models' reside
50
  state = outputs.hidden_states[-2][0, -1, :].cpu().numpy()
51
  all_hidden_states.append(state)
52
  input_snippets.append(text)
@@ -57,53 +59,40 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
57
  state_assignments = kmeans.labels_
58
 
59
  # STEP C: Iterative Newtonian Interpretation
60
-
61
  cluster_texts = collections.defaultdict(list)
62
  for idx, cluster_id in enumerate(state_assignments):
63
  cluster_texts[cluster_id].append(input_snippets[idx])
64
 
65
  # Initialize Gemini model
66
- gemini_model = genai.GenerativeModel('gemini-2.5-flash')
67
 
68
- # We start with a clean header
69
  state_info = "## 🧠 Newtonian State Interpretation\n"
70
- state_info += "Each state represents a discovered *Equivalence Class* where the model treats different data as functionally identical for its internal world model.\n\n"
71
 
72
- # LOOP: Call Gemini for EACH state individually to ensure equal depth
73
  for cluster_id in range(n_clusters):
74
  snippets = cluster_texts[cluster_id]
75
- # Provide a richer payload for better structural laws
76
  context_payload = "\n".join([f"- {s}" for s in snippets[:8]])
77
 
78
- # IMPROVED PROMPT: Forces individual focus on ONE state at a time
79
  prompt = f"""
80
  Act as a Mechanistic Interpretability Researcher. You are reverse-engineering Cluster S{cluster_id}
81
- from the '{dataset_key}' dataset.
82
 
83
- The model has grouped these snippets into an 'Equivalence Class'—an internal map where it
84
- applies the same logical laws to diverse data.
85
-
86
- ### RAW SNIPPETS FOR S{cluster_id}:
87
  {context_payload}
88
 
89
- ### YOUR RESEARCH TASK:
90
- Analyze this cluster with high-fidelity Newtonian depth. Focus ONLY on S{cluster_id}.
91
-
92
- ### REQUIRED OUTPUT FORMAT (Strictly Follow):
93
  **State S{cluster_id} [Structural State Label]**
94
- - **Internal World Model**: Explain the CORE 'Law' or 'Invariant' here. What logical map has the model activated?
95
- Describe how this state interconnects lore, timelines, or mechanics into a single 'Coherent World State'.
96
- - **Dataset Sensor**: List the specific 'Triggers' (Proper Nouns, Terminology, Syntax) that push the model here.
97
- - **Predictive Function**: Explain how being in this state constrains the model's future tokens.
98
- What next-tokens are 'Biased' or 'Anticipated'?
99
  """
100
 
101
  try:
102
- # Iterative generation ensures Gemini doesn't 'lazy-load' the middle states
103
  response = gemini_model.generate_content(prompt, generation_config={"temperature": 0.2})
104
  state_info += response.text.strip() + "\n\n---\n\n"
105
  except Exception as e:
106
- state_info += f"**State S{cluster_id} [API Error]**: Analysis failed for this state. (Error: {str(e)})\n\n---\n\n"
 
107
  # Step D: DFA Reconstruction
108
  G = nx.DiGraph()
109
  for i in range(len(state_assignments) - 1):
@@ -116,15 +105,23 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
116
  plt.savefig("dfa_output.png", transparent=True)
117
  plt.close()
118
 
119
- analysis_brief = f"Model '{model_name}' identified {n_clusters} distinct equivalence classes in the '{dataset_key}' dataset."
120
 
121
  return "dfa_output.png", analysis_brief, state_info
122
 
123
- # 2. Gradio UI with Elaboration
124
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
125
  gr.Markdown("# 🌐 The Universal Newtonian Probe")
126
  gr.Markdown("Extracting the hidden Deterministic Finite Automaton (DFA) from any model and dataset.")
127
 
 
 
 
 
 
 
 
 
128
  with gr.Row():
129
  m_drop = gr.Dropdown(choices=MODELS, label="Select Model", value="gpt2")
130
  d_drop = gr.Dropdown(choices=list(DATASET_CONFIGS.keys()), label="Select Dataset", value="wikitext (v2-raw)")
@@ -135,8 +132,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
135
  out_img = gr.Image(label="Extracted DFA (World Map)")
136
  with gr.Column():
137
  out_txt = gr.Textbox(label="Analysis Status")
138
- out_elaboration = gr.Markdown() # Markdown for better readability of interpretation
139
 
140
- btn.click(analyze_world_model, inputs=[m_drop, d_drop], outputs=[out_img, out_txt, out_elaboration])
 
 
 
 
 
141
 
142
  demo.launch()
 
9
  import os
10
  import google.generativeai as genai
11
 
 
 
 
 
 
12
  # 1. Models & Datasets Configs
13
  MODELS = ["gpt2", "distilgpt2", "qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
14
  DATASET_CONFIGS = {
 
17
  "AG News": ("ag_news", None)
18
  }
19
 
20
+ # Added api_key parameter to the function
21
+ def analyze_world_model(api_key, model_name, dataset_key, num_samples=25):
22
+ # Validate API Key
23
+ if not api_key or len(api_key) < 10:
24
+ return None, "Error: Please provide a valid Gemini API Key.", ""
25
+
26
+ # Configure Gemini with the user-provided key
27
+ genai.configure(api_key=api_key)
28
+
29
  device = "cuda" if torch.cuda.is_available() else "cpu"
30
  dataset_name, config_name = DATASET_CONFIGS[dataset_key]
31
 
 
49
  inputs = tokenizer(text, return_tensors="pt").to(device)
50
  with torch.no_grad():
51
  outputs = model(**inputs, output_hidden_states=True)
 
52
  state = outputs.hidden_states[-2][0, -1, :].cpu().numpy()
53
  all_hidden_states.append(state)
54
  input_snippets.append(text)
 
59
  state_assignments = kmeans.labels_
60
 
61
  # STEP C: Iterative Newtonian Interpretation
 
62
  cluster_texts = collections.defaultdict(list)
63
  for idx, cluster_id in enumerate(state_assignments):
64
  cluster_texts[cluster_id].append(input_snippets[idx])
65
 
66
  # Initialize Gemini model
67
+ gemini_model = genai.GenerativeModel('gemini-1.5-flash') # Updated to a widely available version
68
 
 
69
  state_info = "## 🧠 Newtonian State Interpretation\n"
70
+ state_info += "Each state represents a discovered *Equivalence Class*.\n\n"
71
 
 
72
  for cluster_id in range(n_clusters):
73
  snippets = cluster_texts[cluster_id]
 
74
  context_payload = "\n".join([f"- {s}" for s in snippets[:8]])
75
 
 
76
  prompt = f"""
77
  Act as a Mechanistic Interpretability Researcher. You are reverse-engineering Cluster S{cluster_id}
78
+ from the '{dataset_key}' dataset. Analyze this cluster with high-fidelity Newtonian depth.
79
 
80
+ ### RAW SNIPPETS:
 
 
 
81
  {context_payload}
82
 
83
+ ### REQUIRED OUTPUT FORMAT:
 
 
 
84
  **State S{cluster_id} [Structural State Label]**
85
+ - **Internal World Model**: CORE 'Law' or 'Invariant'.
86
+ - **Dataset Sensor**: Triggers (Nouns, Syntax).
87
+ - **Predictive Function**: Biased future tokens.
 
 
88
  """
89
 
90
  try:
 
91
  response = gemini_model.generate_content(prompt, generation_config={"temperature": 0.2})
92
  state_info += response.text.strip() + "\n\n---\n\n"
93
  except Exception as e:
94
+ state_info += f"**State S{cluster_id} [API Error]**: {str(e)}\n\n---\n\n"
95
+
96
  # Step D: DFA Reconstruction
97
  G = nx.DiGraph()
98
  for i in range(len(state_assignments) - 1):
 
105
  plt.savefig("dfa_output.png", transparent=True)
106
  plt.close()
107
 
108
+ analysis_brief = f"Model '{model_name}' identified {n_clusters} distinct equivalence classes."
109
 
110
  return "dfa_output.png", analysis_brief, state_info
111
 
112
+ # 2. Gradio UI
113
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
114
  gr.Markdown("# 🌐 The Universal Newtonian Probe")
115
  gr.Markdown("Extracting the hidden Deterministic Finite Automaton (DFA) from any model and dataset.")
116
 
117
+ with gr.Row():
118
+ # Added API Key Input
119
+ api_key_input = gr.Textbox(
120
+ label="Gemini API Key",
121
+ placeholder="paste your API key here...",
122
+ type="password"
123
+ )
124
+
125
  with gr.Row():
126
  m_drop = gr.Dropdown(choices=MODELS, label="Select Model", value="gpt2")
127
  d_drop = gr.Dropdown(choices=list(DATASET_CONFIGS.keys()), label="Select Dataset", value="wikitext (v2-raw)")
 
132
  out_img = gr.Image(label="Extracted DFA (World Map)")
133
  with gr.Column():
134
  out_txt = gr.Textbox(label="Analysis Status")
135
+ out_elaboration = gr.Markdown()
136
 
137
+ # Updated inputs to include api_key_input
138
+ btn.click(
139
+ analyze_world_model,
140
+ inputs=[api_key_input, m_drop, d_drop],
141
+ outputs=[out_img, out_txt, out_elaboration]
142
+ )
143
 
144
  demo.launch()