ToyUniversalInterpreter

Sleeping

App Files Community

Phoenix21 committed on Jan 5

Commit

69fe038

verified ·

1 Parent(s): a9a2740

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -34

app.py CHANGED Viewed

@@ -9,11 +9,6 @@ import collections
 import os
 import google.generativeai as genai
-# Hugging Face Secrets access
-api_key = os.environ.get("GEMINI_API_KEY")
-if api_key:
-    genai.configure(api_key=api_key)
 # 1. Models & Datasets Configs
 MODELS = ["gpt2", "distilgpt2", "qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
 DATASET_CONFIGS = {
@@ -22,7 +17,15 @@ DATASET_CONFIGS = {
     "AG News": ("ag_news", None)
 }
-def analyze_world_model(model_name, dataset_key, num_samples=25):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dataset_name, config_name = DATASET_CONFIGS[dataset_key]
@@ -46,7 +49,6 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
         inputs = tokenizer(text, return_tensors="pt").to(device)
         with torch.no_grad():
             outputs = model(**inputs, output_hidden_states=True)
-            # We take the middle-to-late layer where semantic 'World Models' reside
             state = outputs.hidden_states[-2][0, -1, :].cpu().numpy()
             all_hidden_states.append(state)
             input_snippets.append(text)
@@ -57,53 +59,40 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
     state_assignments = kmeans.labels_
     # STEP C: Iterative Newtonian Interpretation
     cluster_texts = collections.defaultdict(list)
     for idx, cluster_id in enumerate(state_assignments):
         cluster_texts[cluster_id].append(input_snippets[idx])
     # Initialize Gemini model
-    gemini_model = genai.GenerativeModel('gemini-2.5-flash')
-    # We start with a clean header
     state_info = "## 🧠 Newtonian State Interpretation\n"
-    state_info += "Each state represents a discovered *Equivalence Class* where the model treats different data as functionally identical for its internal world model.\n\n"
-    # LOOP: Call Gemini for EACH state individually to ensure equal depth
     for cluster_id in range(n_clusters):
         snippets = cluster_texts[cluster_id]
-        # Provide a richer payload for better structural laws
         context_payload = "\n".join([f"- {s}" for s in snippets[:8]])
-        # IMPROVED PROMPT: Forces individual focus on ONE state at a time
         prompt = f"""
         Act as a Mechanistic Interpretability Researcher. You are reverse-engineering Cluster S{cluster_id}
-        from the '{dataset_key}' dataset.
-        The model has grouped these snippets into an 'Equivalence Class'—an internal map where it
-        applies the same logical laws to diverse data.
-        ### RAW SNIPPETS FOR S{cluster_id}:
         {context_payload}
-        ### YOUR RESEARCH TASK:
-        Analyze this cluster with high-fidelity Newtonian depth. Focus ONLY on S{cluster_id}.
-        ### REQUIRED OUTPUT FORMAT (Strictly Follow):
         **State S{cluster_id} [Structural State Label]**
-        - **Internal World Model**: Explain the CORE 'Law' or 'Invariant' here. What logical map has the model activated?
-          Describe how this state interconnects lore, timelines, or mechanics into a single 'Coherent World State'.
-        - **Dataset Sensor**: List the specific 'Triggers' (Proper Nouns, Terminology, Syntax) that push the model here.
-        - **Predictive Function**: Explain how being in this state constrains the model's future tokens.
-          What next-tokens are 'Biased' or 'Anticipated'?
         """
         try:
-            # Iterative generation ensures Gemini doesn't 'lazy-load' the middle states
             response = gemini_model.generate_content(prompt, generation_config={"temperature": 0.2})
             state_info += response.text.strip() + "\n\n---\n\n"
         except Exception as e:
-            state_info += f"**State S{cluster_id} [API Error]**: Analysis failed for this state. (Error: {str(e)})\n\n---\n\n"
     # Step D: DFA Reconstruction
     G = nx.DiGraph()
     for i in range(len(state_assignments) - 1):
@@ -116,15 +105,23 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
     plt.savefig("dfa_output.png", transparent=True)
     plt.close()
-    analysis_brief = f"Model '{model_name}' identified {n_clusters} distinct equivalence classes in the '{dataset_key}' dataset."
     return "dfa_output.png", analysis_brief, state_info
-# 2. Gradio UI with Elaboration
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🌐 The Universal Newtonian Probe")
     gr.Markdown("Extracting the hidden Deterministic Finite Automaton (DFA) from any model and dataset.")
     with gr.Row():
         m_drop = gr.Dropdown(choices=MODELS, label="Select Model", value="gpt2")
         d_drop = gr.Dropdown(choices=list(DATASET_CONFIGS.keys()), label="Select Dataset", value="wikitext (v2-raw)")
@@ -135,8 +132,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         out_img = gr.Image(label="Extracted DFA (World Map)")
         with gr.Column():
             out_txt = gr.Textbox(label="Analysis Status")
-            out_elaboration = gr.Markdown() # Markdown for better readability of interpretation
-    btn.click(analyze_world_model, inputs=[m_drop, d_drop], outputs=[out_img, out_txt, out_elaboration])
 demo.launch()

 import os
 import google.generativeai as genai
 # 1. Models & Datasets Configs
 MODELS = ["gpt2", "distilgpt2", "qwen/Qwen2.5-0.5B", "TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
 DATASET_CONFIGS = {
     "AG News": ("ag_news", None)
 }
+# Added api_key parameter to the function
+def analyze_world_model(api_key, model_name, dataset_key, num_samples=25):
+    # Validate API Key
+    if not api_key or len(api_key) < 10:
+        return None, "Error: Please provide a valid Gemini API Key.", ""
+    # Configure Gemini with the user-provided key
+    genai.configure(api_key=api_key)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dataset_name, config_name = DATASET_CONFIGS[dataset_key]
         inputs = tokenizer(text, return_tensors="pt").to(device)
         with torch.no_grad():
             outputs = model(**inputs, output_hidden_states=True)
             state = outputs.hidden_states[-2][0, -1, :].cpu().numpy()
             all_hidden_states.append(state)
             input_snippets.append(text)
     state_assignments = kmeans.labels_
     # STEP C: Iterative Newtonian Interpretation
     cluster_texts = collections.defaultdict(list)
     for idx, cluster_id in enumerate(state_assignments):
         cluster_texts[cluster_id].append(input_snippets[idx])
     # Initialize Gemini model
+    gemini_model = genai.GenerativeModel('gemini-1.5-flash') # Updated to a widely available version
     state_info = "## 🧠 Newtonian State Interpretation\n"
+    state_info += "Each state represents a discovered *Equivalence Class*.\n\n"
     for cluster_id in range(n_clusters):
         snippets = cluster_texts[cluster_id]
         context_payload = "\n".join([f"- {s}" for s in snippets[:8]])
         prompt = f"""
         Act as a Mechanistic Interpretability Researcher. You are reverse-engineering Cluster S{cluster_id}
+        from the '{dataset_key}' dataset. Analyze this cluster with high-fidelity Newtonian depth.
+        ### RAW SNIPPETS:
         {context_payload}
+        ### REQUIRED OUTPUT FORMAT:
         **State S{cluster_id} [Structural State Label]**
+        - **Internal World Model**: CORE 'Law' or 'Invariant'.
+        - **Dataset Sensor**: Triggers (Nouns, Syntax).
+        - **Predictive Function**: Biased future tokens.
         """
         try:
             response = gemini_model.generate_content(prompt, generation_config={"temperature": 0.2})
             state_info += response.text.strip() + "\n\n---\n\n"
         except Exception as e:
+            state_info += f"**State S{cluster_id} [API Error]**: {str(e)}\n\n---\n\n"
     # Step D: DFA Reconstruction
     G = nx.DiGraph()
     for i in range(len(state_assignments) - 1):
     plt.savefig("dfa_output.png", transparent=True)
     plt.close()
+    analysis_brief = f"Model '{model_name}' identified {n_clusters} distinct equivalence classes."
     return "dfa_output.png", analysis_brief, state_info
+# 2. Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🌐 The Universal Newtonian Probe")
     gr.Markdown("Extracting the hidden Deterministic Finite Automaton (DFA) from any model and dataset.")
+    with gr.Row():
+        # Added API Key Input
+        api_key_input = gr.Textbox(
+            label="Gemini API Key",
+            placeholder="paste your API key here...",
+            type="password"
+        )
     with gr.Row():
         m_drop = gr.Dropdown(choices=MODELS, label="Select Model", value="gpt2")
         d_drop = gr.Dropdown(choices=list(DATASET_CONFIGS.keys()), label="Select Dataset", value="wikitext (v2-raw)")
         out_img = gr.Image(label="Extracted DFA (World Map)")
         with gr.Column():
             out_txt = gr.Textbox(label="Analysis Status")
+            out_elaboration = gr.Markdown()
+    # Updated inputs to include api_key_input
+    btn.click(
+        analyze_world_model,
+        inputs=[api_key_input, m_drop, d_drop],
+        outputs=[out_img, out_txt, out_elaboration]
+    )
 demo.launch()