ToyUniversalInterpreter

Sleeping

App Files Community

Phoenix21 committed 28 days ago

Commit

cddf7c8

verified ·

1 Parent(s): d13742d

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -51

app.py CHANGED Viewed

@@ -55,79 +55,55 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
     n_clusters = 5
     kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(all_hidden_states)
     state_assignments = kmeans.labels_
-    # Step C: State Elaboration Logic
     cluster_texts = collections.defaultdict(list)
     for idx, cluster_id in enumerate(state_assignments):
         cluster_texts[cluster_id].append(input_snippets[idx])
-    # Initialize Gemini
-    gemini_model = genai.GenerativeModel('gemini-2.5-flash')
     state_info = "## 🧠 Newtonian State Interpretation\n"
     for cluster_id in range(n_clusters):
         snippets = cluster_texts[cluster_id]
-        context_payload = "\n".join([f"- {s}" for s in snippets[:4]])
-        # Proper prompt engineering to decode the 'Equivalence Class'
-        # prompt = f"""
-        # Analyze these text snippets from the '{dataset_key}' dataset that fall into the same latent state cluster.
-        # Identify the CORE structural or semantic theme (e.g., 'Historical Narrative', 'Technical Development', 'Numerical Lists').
-        # Text Snippets:
-        # {context_payload}
-        # Format your response exactly as:
-        # **State S{cluster_id} [Label]**: [One sentence explanation of the shared logic/context].
-        # """
         prompt = f"""
-        Act as a Mechanistic Interpretability Researcher. You are decoding the latent 'Newtonian' world model
-        within a Transformer for the '{dataset_key}' dataset.
-        You are analyzing Cluster S{cluster_id}. This cluster represents a specific 'Equivalence Class' where
-        the model treats different sequences as functionally identical.
-        ### RAW DATASET SNIPPETS FOR CLUSTER S{cluster_id}:
         {context_payload}
-        ### YOUR MANDATORY TASK:
-        Provide a comprehensive, high-fidelity analysis for THIS CLUSTER ALONE. You must deliver the same
-        level of depth as seen in previous successful state interpretations.
-        ### REQUIRED OUTPUT FORMAT:
         **State S{cluster_id} [Structural State Label]**
         - **Internal World Model**: Explain the CORE 'Law' or 'Invariant' here. What logical map has the model activated?
           Describe how this state interconnects lore, timelines, or mechanics into a single 'Coherent World State'.
-        - **Dataset Sensor**: List the specific 'Triggers' that push the model into this state.
-          Identify: (1) Proper Nouns/Identifiers, (2) Domain-Specific Terminology, (3) Syntax Patterns (e.g., lists, headers).
-        - **Predictive Function**: Explain how being in this state constrains the model's future.
-          What next-tokens are now 'Biased' or 'Anticipated'? How does this state filter out irrelevant topics?
-        ---
-        (Ensure your response is dense, professional, and strictly follows the bolded sections above.)
         """
         try:
-            response = gemini_model.generate_content(prompt)
-            state_info += response.text.strip() + "\n\n"
-        except Exception:
-            # Fallback if API fails
-            summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
-            state_info += f"**State S{cluster_id}**: Context: *{summary}*\n\n"
-    # state_info = "### 🧠 State Interpretation & Dataset Mapping\n"
-    # cluster_texts = collections.defaultdict(list)
-    # for idx, cluster_id in enumerate(state_assignments):
-    #     cluster_texts[cluster_id].append(input_snippets[idx])
-    # for cluster_id in range(n_clusters):
-    #     snippets = cluster_texts[cluster_id]
-    #     # Identify common tokens/attributes that represent this state
-    #     summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
-    #     state_info += f"**State S{cluster_id}**: Representing context such as: *{summary}*\n\n"
     # Step D: DFA Reconstruction
     G = nx.DiGraph()
     for i in range(len(state_assignments) - 1):

     n_clusters = 5
     kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(all_hidden_states)
     state_assignments = kmeans.labels_
+    # STEP C: Iterative Newtonian Interpretation
     cluster_texts = collections.defaultdict(list)
     for idx, cluster_id in enumerate(state_assignments):
         cluster_texts[cluster_id].append(input_snippets[idx])
+    # Initialize Gemini model
+    gemini_model = genai.GenerativeModel('gemini-1.5-flash')
+    # We start with a clean header
     state_info = "## 🧠 Newtonian State Interpretation\n"
+    state_info += "Each state represents a discovered *Equivalence Class* where the model treats different data as functionally identical for its internal world model.\n\n"
+    # LOOP: Call Gemini for EACH state individually to ensure equal depth
     for cluster_id in range(n_clusters):
         snippets = cluster_texts[cluster_id]
+        # Provide a richer payload for better structural laws
+        context_payload = "\n".join([f"- {s}" for s in snippets[:8]])
+        # IMPROVED PROMPT: Forces individual focus on ONE state at a time
         prompt = f"""
+        Act as a Mechanistic Interpretability Researcher. You are reverse-engineering Cluster S{cluster_id}
+        from the '{dataset_key}' dataset.
+        The model has grouped these snippets into an 'Equivalence Class'—an internal map where it
+        applies the same logical laws to diverse data.
+        ### RAW SNIPPETS FOR S{cluster_id}:
         {context_payload}
+        ### YOUR RESEARCH TASK:
+        Analyze this cluster with high-fidelity Newtonian depth. Focus ONLY on S{cluster_id}.
+        ### REQUIRED OUTPUT FORMAT (Strictly Follow):
         **State S{cluster_id} [Structural State Label]**
         - **Internal World Model**: Explain the CORE 'Law' or 'Invariant' here. What logical map has the model activated?
           Describe how this state interconnects lore, timelines, or mechanics into a single 'Coherent World State'.
+        - **Dataset Sensor**: List the specific 'Triggers' (Proper Nouns, Terminology, Syntax) that push the model here.
+        - **Predictive Function**: Explain how being in this state constrains the model's future tokens.
+          What next-tokens are 'Biased' or 'Anticipated'?
         """
         try:
+            # Iterative generation ensures Gemini doesn't 'lazy-load' the middle states
+            response = gemini_model.generate_content(prompt, generation_config={"temperature": 0.2})
+            state_info += response.text.strip() + "\n\n---\n\n"
+        except Exception as e:
+            state_info += f"**State S{cluster_id} [API Error]**: Analysis failed for this state. (Error: {str(e)})\n\n---\n\n"
     # Step D: DFA Reconstruction
     G = nx.DiGraph()
     for i in range(len(state_assignments) - 1):