Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,16 +50,47 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
|
|
| 50 |
state_assignments = kmeans.labels_
|
| 51 |
|
| 52 |
# Step C: State Elaboration Logic
|
| 53 |
-
state_info = "### 🧠 State Interpretation & Dataset Mapping\n"
|
| 54 |
cluster_texts = collections.defaultdict(list)
|
| 55 |
for idx, cluster_id in enumerate(state_assignments):
|
| 56 |
cluster_texts[cluster_id].append(input_snippets[idx])
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
for cluster_id in range(n_clusters):
|
| 59 |
snippets = cluster_texts[cluster_id]
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
# Step D: DFA Reconstruction
|
| 65 |
G = nx.DiGraph()
|
|
|
|
| 50 |
state_assignments = kmeans.labels_
|
| 51 |
|
| 52 |
# Step C: State Elaboration Logic
|
|
|
|
| 53 |
cluster_texts = collections.defaultdict(list)
|
| 54 |
for idx, cluster_id in enumerate(state_assignments):
|
| 55 |
cluster_texts[cluster_id].append(input_snippets[idx])
|
| 56 |
+
|
| 57 |
+
# Initialize Gemini
|
| 58 |
+
gemini_model = genai.GenerativeModel('gemini-1.5-flash')
|
| 59 |
+
state_info = "## 🧠 Newtonian State Interpretation\n"
|
| 60 |
+
|
| 61 |
for cluster_id in range(n_clusters):
|
| 62 |
snippets = cluster_texts[cluster_id]
|
| 63 |
+
context_payload = "\n".join([f"- {s}" for s in snippets[:4]])
|
| 64 |
+
|
| 65 |
+
# Proper prompt engineering to decode the 'Equivalence Class'
|
| 66 |
+
prompt = f"""
|
| 67 |
+
Analyze these text snippets from the '{dataset_key}' dataset that fall into the same latent state cluster.
|
| 68 |
+
Identify the CORE structural or semantic theme (e.g., 'Historical Narrative', 'Technical Development', 'Numerical Lists').
|
| 69 |
+
|
| 70 |
+
Text Snippets:
|
| 71 |
+
{context_payload}
|
| 72 |
+
|
| 73 |
+
Format your response exactly as:
|
| 74 |
+
**State S{cluster_id} [Label]**: [One sentence explanation of the shared logic/context].
|
| 75 |
+
"""
|
| 76 |
+
|
| 77 |
+
try:
|
| 78 |
+
response = gemini_model.generate_content(prompt)
|
| 79 |
+
state_info += response.text.strip() + "\n\n"
|
| 80 |
+
except Exception:
|
| 81 |
+
# Fallback if API fails
|
| 82 |
+
summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
|
| 83 |
+
state_info += f"**State S{cluster_id}**: Context: *{summary}*\n\n"
|
| 84 |
+
# state_info = "### 🧠 State Interpretation & Dataset Mapping\n"
|
| 85 |
+
# cluster_texts = collections.defaultdict(list)
|
| 86 |
+
# for idx, cluster_id in enumerate(state_assignments):
|
| 87 |
+
# cluster_texts[cluster_id].append(input_snippets[idx])
|
| 88 |
+
|
| 89 |
+
# for cluster_id in range(n_clusters):
|
| 90 |
+
# snippets = cluster_texts[cluster_id]
|
| 91 |
+
# # Identify common tokens/attributes that represent this state
|
| 92 |
+
# summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
|
| 93 |
+
# state_info += f"**State S{cluster_id}**: Representing context such as: *{summary}*\n\n"
|
| 94 |
|
| 95 |
# Step D: DFA Reconstruction
|
| 96 |
G = nx.DiGraph()
|