Phoenix21 committed on
Commit
5c3b16a
·
verified ·
1 Parent(s): e62887f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -5
app.py CHANGED
@@ -50,16 +50,47 @@ def analyze_world_model(model_name, dataset_key, num_samples=25):
50
  state_assignments = kmeans.labels_
51
 
52
  # Step C: State Elaboration Logic
53
- state_info = "### 🧠 State Interpretation & Dataset Mapping\n"
54
  cluster_texts = collections.defaultdict(list)
55
  for idx, cluster_id in enumerate(state_assignments):
56
  cluster_texts[cluster_id].append(input_snippets[idx])
57
-
 
 
 
 
58
  for cluster_id in range(n_clusters):
59
  snippets = cluster_texts[cluster_id]
60
- # Identify common tokens/attributes that represent this state
61
- summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
62
- state_info += f"**State S{cluster_id}**: Representing context such as: *{summary}*\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # Step D: DFA Reconstruction
65
  G = nx.DiGraph()
 
50
  state_assignments = kmeans.labels_
51
 
52
  # Step C: State Elaboration Logic
 
53
  cluster_texts = collections.defaultdict(list)
54
  for idx, cluster_id in enumerate(state_assignments):
55
  cluster_texts[cluster_id].append(input_snippets[idx])
56
+
57
+ # Initialize Gemini
58
+ gemini_model = genai.GenerativeModel('gemini-1.5-flash')
59
+ state_info = "## 🧠 Newtonian State Interpretation\n"
60
+
61
  for cluster_id in range(n_clusters):
62
  snippets = cluster_texts[cluster_id]
63
+ context_payload = "\n".join([f"- {s}" for s in snippets[:4]])
64
+
65
+ # Proper prompt engineering to decode the 'Equivalence Class'
66
+ prompt = f"""
67
+ Analyze these text snippets from the '{dataset_key}' dataset that fall into the same latent state cluster.
68
+ Identify the CORE structural or semantic theme (e.g., 'Historical Narrative', 'Technical Development', 'Numerical Lists').
69
+
70
+ Text Snippets:
71
+ {context_payload}
72
+
73
+ Format your response exactly as:
74
+ **State S{cluster_id} [Label]**: [One sentence explanation of the shared logic/context].
75
+ """
76
+
77
+ try:
78
+ response = gemini_model.generate_content(prompt)
79
+ state_info += response.text.strip() + "\n\n"
80
+ except Exception:
81
+ # Fallback if API fails
82
+ summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
83
+ state_info += f"**State S{cluster_id}**: Context: *{summary}*\n\n"
84
+ # state_info = "### 🧠 State Interpretation & Dataset Mapping\n"
85
+ # cluster_texts = collections.defaultdict(list)
86
+ # for idx, cluster_id in enumerate(state_assignments):
87
+ # cluster_texts[cluster_id].append(input_snippets[idx])
88
+
89
+ # for cluster_id in range(n_clusters):
90
+ # snippets = cluster_texts[cluster_id]
91
+ # # Identify common tokens/attributes that represent this state
92
+ # summary = " | ".join([s[:40] + "..." for s in snippets[:2]])
93
+ # state_info += f"**State S{cluster_id}**: Representing context such as: *{summary}*\n\n"
94
 
95
  # Step D: DFA Reconstruction
96
  G = nx.DiGraph()