ginnyxxxxxxx committed on
Commit
144e51b
·
1 Parent(s): b1c5fad
Files changed (1) hide show
  1. app.py +513 -253
app.py CHANGED
@@ -3,13 +3,15 @@ import pandas as pd
3
  import folium
4
  import numpy as np
5
  import os
6
- import json
 
7
 
8
  BASE = os.path.dirname(os.path.abspath(__file__))
9
  STAY_POINTS = os.path.join(BASE, "data", "stay_points_sampled.csv")
10
  POI_PATH = os.path.join(BASE, "data", "poi_sampled.csv")
11
  DEMO_PATH = os.path.join(BASE, "data", "demographics_sampled.csv")
12
- COT_PATH = os.path.join(BASE, "data", "cot_results.json") # <-- your JSON file
 
13
 
14
  SEX_MAP = {1:"Male", 2:"Female", -8:"Unknown", -7:"Prefer not to answer"}
15
  EDU_MAP = {1:"Less than HS", 2:"HS Graduate/GED", 3:"Some College/Associate",
@@ -48,185 +50,444 @@ def parse_act_types(x):
48
  return str(x)
49
 
50
  sp["act_label"] = sp["act_types"].apply(parse_act_types)
51
-
52
- # Load CoT JSON
53
- cot_by_agent = {}
54
- if os.path.exists(COT_PATH):
55
- print("Loading CoT results...")
56
- with open(COT_PATH, "r") as f:
57
- cot_raw = json.load(f)
58
- for result in cot_raw.get("inference_results", []):
59
- cot_by_agent[result["agent_id"]] = result
60
-
61
- # All CSV agents are available; CoT will fall back to mock if no match
62
  sample_agents = sorted(sp["agent_id"].unique().tolist())
63
  print(f"Ready. {len(sample_agents)} agents loaded.")
64
 
65
- MOCK_COT = {
66
- "text_representation": """MOBILITY TRAJECTORY DATA
67
- ===========================
68
- Observation Period: 2024-01-29 to 2024-02-25 (28 days)
69
- Total Stay Points: 82
70
- Unique Locations: 16
71
-
72
- LOCATION PATTERNS
73
- ----------------
74
- 1. residence
75
- Visits: 36 times
76
- Average Duration: 829 minutes
77
-
78
- 2. Clinton Mobile Estates
79
- Visits: 9 times
80
- Average Duration: 137 minutes
81
-
82
- 3. 7-Eleven
83
- Visits: 8 times
84
- Average Duration: 118 minutes
85
-
86
- 4. Euro Caffe
87
- Visits: 5 times
88
- Average Duration: 101 minutes
89
-
90
- 5. Hoa Phap Hoa Dao Trang
91
- Visits: 4 times
92
- Average Duration: 112 minutes
93
-
94
- TEMPORAL PATTERNS
95
- ----------------
96
- Activity by Time of Day:
97
- - morning: 56%
98
- - afternoon: 24%
99
- - evening: 20%
100
-
101
- Weekday vs Weekend:
102
- - weekday: 66%
103
- - weekend: 34%""",
104
-
105
- "weekly_checkin": """WEEKLY CHECK-IN SUMMARY
106
- =======================
107
- Period: 2024-01-29 to 2024-02-25 (28 days)
108
-
109
- --- Monday, January 29 (Weekday) ---
110
- Total activities: 2
111
- - 09:10-10:14 (64 mins): Bear Wire - Work, Services, DropOff
112
- - 10:38-08:54 (1336 mins): residence - Home, Social Visit, DropOff
113
-
114
- --- Tuesday, January 30 (Weekday) ---
115
- Total activities: 2
116
- - 09:12-11:06 (115 mins): Clinton Mobile Estates - Work, Recreation, Exercise
117
- - 11:24-09:36 (1331 mins): residence - Home, Social Visit, DropOff
118
-
119
- --- Wednesday, January 31 (Weekday) ---
120
- Total activities: 4
121
- - 09:52-14:07 (255 mins): Castaway Cove Water Playground - Work, Exercise, DropOff
122
- - 14:21-15:27 (66 mins): residence - Home, Social Visit, DropOff
123
- - 15:44-19:03 (198 mins): Clinton Mobile Estates - Work, Recreation, Exercise
124
- - 19:21-09:06 (825 mins): residence - Home, Social Visit, DropOff
125
-
126
- --- Friday, February 02 (Weekday) ---
127
- Total activities: 3
128
- - 08:34-09:29 (55 mins): Euro Caffe - Work, Dining
129
- - 09:58-11:06 (68 mins): 7-Eleven - Work, Shopping, Dining, DropOff
130
- - 11:25-08:56 (1291 mins): residence - Home, Social Visit, DropOff
131
-
132
- --- Saturday, February 03 (Weekend) ---
133
- Total activities: 5
134
- - 09:13-10:03 (50 mins): Clinton Mobile Estates - Work, Recreation, Exercise
135
- - 10:27-11:10 (43 mins): 7-Eleven - Work, Shopping, Dining, DropOff
136
- - 11:29-18:39 (430 mins): residence - Home, Social Visit, DropOff
137
- - 18:56-21:30 (154 mins): Pepper Shaker Cafe - Work, Dining
138
- - 21:48-09:16 (689 mins): residence - Home, Social Visit, DropOff
139
-
140
- --- Sunday, February 11 (Weekend) ---
141
- Total activities: 5
142
- - 10:05-11:56 (111 mins): Hoa Phap Hoa Dao Trang - Religious
143
- - 12:21-13:36 (75 mins): Pepper Shaker Cafe - Work, Dining
144
- - 14:56-16:58 (122 mins): 7-Eleven - Work, Shopping, Dining, DropOff
145
- - 17:17-13:27 (1209 mins): residence - Home, Social Visit, DropOff
146
-
147
- WEEKLY INSIGHTS
148
- ===============
149
- Most visited location: residence (36 visits)
150
- Time spent: DropOff 31.0% | Home 28.8% | Work 4.1% | Dining 2.3% | Exercise 1.7%
151
- Weekday avg activities: 2.7 | Weekend avg: 3.5""",
152
-
153
- "step1_response": """Based on the provided mobility trajectory data, here are the objective features extracted:
154
 
155
  LOCATION INVENTORY:
156
- - POI categories visited:
157
- - Residence
158
- - Clinton Mobile Estates (Recreation, Exercise)
159
- - 7-Eleven (Shopping, Dining, DropOff)
160
- - Euro Caffe (Dining)
161
- - Hoa Phap Hoa Dao Trang (Religious)
162
- - Castaway Cove Water Playground (Exercise, DropOff)
163
- - Salon 860 West (Shopping, Dining, DropOff)
164
- - Pepper Shaker Cafe (Dining)
165
- - Lucky Pierre Self Service Car Wash (Shopping, Dining, DropOff)
166
-
167
- - Frequency of visits:
168
- - Residence: 36 times
169
- - Clinton Mobile Estates: 9 times
170
- - 7-Eleven: 8 times
171
- - Euro Caffe: 5 times
172
- - Hoa Phap Hoa Dao Trang: 4 times
173
-
174
- - Apparent price level:
175
- - Budget: 7-Eleven, Lucky Pierre Self Service Car Wash
176
- - Mid-range: Euro Caffe, Pepper Shaker Cafe
177
 
178
  TEMPORAL PATTERNS:
179
- - Active hours: 09:00 to 23:00
180
- - Weekly distribution: 66% weekday, 34% weekend
181
- - Regularity: Consistent daily routines
182
- - Duration: Longer stays at residence (829 min avg), shorter at other venues
183
-
184
- SPATIAL CHARACTERISTICS:
185
- - Activity radius: up to ~13 km from home
186
- - Geographic distribution: Urban areas, mix of residential and commercial
187
-
188
- SEQUENCE OBSERVATIONS:
189
- - Common transitions: Home→Clinton Mobile Estates→Home, Home→7-Eleven→Home
190
- - Weekday pattern: morning work/exercise, afternoon errands
191
- - Weekend pattern: morning religious visit, afternoon dining/shopping""",
192
-
193
- "step2_response": """1. ROUTINE & SCHEDULE ANALYSIS:
194
- - Semi-structured lifestyle with consistent morning start times (~09:00)
195
- - Flexible schedule: varies between 2–5 activities per day
196
- - Weekday/weekend contrast suggests work-centric weekdays, more leisure on weekends
197
-
198
- 2. ECONOMIC BEHAVIOR PATTERNS:
199
- - Mix of budget-conscious and mid-range spending
200
- - Frequent 7-Eleven visits suggest convenience-oriented, cost-aware shopping
201
- - No signs of premium or luxury venue preferences
202
- - Activity radius (~13 km) suggests personal vehicle access
203
-
204
- 3. SOCIAL & LIFESTYLE INDICATORS:
205
- - Regular weekend religious attendance (Hoa Phap Hoa Dao Trang) β†’ community ties
206
- - Recreational activities (Castaway Cove, Clinton Mobile Estates) β†’ active lifestyle
207
- - Dining out occasionally (Euro Caffe, Pepper Shaker Cafe) β†’ moderate social life
208
- - High home-time suggests family-oriented or home-centered lifestyle
209
-
210
- 4. URBAN LIFESTYLE CHARACTERISTICS:
211
- - Urban/suburban mix: lives in residential area, travels to nearby commercial zones
212
- - Activity radius and venue diversity suggest working-class to middle-class area
213
- - Community engagement evident through religious and recreational venues
214
-
215
- 5. ROUTINE STABILITY:
216
- - Highly consistent patterns over 4 weeks with minimal deviation
217
- - No signs of major life transitions
218
- - Regular work-like attendance at Clinton Mobile Estates suggests stable employment""",
219
-
220
- "step3_response": """INCOME_PREDICTION: Middle ($35k-$75k)
221
- INCOME_CONFIDENCE: 4
222
- INCOME_REASONING: The individual's mobility patterns suggest a mix of budget-conscious and mid-range spending. Frequent visits to 7-Eleven and self-service venues indicate cost awareness, while occasional mid-range dining (Euro Caffe, Pepper Shaker Cafe) suggests some disposable income. The neighborhood types are commercial/residential mixed, consistent with a middle-class area. No luxury venue visits detected. Activity radius of ~13 km and apparent vehicle access align with middle-income transport patterns.
223
-
224
- RANKED ALTERNATIVES:
225
- 1. Middle ($35k-$75k) β€” Primary prediction
226
- 2. Low ($15k-$35k) β€” Budget venue frequency could suggest lower income
227
- 3. Upper-Middle ($75k-$125k) β€” Unlikely given absence of premium venues"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  def build_map(agent_sp):
232
  agent_sp = agent_sp.reset_index(drop=True).copy()
@@ -243,25 +504,18 @@ def build_map(agent_sp):
243
 
244
  n = len(agent_sp)
245
  for i, row in agent_sp.iterrows():
246
- # Red gradient: light red (#ffcccc) β†’ deep red (#8b0000)
247
  ratio = i / max(n - 1, 1)
248
- r = 255
249
- g = int(204 * (1 - ratio)) # 204 β†’ 0
250
- b = int(204 * (1 - ratio)) # 204 β†’ 0
251
- # Clamp deep end toward dark red (139, 0, 0)
252
- r = int(255 - ratio * (255 - 139)) # 255 β†’ 139
253
- g = int(204 * (1 - ratio) * (1 - ratio * 0.3)) # fade to 0
254
  b = 0
255
  color = f"#{r:02x}{g:02x}{b:02x}"
256
-
257
  folium.CircleMarker(
258
  location=[row["latitude"], row["longitude"]],
259
  radius=7, color=color, fill=True, fill_color=color, fill_opacity=0.9,
260
  popup=folium.Popup(
261
  f"<b>#{i+1} {row['name']}</b><br>"
262
  f"{row['start_datetime'].strftime('%a %m/%d %H:%M')}<br>"
263
- f"{int(row['duration_min'])} min<br>"
264
- f"{row['act_label']}",
265
  max_width=220
266
  )
267
  ).add_to(m)
@@ -272,7 +526,7 @@ def build_map(agent_sp):
272
 
273
 
274
  def build_demo_text(row):
275
- age = int(row["age"]) if row["age"] > 0 else "Unknown"
276
  return (
277
  f"Age: {age} | "
278
  f"Sex: {SEX_MAP.get(int(row['sex']), row['sex'])} | "
@@ -282,54 +536,79 @@ def build_demo_text(row):
282
  )
283
 
284
 
285
def parse_step3(text):
    """Pull the prediction, confidence and reasoning fields out of a step-3 reply.

    Each line of *text* is stripped and compared against the three
    ``INCOME_*:`` markers.  When a line starts with a marker, the marker text
    is removed from the line and the remainder (stripped) becomes that field's
    value; a later matching line overwrites an earlier one.

    Returns:
        A ``(prediction, confidence, reasoning)`` tuple of strings; a field
        that never appears is returned as ``""``.
    """
    found = {
        "INCOME_PREDICTION:": "",
        "INCOME_CONFIDENCE:": "",
        "INCOME_REASONING:": "",
    }
    for raw_line in text.splitlines():
        candidate = raw_line.strip()
        for marker in found:
            if candidate.startswith(marker):
                found[marker] = candidate.replace(marker, "").strip()
                break
    return (
        found["INCOME_PREDICTION:"],
        found["INCOME_CONFIDENCE:"],
        found["INCOME_REASONING:"],
    )
297
-
298
 
def on_select(agent_id):
    """Build every panel's content for the agent chosen in the dropdown.

    Args:
        agent_id: Agent identifier as delivered by the dropdown; converted
            with ``int()``.

    Returns:
        ``(map_html, raw_text, step1, step2, step3_summary, demo_text)``.
        The chain-of-thought texts come from ``cot_by_agent`` when it has an
        entry for the agent and from ``MOCK_COT`` otherwise.
    """
    agent_id = int(agent_id)
    agent_sp = sp[sp["agent_id"] == agent_id].sort_values("start_datetime")
    demo_rows = demo[demo["agent_id"] == agent_id]
    cot = cot_by_agent.get(agent_id, MOCK_COT)

    map_html = build_map(agent_sp)
    # An agent present in the stay points may have no demographics row;
    # indexing .iloc[0] on an empty selection would raise IndexError and
    # break the whole callback, so fall back to a short notice instead.
    if demo_rows.empty:
        demo_text = f"Demographics unavailable for agent {agent_id}"
    else:
        demo_text = build_demo_text(demo_rows.iloc[0])

    # NUMOSIM raw data
    raw_text = cot.get("text_representation", "") + "\n\n" + cot.get("weekly_checkin", "")

    # CoT stages
    step1 = cot.get("step1_response", "No data")
    step2 = cot.get("step2_response", "No data")
    step3_raw = cot.get("step3_response", "No data")
    pred, conf, reason = parse_step3(step3_raw)
    step3_summary = (
        f"INCOME PREDICTION: {pred}\nCONFIDENCE: {conf}/5\n\n"
        f"REASONING:\n{reason}\n\n---FULL RESPONSE---\n{step3_raw}"
    )

    return map_html, raw_text, step1, step2, step3_summary, demo_text
 
 
 
319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
 
321
- custom_css = """
322
- .gradio-container { max-width: 1600px !important; }
323
- .stage-label { font-weight: bold; color: #b22222; }
324
- """
325
 
326
- with gr.Blocks(title="HiCoTraj Demo", theme=gr.themes.Soft(), css=custom_css) as app:
327
- gr.Markdown("## πŸ—ΊοΈ HiCoTraj: Trajectory Visualization & Chain-of-Thought Demo")
 
 
328
  gr.Markdown("*Zero-Shot Demographic Reasoning via Hierarchical Chain-of-Thought Prompting from Trajectory*")
329
 
330
- # ── Top bar ──────────────────────────────────────────────────────────────
331
  with gr.Row():
332
- agent_dd = gr.Dropdown(
 
 
 
 
 
 
 
 
333
  choices=[str(a) for a in sample_agents],
334
  label="Select Agent",
335
  value=str(sample_agents[0]),
@@ -341,56 +620,37 @@ with gr.Blocks(title="HiCoTraj Demo", theme=gr.themes.Soft(), css=custom_css) as
341
  scale=4
342
  )
343
 
344
- # ── Main content: Left | Right ────────────────────────────────────────
345
  with gr.Row():
346
 
347
- # LEFT: Map + NUMOSIM raw data
348
  with gr.Column(scale=1):
349
- gr.Markdown("### πŸ“ Trajectory Map")
350
- map_out = gr.HTML(label="Trajectory Map")
351
-
352
- gr.Markdown("### πŸ“‹ Contextual Trajectory Data")
353
  raw_out = gr.Textbox(
354
- label="Mobility Summary + Weekly Check-in",
355
- lines=25,
356
- interactive=False
357
  )
358
 
359
- # RIGHT: CoT three stages
360
  with gr.Column(scale=1):
361
- gr.Markdown("### 🧠 Hierarchical Chain-of-Thought Reasoning")
362
-
363
- with gr.Accordion("πŸ“Œ Stage 1 β€” Factual Feature Extraction", open=True):
364
- step1_out = gr.Textbox(
365
- label="Stage 1 Response",
366
- lines=12,
367
- interactive=False
368
- )
369
-
370
- with gr.Accordion("πŸ” Stage 2 β€” Behavioral Pattern Analysis", open=True):
371
- step2_out = gr.Textbox(
372
- label="Stage 2 Response",
373
- lines=12,
374
- interactive=False
375
- )
376
-
377
- with gr.Accordion("🎯 Stage 3 β€” Demographic Inference", open=True):
378
- step3_out = gr.Textbox(
379
- label="Stage 3 Response (Income Prediction)",
380
- lines=12,
381
- interactive=False
382
- )
383
 
384
  agent_dd.change(
385
- fn=on_select,
386
- inputs=agent_dd,
387
- outputs=[map_out, raw_out, step1_out, step2_out, step3_out, demo_label]
388
  )
389
  app.load(
390
- fn=on_select,
391
- inputs=agent_dd,
392
- outputs=[map_out, raw_out, step1_out, step2_out, step3_out, demo_label]
 
 
 
 
393
  )
394
 
395
  if __name__ == "__main__":
396
- app.launch(share=True)
 
3
  import folium
4
  import numpy as np
5
  import os
6
+ import re
7
+ from huggingface_hub import InferenceClient
8
 
9
  BASE = os.path.dirname(os.path.abspath(__file__))
10
  STAY_POINTS = os.path.join(BASE, "data", "stay_points_sampled.csv")
11
  POI_PATH = os.path.join(BASE, "data", "poi_sampled.csv")
12
  DEMO_PATH = os.path.join(BASE, "data", "demographics_sampled.csv")
13
+
14
+ MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
15
 
16
  SEX_MAP = {1:"Male", 2:"Female", -8:"Unknown", -7:"Prefer not to answer"}
17
  EDU_MAP = {1:"Less than HS", 2:"HS Graduate/GED", 3:"Some College/Associate",
 
50
  return str(x)
51
 
52
  sp["act_label"] = sp["act_types"].apply(parse_act_types)
 
 
 
 
 
 
 
 
 
 
 
53
  sample_agents = sorted(sp["agent_id"].unique().tolist())
54
  print(f"Ready. {len(sample_agents)} agents loaded.")
55
 
56
+
57
+ # ── Mobility text builders ────────────────────────────────────────────────────
58
+
59
def build_mobility_summary(agent_sp):
    """Summarise one agent's stay points as a plain-text report.

    Args:
        agent_sp: DataFrame of stay points.  The columns read here are
            ``name``, ``duration_min``, ``start_datetime`` and
            ``end_datetime`` (the two datetime columns are used through the
            ``.dt`` accessor, so they must be datetime-like).

    Returns:
        A newline-joined report: observation period, total and unique
        location counts, the five most-visited locations with visit count and
        mean duration, the share of stay points per time of day, and the
        weekday/weekend split.  For an empty frame only the header and a zero
        stay-point count are returned.
    """
    header = ["MOBILITY TRAJECTORY DATA", "==========================="]
    if agent_sp.empty:
        # min()/max() of an empty datetime column is NaT, and NaT.strftime
        # raises; there is nothing to summarise anyway.
        return "\n".join(header + ["Total Stay Points: 0"])

    starts = agent_sp["start_datetime"]
    first_start = starts.min()
    last_end = agent_sp["end_datetime"].max()
    # NOTE(review): Timedelta.days is the floored elapsed span, not an
    # inclusive calendar-day count -- confirm which one the report should show.
    days = (last_end - first_start).days

    top5 = (agent_sp.groupby("name")["duration_min"]
            .agg(visits="count", avg_dur="mean")
            .sort_values("visits", ascending=False)
            .head(5))

    lines = header + [
        f"Observation Period: {first_start.strftime('%Y-%m-%d')} "
        f"to {last_end.strftime('%Y-%m-%d')} ({days} days)",
        f"Total Stay Points: {len(agent_sp)}",
        f"Unique Locations: {agent_sp['name'].nunique()}",
        "",
        "LOCATION PATTERNS",
        "----------------",
    ]
    for rank, (name, row) in enumerate(top5.iterrows(), 1):
        lines += [
            f"{rank}. {name}",
            f" Visits: {int(row['visits'])} times",
            f" Average Duration: {int(row['avg_dur'])} minutes",
            "",
        ]

    def _time_of_day(hour):
        if 5 <= hour < 12:
            return "morning"
        if 12 <= hour < 17:
            return "afternoon"
        if 17 <= hour < 21:
            return "evening"
        return "night"

    # Derived series are computed directly from the column; no copy of the
    # whole frame is needed because nothing is written back to it.
    tod_pct = (
        starts.dt.hour.apply(_time_of_day).value_counts(normalize=True) * 100
    ).round(0).astype(int)

    # Round (rather than truncate) so the weekday share is treated the same
    # way as the time-of-day shares above, and so float error such as
    # 0.57 * 100 == 56.999... does not knock a point off.
    weekday_share = (starts.dt.dayofweek < 5).mean()
    wd_pct = int(round(weekday_share * 100))

    lines += ["TEMPORAL PATTERNS", "----------------", "Activity by Time of Day:"]
    lines += [f"- {label}: {pct}%" for label, pct in tod_pct.items()]
    lines += [
        "",
        "Weekday vs Weekend:",
        f"- weekday: {wd_pct}%",
        f"- weekend: {100 - wd_pct}%",
    ]
    return "\n".join(lines)
103
+
104
+
105
def build_weekly_checkin(agent_sp):
    """Render an agent's stay points as a day-by-day check-in listing.

    Stay points are grouped by the calendar date of ``start_datetime``.  Each
    day gets a header with its weekday name, the date and a Weekday/Weekend
    tag, an activity count, and one line per stay point showing start-end
    time, duration in minutes, venue name and activity label.

    Args:
        agent_sp: DataFrame with ``start_datetime``, ``end_datetime``,
            ``duration_min``, ``name`` and ``act_label`` columns.

    Returns:
        The whole listing as one newline-joined string.
    """
    report = ["WEEKLY CHECK-IN SUMMARY", "======================="]
    per_day = agent_sp.groupby(agent_sp["start_datetime"].dt.date)
    for day, visits in per_day:
        first_start = visits["start_datetime"].iloc[0]
        day_kind = "Weekday" if first_start.dayofweek < 5 else "Weekend"
        report.append(f"\n--- {first_start.strftime('%A')}, {day} ({day_kind}) ---")
        report.append(f"Total activities: {len(visits)}")
        columns = zip(
            visits["start_datetime"],
            visits["end_datetime"],
            visits["duration_min"],
            visits["name"],
            visits["act_label"],
        )
        report.extend(
            f"- {begin.strftime('%H:%M')}-{finish.strftime('%H:%M')} "
            f"({int(minutes)} mins): {venue} - {activity}"
            for begin, finish, minutes, venue, activity in columns
        )
    return "\n".join(report)
122
+
123
+
124
+ # ── Prompts ───────────────────────────────────────────────────────────────────
125
+
126
# System prompt for stage 1: factual feature extraction from the trajectory text.
STEP1_SYSTEM = """You are an expert mobility analyst. Extract objective features from the trajectory data.
Respond with EXACTLY this structure, keep each point to one short sentence:

LOCATION INVENTORY:
- Top venues: [list top 3 with visit counts]
- Price level: [budget/mid-range/high-end mix]
- Neighborhood: [residential/commercial/urban/suburban]

TEMPORAL PATTERNS:
- Active hours: [time range]
- Weekday/Weekend: [ratio]
- Routine: [consistent/variable]

SEQUENCE:
- Typical chain: [e.g. Home → Work → Home]
- Notable pattern: [one observation]

Do NOT interpret or infer demographics. Be concise."""

# System prompt for stage 2: behavioral analysis.  The five upper-case keys
# are the ones render_chain searches for when laying out the stage-2 card.
STEP2_SYSTEM = """You are an expert mobility analyst. Based on the extracted features, analyze behavioral patterns.
Respond with EXACTLY this structure, one short sentence per point:

SCHEDULE: [fixed/flexible/shift — one sentence]
ECONOMIC: [budget/mid-range/premium spending — one sentence]
SOCIAL: [family/individual/community focus — one sentence]
LIFESTYLE: [urban professional/suburban/student/other — one sentence]
STABILITY: [routine consistency — one sentence]

Do NOT make income predictions yet. Be concise."""

# System prompt for stage 3: income inference.  The four field prefixes are
# the ones render_chain looks for at the start of each reply line.
STEP3_SYSTEM = """You are an expert mobility analyst performing final income inference.
Based on the trajectory features and behavioral analysis, output EXACTLY:

INCOME_PREDICTION: [Very Low (<$15k) | Low ($15k-$35k) | Middle ($35k-$75k) | Upper-Middle ($75k-$125k) | High ($125k-$200k) | Very High (>$200k)]
INCOME_CONFIDENCE: [1-5]
INCOME_REASONING: [2-3 sentences linking specific mobility evidence to the prediction]
ALTERNATIVES: [2nd most likely] | [3rd most likely]"""
163
+
164
+
165
def call_llm(client, system_prompt, user_content, max_tokens=400):
    """Send one system+user exchange to the chat model and return its reply text.

    Args:
        client: Object exposing ``chat.completions.create`` (the callers in
            this file pass a ``huggingface_hub.InferenceClient``).
        system_prompt: Text of the system message.
        user_content: Text of the user message.
        max_tokens: Upper bound on generated tokens for this call.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        RuntimeError: If the first choice carries no message content.
    """
    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=max_tokens,
        temperature=0.3,
    )
    content = response.choices[0].message.content
    if content is None:
        # The content field is optional in chat-completion responses; fail
        # with a readable message instead of AttributeError on None.strip().
        raise RuntimeError("Model returned no message content")
    return content.strip()
176
+
177
+
178
+ # ── HTML rendering ────────────────────────────────────────────────────────────
179
+
180
+ CHAIN_CSS = """
181
+ <style>
182
+ @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600&display=swap');
183
+
184
+ .hicotraj-chain {
185
+ font-family: 'IBM Plex Sans', sans-serif;
186
+ padding: 12px 4px;
187
+ max-width: 100%;
188
+ }
189
+
190
+ /* Stage cards */
191
+ .stage-card {
192
+ border-radius: 10px;
193
+ padding: 16px 18px;
194
+ margin-bottom: 0;
195
+ position: relative;
196
+ transition: box-shadow 0.3s;
197
+ }
198
+ .stage-card.dim { opacity: 0.35; filter: grayscale(0.4); }
199
+ .stage-card.active { box-shadow: 0 4px 20px rgba(0,0,0,0.12); opacity: 1; filter: none; }
200
+
201
+ .stage-card.s1 { background: #f8f9fc; border: 1.5px solid #c8d0e0; }
202
+ .stage-card.s2 { background: #fdf6f0; border: 1.5px solid #e8c9a8; }
203
+ .stage-card.s3 { background: #fff8f8; border: 2px solid #c0392b; }
204
+
205
+ .stage-header {
206
+ display: flex;
207
+ align-items: center;
208
+ gap: 10px;
209
+ margin-bottom: 10px;
210
+ }
211
+ .stage-badge {
212
+ font-family: 'IBM Plex Mono', monospace;
213
+ font-size: 10px;
214
+ font-weight: 600;
215
+ letter-spacing: 0.08em;
216
+ padding: 3px 8px;
217
+ border-radius: 4px;
218
+ text-transform: uppercase;
219
+ }
220
+ .s1 .stage-badge { background: #dde3f0; color: #3a4a6b; }
221
+ .s2 .stage-badge { background: #f0dcc8; color: #7a4010; }
222
+ .s3 .stage-badge { background: #c0392b; color: #fff; }
223
+
224
+ .stage-title {
225
+ font-size: 13px;
226
+ font-weight: 600;
227
+ color: #1a1a2e;
228
+ }
229
+
230
+ /* Content inside cards */
231
+ .tag-row { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 4px; }
232
+ .tag {
233
+ font-family: 'IBM Plex Mono', monospace;
234
+ font-size: 11px;
235
+ background: #e8ecf5;
236
+ color: #2c3e60;
237
+ padding: 3px 8px;
238
+ border-radius: 4px;
239
+ white-space: nowrap;
240
+ }
241
+ .s2 .tag { background: #f5e8d8; color: #6b3a10; }
242
+
243
+ .behavior-row {
244
+ display: grid;
245
+ grid-template-columns: 100px 1fr;
246
+ gap: 4px 10px;
247
+ margin-top: 2px;
248
+ font-size: 12px;
249
+ line-height: 1.5;
250
+ }
251
+ .bkey {
252
+ font-family: 'IBM Plex Mono', monospace;
253
+ font-size: 11px;
254
+ font-weight: 600;
255
+ color: #9b6a3a;
256
+ padding-top: 1px;
257
+ }
258
+ .bval { color: #3a2a1a; }
259
+
260
+ /* Prediction block */
261
+ .pred-block { margin-top: 8px; }
262
+ .pred-label {
263
+ font-size: 11px;
264
+ font-family: 'IBM Plex Mono', monospace;
265
+ color: #888;
266
+ text-transform: uppercase;
267
+ letter-spacing: 0.06em;
268
+ margin-bottom: 4px;
269
+ }
270
+ .pred-value {
271
+ font-size: 22px;
272
+ font-weight: 600;
273
+ color: #c0392b;
274
+ letter-spacing: -0.01em;
275
+ margin-bottom: 8px;
276
  }
277
+ .confidence-bar-wrap {
278
+ display: flex;
279
+ align-items: center;
280
+ gap: 10px;
281
+ margin-bottom: 10px;
282
+ }
283
+ .confidence-bar-bg {
284
+ flex: 1;
285
+ height: 6px;
286
+ background: #f0d0cf;
287
+ border-radius: 3px;
288
+ overflow: hidden;
289
+ }
290
+ .confidence-bar-fill {
291
+ height: 100%;
292
+ background: linear-gradient(90deg, #e74c3c, #8b0000);
293
+ border-radius: 3px;
294
+ transition: width 0.8s ease;
295
+ }
296
+ .confidence-label {
297
+ font-family: 'IBM Plex Mono', monospace;
298
+ font-size: 11px;
299
+ color: #c0392b;
300
+ font-weight: 600;
301
+ white-space: nowrap;
302
+ }
303
+ .reasoning-text {
304
+ font-size: 12px;
305
+ color: #4a2a2a;
306
+ line-height: 1.6;
307
+ border-left: 3px solid #e8c0be;
308
+ padding-left: 10px;
309
+ margin-top: 6px;
310
+ }
311
+ .alternatives {
312
+ margin-top: 10px;
313
+ font-size: 11px;
314
+ font-family: 'IBM Plex Mono', monospace;
315
+ color: #999;
316
+ }
317
+ .alternatives span { color: #c0392b; opacity: 0.7; }
318
+
319
+ /* Arrow connector */
320
+ .chain-arrow {
321
+ display: flex;
322
+ flex-direction: column;
323
+ align-items: center;
324
+ margin: 0;
325
+ padding: 4px 0;
326
+ gap: 0;
327
+ }
328
+ .arrow-line {
329
+ width: 2px;
330
+ height: 18px;
331
+ background: linear-gradient(180deg, #c8d0e0, #e8c9a8);
332
+ }
333
+ .arrow-label {
334
+ font-family: 'IBM Plex Mono', monospace;
335
+ font-size: 10px;
336
+ color: #aaa;
337
+ letter-spacing: 0.06em;
338
+ text-transform: uppercase;
339
+ background: white;
340
+ padding: 2px 8px;
341
+ border: 1px solid #e0e0e0;
342
+ border-radius: 10px;
343
+ margin: 2px 0;
344
+ }
345
+ .arrow-tip {
346
+ width: 0; height: 0;
347
+ border-left: 5px solid transparent;
348
+ border-right: 5px solid transparent;
349
+ border-top: 7px solid #e8c9a8;
350
+ }
351
+
352
+ /* Waiting state */
353
+ .waiting-dot {
354
+ display: inline-block;
355
+ width: 7px; height: 7px;
356
+ border-radius: 50%;
357
+ background: #ccc;
358
+ margin: 0 2px;
359
+ animation: pulse 1.2s ease-in-out infinite;
360
+ }
361
+ .waiting-dot:nth-child(2) { animation-delay: 0.2s; }
362
+ .waiting-dot:nth-child(3) { animation-delay: 0.4s; }
363
+ @keyframes pulse {
364
+ 0%, 100% { opacity: 0.3; transform: scale(0.8); }
365
+ 50% { opacity: 1; transform: scale(1.1); }
366
+ }
367
+ </style>
368
+ """
369
 
370
def _waiting_dots():
    """Return the markup for the three pulsing "in progress" dots."""
    single_dot = '<span class="waiting-dot"></span>'
    return single_dot * 3
372
+
373
def render_chain(s1_text="", s2_text="", s3_text="", status="idle"):
    """Render the three-stage reasoning chain as a single HTML string.

    Args:
        s1_text: Raw stage-1 model reply; short lines become tags.
        s2_text: Raw stage-2 model reply; searched for the five behavior keys.
        s3_text: Raw stage-3 model reply, or a free-form warning/error message.
        status: ``idle`` | ``running1`` | ``running2`` | ``running3`` |
            ``done``.  Decides which cards are highlighted and which stage,
            if any, shows the waiting animation instead of content.

    Returns:
        ``CHAIN_CSS`` followed by the three stage cards and the two arrows
        between them.

    All text taken from the three ``*_text`` arguments is HTML-escaped before
    it is placed in the markup.
    """
    # Imported locally so this function carries its own dependency; model
    # output and exception messages are untrusted and must not reach the
    # page as raw markup.
    from html import escape

    em_dash = "\u2014"
    placeholder = (
        '<div style="font-size:12px;color:#bbb;padding:6px 0;">'
        'Run inference to see results</div>'
    )

    s1_active = status in ("running1", "running2", "running3", "done")
    s2_active = status in ("running2", "running3", "done")
    s3_active = status in ("running3", "done")

    # -- Stage 1 content ------------------------------------------------------
    if status == "running1":
        s1_content = f'<div style="padding:8px 0; color:#888; font-size:13px;">Extracting features {_waiting_dots()}</div>'
    elif s1_text:
        # Short, non-heading lines of the reply are shown as tags (max 8).
        tags = []
        for line in s1_text.splitlines():
            line = line.strip().lstrip("-").strip()
            if line and len(line) < 60 and not line.endswith(":"):
                tags.append(line)
            if len(tags) >= 8:
                break
        tag_html = "".join(f'<span class="tag">{escape(t)}</span>' for t in tags)
        s1_content = f'<div class="tag-row">{tag_html}</div>'
    else:
        s1_content = placeholder

    # -- Stage 2 content ------------------------------------------------------
    behavior_keys = ("SCHEDULE", "ECONOMIC", "SOCIAL", "LIFESTYLE", "STABILITY")
    if status == "running2":
        s2_content = f'<div style="padding:8px 0; color:#a06030; font-size:13px;">Analyzing behavior {_waiting_dots()}</div>'
    elif s2_text:
        rows = []
        for key in behavior_keys:
            # Prefer a match where the key opens a line (optionally after
            # bullets, numbering or markdown markers) so that a word such as
            # "economic" inside another key's sentence is not mistaken for
            # the ECONOMIC field; fall back to the looser search otherwise.
            m = re.search(
                rf"^[^A-Za-z\n]*{key}\b[:*\s]+(.+)",
                s2_text,
                re.IGNORECASE | re.MULTILINE,
            )
            if m is None:
                m = re.search(rf"{key}[:\s]+(.+)", s2_text, re.IGNORECASE)
            val = m.group(1).strip().rstrip(".") if m else em_dash
            if len(val) > 80:
                val = val[:77] + "..."
            # Escape after truncating so an entity is never cut in half.
            rows.append(
                f'<div class="bkey">{key}</div><div class="bval">{escape(val)}</div>'
            )
        s2_content = f'<div class="behavior-row">{"".join(rows)}</div>'
    else:
        s2_content = placeholder

    # -- Stage 3 content ------------------------------------------------------
    if status == "running3":
        s3_content = f'<div style="padding:8px 0; color:#c0392b; font-size:13px;">Inferring demographics {_waiting_dots()}</div>'
    elif s3_text:
        fields = {
            "INCOME_PREDICTION:": "",
            "INCOME_CONFIDENCE:": "",
            "INCOME_REASONING:": "",
            "ALTERNATIVES:": "",
        }
        for line in s3_text.splitlines():
            line = line.strip()
            for prefix in fields:
                if line.startswith(prefix):
                    fields[prefix] = line[len(prefix):].strip()
                    break
        pred = fields["INCOME_PREDICTION:"]
        conf_raw = fields["INCOME_CONFIDENCE:"]
        reasoning = fields["INCOME_REASONING:"]
        alts = fields["ALTERNATIVES:"]

        if not any(fields.values()):
            # Nothing structured was found: this is a warning/error message
            # or an off-format reply.  Show the text alone rather than a
            # made-up prediction and confidence bar.
            s3_content = f'<div class="reasoning-text">{escape(s3_text[:200])}</div>'
        else:
            # First digit of the confidence field, defaulting to the middle
            # of the scale and clamped to 1..5 so the bar stays within
            # 20%..100% and the label never reads e.g. "9/5".
            digit = re.search(r"\d", conf_raw)
            conf_int = int(digit.group()) if digit else 3
            conf_int = max(1, min(conf_int, 5))
            bar_pct = conf_int * 20

            alts_html = ""
            if alts:
                alts_html = f'<div class="alternatives">Also possible: <span>{escape(alts)}</span></div>'

            s3_content = f"""
            <div class="pred-block">
              <div class="pred-label">Income Prediction</div>
              <div class="pred-value">{escape(pred) if pred else em_dash}</div>
              <div class="confidence-bar-wrap">
                <div class="confidence-bar-bg">
                  <div class="confidence-bar-fill" style="width:{bar_pct}%"></div>
                </div>
                <div class="confidence-label">Confidence {conf_int}/5</div>
              </div>
              <div class="reasoning-text">{escape(reasoning or s3_text[:200])}</div>
              {alts_html}
            </div>"""
    else:
        s3_content = placeholder

    def card(cls, badge, title, content, active):
        # One stage card; inactive cards get the dimmed style.
        dim_cls = "active" if active else "dim"
        return f"""
        <div class="stage-card {cls} {dim_cls}">
          <div class="stage-header">
            <span class="stage-badge">{badge}</span>
            <span class="stage-title">{title}</span>
          </div>
          {content}
        </div>"""

    def arrow(label, active):
        # Connector between two cards, faded until the next stage is reached.
        opacity = "1" if active else "0.3"
        return f"""
        <div class="chain-arrow" style="opacity:{opacity}">
          <div class="arrow-line"></div>
          <div class="arrow-label">{label}</div>
          <div class="arrow-line"></div>
          <div class="arrow-tip"></div>
        </div>"""

    parts = [
        CHAIN_CSS,
        '<div class="hicotraj-chain">',
        card("s1", "Stage 1", "Factual Feature Extraction", s1_content, s1_active),
        arrow("behavioral abstraction", s2_active),
        card("s2", "Stage 2", "Behavioral Pattern Analysis", s2_content, s2_active),
        arrow("demographic inference", s3_active),
        card("s3", "Stage 3", "Demographic Inference", s3_content, s3_active),
        "</div>",
    ]
    return "".join(parts)
488
+
489
+
490
+ # ── Map & demo ────────────────────────────────────────────────────────────────
491
 
492
  def build_map(agent_sp):
493
  agent_sp = agent_sp.reset_index(drop=True).copy()
 
504
 
505
  n = len(agent_sp)
506
  for i, row in agent_sp.iterrows():
 
507
  ratio = i / max(n - 1, 1)
508
+ r = int(255 - ratio * (255 - 139))
509
+ g = int(204 * (1 - ratio) ** 2)
 
 
 
 
510
  b = 0
511
  color = f"#{r:02x}{g:02x}{b:02x}"
 
512
  folium.CircleMarker(
513
  location=[row["latitude"], row["longitude"]],
514
  radius=7, color=color, fill=True, fill_color=color, fill_opacity=0.9,
515
  popup=folium.Popup(
516
  f"<b>#{i+1} {row['name']}</b><br>"
517
  f"{row['start_datetime'].strftime('%a %m/%d %H:%M')}<br>"
518
+ f"{int(row['duration_min'])} min<br>{row['act_label']}",
 
519
  max_width=220
520
  )
521
  ).add_to(m)
 
526
 
527
 
528
  def build_demo_text(row):
529
+ age = int(row["age"]) if row["age"] > 0 else "Unknown"
530
  return (
531
  f"Age: {age} | "
532
  f"Sex: {SEX_MAP.get(int(row['sex']), row['sex'])} | "
 
536
  )
537
 
538
 
539
+ # ── Callbacks ─────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
def on_select(agent_id):
    """Rebuild every agent-dependent panel for the newly selected agent.

    Args:
        agent_id: Agent identifier as delivered by the dropdown (a string);
            it is converted with ``int`` before filtering.

    Returns:
        A 4-tuple ``(map_html, raw_text, demo_text, chain_html)`` in the
        order the UI wiring expects: the trajectory map, the mobility
        summary plus weekly check-in text, the demographics label, and the
        reasoning chain reset to its idle state.
    """
    agent_id = int(agent_id)
    agent_sp = sp[sp["agent_id"] == agent_id].sort_values("start_datetime")

    # The dropdown is populated from the stay-point table only, so an agent
    # may have no matching demographics row. Fall back to a label instead of
    # letting ``.iloc[0]`` raise IndexError and break the whole callback.
    demo_rows = demo[demo["agent_id"] == agent_id]
    if demo_rows.empty:
        demo_text = f"No demographic record found for agent {agent_id}."
    else:
        demo_text = build_demo_text(demo_rows.iloc[0])

    map_html = build_map(agent_sp)
    raw_text = build_mobility_summary(agent_sp) + "\n\n" + build_weekly_checkin(agent_sp)
    # Selecting a different agent invalidates any previous inference output.
    chain_html = render_chain(status="idle")

    return map_html, raw_text, demo_text, chain_html
 
552
 
 
 
 
 
 
 
553
 
554
def run_inference(agent_id, hf_token):
    """Stream the three-stage reasoning chain for one agent.

    This is a generator: each yielded value is the HTML produced by
    ``render_chain`` for the current state, so the UI can update after
    every stage instead of waiting for the full run.

    Args:
        agent_id: Agent identifier as delivered by the dropdown; converted
            with ``int`` before filtering.
        hf_token: Hugging Face API token typed by the user. Blank or
            whitespace-only values abort the run with a warning.

    Yields:
        HTML strings for the chain panel: one before each stage starts,
        one with the final result, or one carrying an error message.
    """
    if not hf_token or not hf_token.strip():
        yield render_chain(s3_text="⚠️ Please enter your Hugging Face token first.", status="done")
        return

    agent_id = int(agent_id)
    agent_sp = sp[sp["agent_id"] == agent_id].sort_values("start_datetime")
    traj_text = build_mobility_summary(agent_sp) + "\n\n" + build_weekly_checkin(agent_sp)

    # Outputs of the stages completed so far, keyed by the render_chain
    # keyword they feed. Kept outside the try so the error path can still
    # show whatever finished before the failure.
    finished = {}

    try:
        client = InferenceClient(token=hf_token.strip())

        yield render_chain(status="running1")
        s1 = call_llm(client, STEP1_SYSTEM, traj_text, max_tokens=400)
        finished["s1_text"] = s1

        yield render_chain(status="running2", **finished)
        s2_input = f"Features:\n{s1}\n\nNow analyze behavioral patterns."
        s2 = call_llm(client, STEP2_SYSTEM, s2_input, max_tokens=300)
        finished["s2_text"] = s2

        yield render_chain(status="running3", **finished)
        s3_input = f"Features:\n{s1}\n\nBehavioral analysis:\n{s2}\n\nNow infer income."
        s3 = call_llm(client, STEP3_SYSTEM, s3_input, max_tokens=300)
        finished["s3_text"] = s3

        yield render_chain(status="done", **finished)

    except Exception as e:
        # UI boundary: report the failure in the last card rather than
        # crashing the callback, and keep earlier stage outputs visible so
        # a late failure does not throw away work already done.
        finished["s3_text"] = f"❌ Error: {e}"
        yield render_chain(status="done", **finished)
581
+
582
+
583
def call_llm(client, system_prompt: str, user_content: str, max_tokens: int = 400) -> str:
    """Send one system + user exchange to the chat model and return its reply.

    Args:
        client: Object exposing ``chat.completions.create(...)``; the file
            passes an ``InferenceClient`` here.
        system_prompt: Text sent as the ``system`` message.
        user_content: Text sent as the ``user`` message.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no content.
    """
    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=max_tokens,
        temperature=0.3,
    )
    # ``content`` can be None on a chat completion message; calling
    # ``.strip()`` on it would raise AttributeError, so normalise first.
    content = response.choices[0].message.content
    return (content or "").strip()
594
 
 
 
 
 
595
 
596
+ # ── UI ────────────────────────────────────────────────────────────────────────
597
+
598
+ with gr.Blocks(title="HiCoTraj Demo", theme=gr.themes.Soft()) as app:
599
+ gr.Markdown("## HiCoTraj β€” Trajectory Visualization & Hierarchical CoT Demo")
600
  gr.Markdown("*Zero-Shot Demographic Reasoning via Hierarchical Chain-of-Thought Prompting from Trajectory*")
601
 
 
602
  with gr.Row():
603
+ hf_token_box = gr.Textbox(
604
+ label="Hugging Face Token",
605
+ placeholder="hf_...",
606
+ type="password",
607
+ scale=2
608
+ )
609
+
610
+ with gr.Row():
611
+ agent_dd = gr.Dropdown(
612
  choices=[str(a) for a in sample_agents],
613
  label="Select Agent",
614
  value=str(sample_agents[0]),
 
620
  scale=4
621
  )
622
 
 
623
  with gr.Row():
624
 
625
+ # LEFT: map + NUMOSIM data
626
  with gr.Column(scale=1):
627
+ gr.Markdown("### Trajectory Map")
628
+ map_out = gr.HTML()
629
+ gr.Markdown("### NUMOSIM Raw Data")
 
630
  raw_out = gr.Textbox(
631
+ lines=25, interactive=False,
632
+ label="Mobility Summary + Weekly Check-in"
 
633
  )
634
 
635
+ # RIGHT: reasoning chain
636
  with gr.Column(scale=1):
637
+ gr.Markdown("### Hierarchical Chain-of-Thought Reasoning")
638
+ run_btn = gr.Button("β–Ά Run HiCoTraj Inference", variant="primary")
639
+ chain_out = gr.HTML(value=render_chain(status="idle"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
 
641
  agent_dd.change(
642
+ fn=on_select, inputs=agent_dd,
643
+ outputs=[map_out, raw_out, demo_label, chain_out]
 
644
  )
645
  app.load(
646
+ fn=on_select, inputs=agent_dd,
647
+ outputs=[map_out, raw_out, demo_label, chain_out]
648
+ )
649
+ run_btn.click(
650
+ fn=run_inference,
651
+ inputs=[agent_dd, hf_token_box],
652
+ outputs=[chain_out]
653
  )
654
 
655
  if __name__ == "__main__":
656
+ app.launch()