ginnyxxxxxxx committed on
Commit
b30e889
·
1 Parent(s): e5d67b5
Files changed (1) hide show
  1. app.py +288 -26
app.py CHANGED
@@ -3,11 +3,13 @@ import pandas as pd
3
  import folium
4
  import numpy as np
5
  import os
 
6
 
7
  BASE = os.path.dirname(os.path.abspath(__file__))
8
  STAY_POINTS = os.path.join(BASE, "data", "stay_points_sampled.csv")
9
  POI_PATH = os.path.join(BASE, "data", "poi_sampled.csv")
10
  DEMO_PATH = os.path.join(BASE, "data", "demographics_sampled.csv")
 
11
 
12
  SEX_MAP = {1:"Male", 2:"Female", -8:"Unknown", -7:"Prefer not to answer"}
13
  EDU_MAP = {1:"Less than HS", 2:"HS Graduate/GED", 3:"Some College/Associate",
@@ -47,9 +49,184 @@ def parse_act_types(x):
47
 
48
  sp["act_label"] = sp["act_types"].apply(parse_act_types)
49
 
 
 
 
 
 
 
 
 
 
 
50
  sample_agents = sorted(sp["agent_id"].unique().tolist())
51
  print(f"Ready. {len(sample_agents)} agents loaded.")
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def build_map(agent_sp):
55
  agent_sp = agent_sp.reset_index(drop=True).copy()
@@ -62,14 +239,21 @@ def build_map(agent_sp):
62
 
63
  coords = list(zip(agent_sp["latitude"], agent_sp["longitude"]))
64
  if len(coords) > 1:
65
- folium.PolyLine(coords, color="#aaaaaa", weight=1.5, opacity=0.4).add_to(m)
66
 
67
  n = len(agent_sp)
68
  for i, row in agent_sp.iterrows():
 
69
  ratio = i / max(n - 1, 1)
70
- r = int(255 * ratio)
71
- g = int(255 * (1 - ratio))
72
- color = f"#{r:02x}{g:02x}33"
 
 
 
 
 
 
73
  folium.CircleMarker(
74
  location=[row["latitude"], row["longitude"]],
75
  radius=7, color=color, fill=True, fill_color=color, fill_opacity=0.9,
@@ -83,21 +267,10 @@ def build_map(agent_sp):
83
  ).add_to(m)
84
 
85
  m.get_root().width = "100%"
86
- m.get_root().height = "500px"
87
  return m._repr_html_()
88
 
89
 
90
- def build_poi_sequence(agent_sp):
91
- lines = []
92
- for _, row in agent_sp.iterrows():
93
- lines.append(
94
- f"{row['start_datetime'].strftime('%a %m/%d')} "
95
- f"{row['start_datetime'].strftime('%H:%M')}–{row['end_datetime'].strftime('%H:%M')} "
96
- f"({int(row['duration_min'])} min) | {row['name']} | {row['act_label']}"
97
- )
98
- return "\n".join(lines)
99
-
100
-
101
  def build_demo_text(row):
102
  age = int(row["age"]) if row["age"] > 0 else "Unknown"
103
  return (
@@ -109,26 +282,115 @@ def build_demo_text(row):
109
  )
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def on_select(agent_id):
113
  agent_id = int(agent_id)
114
  agent_sp = sp[sp["agent_id"] == agent_id].sort_values("start_datetime")
115
  agent_demo = demo[demo["agent_id"] == agent_id].iloc[0]
116
- return build_map(agent_sp), build_poi_sequence(agent_sp), build_demo_text(agent_demo)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
 
119
- with gr.Blocks(title="HiCoTraj Demo", theme=gr.themes.Soft()) as app:
120
- gr.Markdown("## HiCoTraj: Trajectory Visualization")
 
 
121
 
 
 
 
 
 
122
  with gr.Row():
123
- agent_dd = gr.Dropdown(choices=[str(a) for a in sample_agents],
124
- label="Select Agent", value=str(sample_agents[0]))
125
- demo_label = gr.Textbox(label="Ground Truth Demographics", interactive=False)
 
 
 
 
 
 
 
 
126
 
127
- map_out = gr.HTML(label="Trajectory Map")
128
- poi_out = gr.Textbox(label="POI Sequence", lines=20, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- agent_dd.change(fn=on_select, inputs=agent_dd, outputs=[map_out, poi_out, demo_label])
131
- app.load(fn=on_select, inputs=agent_dd, outputs=[map_out, poi_out, demo_label])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  if __name__ == "__main__":
134
  app.launch(share=True)
 
3
  import folium
4
  import numpy as np
5
  import os
6
+ import json
7
 
8
  BASE = os.path.dirname(os.path.abspath(__file__))
9
  STAY_POINTS = os.path.join(BASE, "data", "stay_points_sampled.csv")
10
  POI_PATH = os.path.join(BASE, "data", "poi_sampled.csv")
11
  DEMO_PATH = os.path.join(BASE, "data", "demographics_sampled.csv")
12
+ COT_PATH = os.path.join(BASE, "data", "cot_results.json") # <-- your JSON file
13
 
14
  SEX_MAP = {1:"Male", 2:"Female", -8:"Unknown", -7:"Prefer not to answer"}
15
  EDU_MAP = {1:"Less than HS", 2:"HS Graduate/GED", 3:"Some College/Associate",
 
49
 
50
  sp["act_label"] = sp["act_types"].apply(parse_act_types)
51
 
52
# Load CoT JSON
# Maps agent_id -> that agent's inference-result dict. Stays empty when the
# results file does not exist.
cot_by_agent = {}
if os.path.exists(COT_PATH):
    print("Loading CoT results...")
    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # default encoding.
    with open(COT_PATH, "r", encoding="utf-8") as f:
        cot_raw = json.load(f)
    for result in cot_raw.get("inference_results", []):
        key = result.get("agent_id")
        if key is None:
            # An entry without an agent_id cannot be matched to any agent.
            continue
        try:
            # on_select() looks agents up by int, so store int keys whenever
            # the JSON value can be read as one (e.g. "123" -> 123).
            key = int(key)
        except (TypeError, ValueError):
            pass
        cot_by_agent[key] = result
60
+
61
+ # All CSV agents are available; CoT will fall back to mock if no match
62
  sample_agents = sorted(sp["agent_id"].unique().tolist())
63
  print(f"Ready. {len(sample_agents)} agents loaded.")
64
 
65
+ MOCK_COT = {
66
+ "text_representation": """MOBILITY TRAJECTORY DATA
67
+ ===========================
68
+ Observation Period: 2024-01-29 to 2024-02-25 (28 days)
69
+ Total Stay Points: 82
70
+ Unique Locations: 16
71
+
72
+ LOCATION PATTERNS
73
+ ----------------
74
+ 1. residence
75
+ Visits: 36 times
76
+ Average Duration: 829 minutes
77
+
78
+ 2. Clinton Mobile Estates
79
+ Visits: 9 times
80
+ Average Duration: 137 minutes
81
+
82
+ 3. 7-Eleven
83
+ Visits: 8 times
84
+ Average Duration: 118 minutes
85
+
86
+ 4. Euro Caffe
87
+ Visits: 5 times
88
+ Average Duration: 101 minutes
89
+
90
+ 5. Hoa Phap Hoa Dao Trang
91
+ Visits: 4 times
92
+ Average Duration: 112 minutes
93
+
94
+ TEMPORAL PATTERNS
95
+ ----------------
96
+ Activity by Time of Day:
97
+ - morning: 56%
98
+ - afternoon: 24%
99
+ - evening: 20%
100
+
101
+ Weekday vs Weekend:
102
+ - weekday: 66%
103
+ - weekend: 34%""",
104
+
105
+ "weekly_checkin": """WEEKLY CHECK-IN SUMMARY
106
+ =======================
107
+ Period: 2024-01-29 to 2024-02-25 (28 days)
108
+
109
+ --- Monday, January 29 (Weekday) ---
110
+ Total activities: 2
111
+ - 09:10-10:14 (64 mins): Bear Wire - Work, Services, DropOff
112
+ - 10:38-08:54 (1336 mins): residence - Home, Social Visit, DropOff
113
+
114
+ --- Tuesday, January 30 (Weekday) ---
115
+ Total activities: 2
116
+ - 09:12-11:06 (115 mins): Clinton Mobile Estates - Work, Recreation, Exercise
117
+ - 11:24-09:36 (1331 mins): residence - Home, Social Visit, DropOff
118
+
119
+ --- Wednesday, January 31 (Weekday) ---
120
+ Total activities: 4
121
+ - 09:52-14:07 (255 mins): Castaway Cove Water Playground - Work, Exercise, DropOff
122
+ - 14:21-15:27 (66 mins): residence - Home, Social Visit, DropOff
123
+ - 15:44-19:03 (198 mins): Clinton Mobile Estates - Work, Recreation, Exercise
124
+ - 19:21-09:06 (825 mins): residence - Home, Social Visit, DropOff
125
+
126
+ --- Friday, February 02 (Weekday) ---
127
+ Total activities: 3
128
+ - 08:34-09:29 (55 mins): Euro Caffe - Work, Dining
129
+ - 09:58-11:06 (68 mins): 7-Eleven - Work, Shopping, Dining, DropOff
130
+ - 11:25-08:56 (1291 mins): residence - Home, Social Visit, DropOff
131
+
132
+ --- Saturday, February 03 (Weekend) ---
133
+ Total activities: 5
134
+ - 09:13-10:03 (50 mins): Clinton Mobile Estates - Work, Recreation, Exercise
135
+ - 10:27-11:10 (43 mins): 7-Eleven - Work, Shopping, Dining, DropOff
136
+ - 11:29-18:39 (430 mins): residence - Home, Social Visit, DropOff
137
+ - 18:56-21:30 (154 mins): Pepper Shaker Cafe - Work, Dining
138
+ - 21:48-09:16 (689 mins): residence - Home, Social Visit, DropOff
139
+
140
+ --- Sunday, February 11 (Weekend) ---
141
+ Total activities: 5
142
+ - 10:05-11:56 (111 mins): Hoa Phap Hoa Dao Trang - Religious
143
+ - 12:21-13:36 (75 mins): Pepper Shaker Cafe - Work, Dining
144
+ - 14:56-16:58 (122 mins): 7-Eleven - Work, Shopping, Dining, DropOff
145
+ - 17:17-13:27 (1209 mins): residence - Home, Social Visit, DropOff
146
+
147
+ WEEKLY INSIGHTS
148
+ ===============
149
+ Most visited location: residence (36 visits)
150
+ Time spent: DropOff 31.0% | Home 28.8% | Work 4.1% | Dining 2.3% | Exercise 1.7%
151
+ Weekday avg activities: 2.7 | Weekend avg: 3.5""",
152
+
153
+ "step1_response": """Based on the provided mobility trajectory data, here are the objective features extracted:
154
+
155
+ LOCATION INVENTORY:
156
+ - POI categories visited:
157
+ - Residence
158
+ - Clinton Mobile Estates (Recreation, Exercise)
159
+ - 7-Eleven (Shopping, Dining, DropOff)
160
+ - Euro Caffe (Dining)
161
+ - Hoa Phap Hoa Dao Trang (Religious)
162
+ - Castaway Cove Water Playground (Exercise, DropOff)
163
+ - Salon 860 West (Shopping, Dining, DropOff)
164
+ - Pepper Shaker Cafe (Dining)
165
+ - Lucky Pierre Self Service Car Wash (Shopping, Dining, DropOff)
166
+
167
+ - Frequency of visits:
168
+ - Residence: 36 times
169
+ - Clinton Mobile Estates: 9 times
170
+ - 7-Eleven: 8 times
171
+ - Euro Caffe: 5 times
172
+ - Hoa Phap Hoa Dao Trang: 4 times
173
+
174
+ - Apparent price level:
175
+ - Budget: 7-Eleven, Lucky Pierre Self Service Car Wash
176
+ - Mid-range: Euro Caffe, Pepper Shaker Cafe
177
+
178
+ TEMPORAL PATTERNS:
179
+ - Active hours: 09:00 to 23:00
180
+ - Weekly distribution: 66% weekday, 34% weekend
181
+ - Regularity: Consistent daily routines
182
+ - Duration: Longer stays at residence (829 min avg), shorter at other venues
183
+
184
+ SPATIAL CHARACTERISTICS:
185
+ - Activity radius: up to ~13 km from home
186
+ - Geographic distribution: Urban areas, mix of residential and commercial
187
+
188
+ SEQUENCE OBSERVATIONS:
189
+ - Common transitions: Home→Clinton Mobile Estates→Home, Home→7-Eleven→Home
190
+ - Weekday pattern: morning work/exercise, afternoon errands
191
+ - Weekend pattern: morning religious visit, afternoon dining/shopping""",
192
+
193
+ "step2_response": """1. ROUTINE & SCHEDULE ANALYSIS:
194
+ - Semi-structured lifestyle with consistent morning start times (~09:00)
195
+ - Flexible schedule: varies between 2–5 activities per day
196
+ - Weekday/weekend contrast suggests work-centric weekdays, more leisure on weekends
197
+
198
+ 2. ECONOMIC BEHAVIOR PATTERNS:
199
+ - Mix of budget-conscious and mid-range spending
200
+ - Frequent 7-Eleven visits suggest convenience-oriented, cost-aware shopping
201
+ - No signs of premium or luxury venue preferences
202
+ - Activity radius (~13 km) suggests personal vehicle access
203
+
204
+ 3. SOCIAL & LIFESTYLE INDICATORS:
205
+ - Regular weekend religious attendance (Hoa Phap Hoa Dao Trang) → community ties
206
+ - Recreational activities (Castaway Cove, Clinton Mobile Estates) → active lifestyle
207
+ - Dining out occasionally (Euro Caffe, Pepper Shaker Cafe) → moderate social life
208
+ - High home-time suggests family-oriented or home-centered lifestyle
209
+
210
+ 4. URBAN LIFESTYLE CHARACTERISTICS:
211
+ - Urban/suburban mix: lives in residential area, travels to nearby commercial zones
212
+ - Activity radius and venue diversity suggest working-class to middle-class area
213
+ - Community engagement evident through religious and recreational venues
214
+
215
+ 5. ROUTINE STABILITY:
216
+ - Highly consistent patterns over 4 weeks with minimal deviation
217
+ - No signs of major life transitions
218
+ - Regular work-like attendance at Clinton Mobile Estates suggests stable employment""",
219
+
220
+ "step3_response": """INCOME_PREDICTION: Middle ($35k-$75k)
221
+ INCOME_CONFIDENCE: 4
222
+ INCOME_REASONING: The individual's mobility patterns suggest a mix of budget-conscious and mid-range spending. Frequent visits to 7-Eleven and self-service venues indicate cost awareness, while occasional mid-range dining (Euro Caffe, Pepper Shaker Cafe) suggests some disposable income. The neighborhood types are commercial/residential mixed, consistent with a middle-class area. No luxury venue visits detected. Activity radius of ~13 km and apparent vehicle access align with middle-income transport patterns.
223
+
224
+ RANKED ALTERNATIVES:
225
+ 1. Middle ($35k-$75k) — Primary prediction
226
+ 2. Low ($15k-$35k) — Budget venue frequency could suggest lower income
227
+ 3. Upper-Middle ($75k-$125k) — Unlikely given absence of premium venues"""
228
+ }
229
+
230
 
231
  def build_map(agent_sp):
232
  agent_sp = agent_sp.reset_index(drop=True).copy()
 
239
 
240
  coords = list(zip(agent_sp["latitude"], agent_sp["longitude"]))
241
  if len(coords) > 1:
242
+ folium.PolyLine(coords, color="#cc000055", weight=1.5, opacity=0.4).add_to(m)
243
 
244
  n = len(agent_sp)
245
  for i, row in agent_sp.iterrows():
246
+ # Red gradient: light red (#ffcccc) → deep red (#8b0000)
247
  ratio = i / max(n - 1, 1)
248
+ r = 255
249
+ g = int(204 * (1 - ratio)) # 204 → 0
250
+ b = int(204 * (1 - ratio)) # 204 → 0
251
+ # Clamp deep end toward dark red (139, 0, 0)
252
+ r = int(255 - ratio * (255 - 139)) # 255 → 139
253
+ g = int(204 * (1 - ratio) * (1 - ratio * 0.3)) # fade to 0
254
+ b = 0
255
+ color = f"#{r:02x}{g:02x}{b:02x}"
256
+
257
  folium.CircleMarker(
258
  location=[row["latitude"], row["longitude"]],
259
  radius=7, color=color, fill=True, fill_color=color, fill_opacity=0.9,
 
267
  ).add_to(m)
268
 
269
  m.get_root().width = "100%"
270
+ m.get_root().height = "420px"
271
  return m._repr_html_()
272
 
273
 
 
 
 
 
 
 
 
 
 
 
 
274
  def build_demo_text(row):
275
  age = int(row["age"]) if row["age"] > 0 else "Unknown"
276
  return (
 
282
  )
283
 
284
 
285
def parse_step3(text):
    """Extract prediction, confidence and reasoning from a stage-3 response.

    Each line of ``text`` is stripped and checked for one of the tags
    ``INCOME_PREDICTION:``, ``INCOME_CONFIDENCE:`` or ``INCOME_REASONING:``
    at its start; the remainder of that line becomes the field value.

    Args:
        text: Raw stage-3 response. ``None`` or ``""`` is treated as having
            nothing to parse.

    Returns:
        A ``(prediction, confidence, reasoning)`` tuple of strings. A field
        whose tag never appears is ``""``. If a tag appears on several lines,
        the last one wins.
    """
    fields = {
        "INCOME_PREDICTION:": "",
        "INCOME_CONFIDENCE:": "",
        "INCOME_REASONING:": "",
    }
    for raw_line in (text or "").splitlines():
        stripped = raw_line.strip()
        for tag in fields:
            if stripped.startswith(tag):
                # Slice off only the leading tag: str.replace() would also
                # delete any later occurrence of the tag inside the value.
                fields[tag] = stripped[len(tag):].strip()
                break
    return (
        fields["INCOME_PREDICTION:"],
        fields["INCOME_CONFIDENCE:"],
        fields["INCOME_REASONING:"],
    )
297
+
298
+
299
def on_select(agent_id):
    """Build every UI output for the agent chosen in the dropdown.

    Args:
        agent_id: Agent identifier as delivered by the dropdown; it is
            converted to ``int`` before any lookup.

    Returns:
        A 6-tuple ``(map_html, raw_text, step1, step2, step3_summary,
        demo_text)``, in the order the Gradio outputs are wired.
    """
    agent_id = int(agent_id)
    agent_sp = sp[sp["agent_id"] == agent_id].sort_values("start_datetime")

    # An agent can have stay points but no demographics row; report that in
    # the textbox instead of letting .iloc[0] raise IndexError.
    demo_rows = demo[demo["agent_id"] == agent_id]
    if len(demo_rows) > 0:
        demo_text = build_demo_text(demo_rows.iloc[0])
    else:
        demo_text = "No demographic record found for this agent."

    # The JSON may store agent ids as strings, so try both key types before
    # falling back to the mock record.
    cot = cot_by_agent.get(agent_id)
    if cot is None:
        cot = cot_by_agent.get(str(agent_id))
    if cot is None:
        cot = MOCK_COT

    map_html = build_map(agent_sp)

    # NUMOSIM raw data. "or" (rather than a .get default) also covers keys
    # that are present but null in the JSON.
    raw_text = (
        (cot.get("text_representation") or "")
        + "\n\n"
        + (cot.get("weekly_checkin") or "")
    )

    # CoT stages
    step1 = cot.get("step1_response") or "No data"
    step2 = cot.get("step2_response") or "No data"
    step3_raw = cot.get("step3_response") or "No data"
    pred, conf, reason = parse_step3(step3_raw)
    step3_summary = f"INCOME PREDICTION: {pred}\nCONFIDENCE: {conf}/5\n\nREASONING:\n{reason}\n\n---FULL RESPONSE---\n{step3_raw}"

    return map_html, raw_text, step1, step2, step3_summary, demo_text
319
 
320
 
321
# Page-level CSS passed to gr.Blocks: widens the container and defines the
# .stage-label style.
custom_css = """
.gradio-container { max-width: 1600px !important; }
.stage-label { font-weight: bold; color: #b22222; }
"""

# UI layout. The emoji and dashes in the headings below were restored from
# mis-encoded byte sequences so they render as intended.
with gr.Blocks(title="HiCoTraj Demo", theme=gr.themes.Soft(), css=custom_css) as app:
    gr.Markdown("## 🗺️ HiCoTraj: Trajectory Visualization & Chain-of-Thought Demo")
    gr.Markdown("*Zero-Shot Demographic Reasoning via Hierarchical Chain-of-Thought Prompting from Trajectory*")

    # ── Top bar ──────────────────────────────────────────────────────────────
    with gr.Row():
        agent_dd = gr.Dropdown(
            choices=[str(a) for a in sample_agents],
            label="Select Agent",
            value=str(sample_agents[0]),
            scale=1,
        )
        demo_label = gr.Textbox(
            label="Ground Truth Demographics",
            interactive=False,
            scale=4,
        )

    # ── Main content: Left | Right ────────────────────────────────────────
    with gr.Row():

        # LEFT: Map + NUMOSIM raw data
        with gr.Column(scale=1):
            gr.Markdown("### 📍 Trajectory Map")
            map_out = gr.HTML(label="Trajectory Map")

            gr.Markdown("### 📋 NUMOSIM Raw Data")
            raw_out = gr.Textbox(
                label="Mobility Summary + Weekly Check-in",
                lines=25,
                interactive=False,
            )

        # RIGHT: CoT three stages
        with gr.Column(scale=1):
            gr.Markdown("### 🧠 Hierarchical Chain-of-Thought Reasoning")

            with gr.Accordion("📌 Stage 1 — Factual Feature Extraction", open=True):
                step1_out = gr.Textbox(
                    label="Stage 1 Response",
                    lines=12,
                    interactive=False,
                )

            with gr.Accordion("🔍 Stage 2 — Behavioral Pattern Analysis", open=True):
                step2_out = gr.Textbox(
                    label="Stage 2 Response",
                    lines=12,
                    interactive=False,
                )

            with gr.Accordion("🎯 Stage 3 — Demographic Inference", open=True):
                step3_out = gr.Textbox(
                    label="Stage 3 Response (Income Prediction)",
                    lines=12,
                    interactive=False,
                )

    # Both events refresh the same components; the order must match the tuple
    # returned by on_select().
    select_outputs = [map_out, raw_out, step1_out, step2_out, step3_out, demo_label]
    agent_dd.change(fn=on_select, inputs=agent_dd, outputs=select_outputs)
    app.load(fn=on_select, inputs=agent_dd, outputs=select_outputs)

if __name__ == "__main__":
    app.launch(share=True)