AceXRoux committed on
Commit
bcfe21e
·
verified ·
1 Parent(s): 48499b0

Update app.py

Browse files

Added an enhanced look using ThreeJS Globe.GL

Files changed (1) hide show
  1. app.py +362 -142
app.py CHANGED
@@ -1,7 +1,7 @@
1
  #!/usr/bin/env python3
2
  """
3
- GeoVLM - AI-Powered Geolocation
4
- Upload any image and predict where it was taken using Vision-Language Models
5
  """
6
 
7
  import gradio as gr
@@ -9,22 +9,20 @@ from PIL import Image
9
  from transformers import AutoProcessor, AutoModelForImageTextToText
10
  import torch
11
  import re
12
- import math
13
  from dataclasses import dataclass
14
 
15
  # ============================================================================
16
- # Simplified Geolocation Parser (from vlm-gym)
17
  # ============================================================================
18
 
19
  @dataclass(frozen=True)
20
  class Coords:
21
- """Geographic coordinates"""
22
  lat: float
23
  lon: float
24
 
25
  @dataclass(frozen=True)
26
  class ParsedResponse:
27
- """Structured model output"""
28
  city: str | None
29
  region: str | None
30
  country: str | None
@@ -43,25 +41,18 @@ PROMPT_TEMPLATE = (
43
  )
44
 
45
  KEY_ALIASES = {
46
- "city": "city",
47
- "country": "country",
48
- "region": "region",
49
- "state": "region",
50
- "province": "region",
51
- "latitude": "lat",
52
- "lat": "lat",
53
- "longitude": "lon",
54
- "lon": "lon",
55
  }
56
 
57
  def parse_response(text: str) -> ParsedResponse:
58
  """Parse structured 5-line format"""
59
  parsed = {}
60
-
61
  if not text:
62
  return ParsedResponse(None, None, None, None, text, False)
63
 
64
- # Parse key-value lines
65
  key_pattern = re.compile(
66
  r'^\s*(?:[-*+\u2022]\s*)?(?P<key>[A-Za-z][A-Za-z0-9\s\-/_.]*?)\s*:\s*(?P<value>.+)$'
67
  )
@@ -72,18 +63,14 @@ def parse_response(text: str) -> ParsedResponse:
72
  continue
73
 
74
  key_raw = match.group("key").strip().lower()
75
- key_raw = key_raw.strip("*_`\"' ")
76
- key_raw = re.sub(r"\s+", " ", key_raw)
77
  canonical = KEY_ALIASES.get(key_raw)
78
 
79
  if canonical is None:
80
  continue
81
 
82
- value_raw = match.group("value").strip()
83
- value_raw = value_raw.strip("`\"' \t")
84
- value_raw = re.sub(r"^[*_`]+", "", value_raw)
85
- value_raw = re.sub(r"[*_`]+$", "", value_raw)
86
- value_raw = value_raw.strip()
87
 
88
  if canonical in {"city", "region", "country"}:
89
  if value_raw and canonical not in parsed:
@@ -97,26 +84,22 @@ def parse_response(text: str) -> ParsedResponse:
97
  except ValueError:
98
  pass
99
 
100
- # Build coords if available
101
  coords = None
102
  if "lat" in parsed and "lon" in parsed:
103
  try:
104
- lat = parsed["lat"]
105
- lon = parsed["lon"]
106
  if -90 <= lat <= 90 and -180 <= lon <= 180:
107
  coords = Coords(lat=lat, lon=lon)
108
  except (ValueError, TypeError):
109
  pass
110
 
111
- format_valid = bool(len(parsed) >= 2)
112
-
113
  return ParsedResponse(
114
  city=parsed.get("city"),
115
  region=parsed.get("region"),
116
  country=parsed.get("country"),
117
  coords=coords,
118
  raw_text=text,
119
- format_valid=format_valid,
120
  )
121
 
122
  # ============================================================================
@@ -128,7 +111,6 @@ processor = None
128
  MODEL_NAME = "Qwen/Qwen2-VL-2B-Instruct"
129
 
130
  def load_model():
131
- """Load model once on startup"""
132
  global model, processor
133
  if model is None:
134
  print(f"Loading model: {MODEL_NAME}")
@@ -138,123 +120,331 @@ def load_model():
138
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
139
  device_map="auto" if torch.cuda.is_available() else "cpu"
140
  )
141
- print("Model loaded successfully!")
142
 
143
  def predict_location(image):
144
- """Predict geolocation from an image"""
145
  if image is None:
146
- return "Please upload an image.", ""
147
 
148
- # Ensure model is loaded
149
  load_model()
150
 
151
- # Convert to PIL if needed
152
  if not isinstance(image, Image.Image):
153
  image = Image.fromarray(image).convert("RGB")
154
  else:
155
  image = image.convert("RGB")
156
 
157
- # Prepare prompt
158
- messages = [
159
- {
160
- "role": "user",
161
- "content": [
162
- {"type": "image"},
163
- {"type": "text", "text": PROMPT_TEMPLATE}
164
- ]
165
- }
166
- ]
167
 
168
- # Process inputs
169
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
170
  inputs = processor(text=[text], images=[image], return_tensors="pt", padding=True)
171
-
172
- # Move to device
173
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
174
 
175
- # Generate
176
  with torch.no_grad():
177
- output_ids = model.generate(
178
- **inputs,
179
- max_new_tokens=256,
180
- do_sample=False,
181
- )
182
 
183
- # Decode
184
  generated_ids = output_ids[0][inputs['input_ids'].shape[1]:]
185
  response = processor.decode(generated_ids, skip_special_tokens=True).strip()
186
-
187
- # Parse
188
  parsed = parse_response(response)
189
 
190
  # Format output
191
  output = f"""
192
- ## 🤖 Raw Model Response:
 
 
 
 
 
 
 
 
 
 
193
  ```
194
  {response}
195
  ```
 
 
 
 
 
 
 
 
 
196
 
197
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
- ## 📍 Parsed Prediction:
 
 
 
 
 
200
 
201
- **City:** {parsed.city or "Not provided"}
202
- **Region:** {parsed.region or "Not provided"}
203
- **Country:** {parsed.country or "Not provided"}
204
- **Coordinates:** {f"{parsed.coords.lat:.6f}, {parsed.coords.lon:.6f}" if parsed.coords else "Not provided"}
205
- **Format Valid:** {"✅ Yes" if parsed.format_valid else "❌ No"}
206
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
- # Create map embed
209
- map_html = ""
210
- if parsed.coords:
211
- map_html = f"""
212
- <div style="margin-top: 20px;">
213
- <iframe
214
- width="100%"
215
- height="450"
216
- frameborder="0"
217
- scrolling="no"
218
- marginheight="0"
219
- marginwidth="0"
220
- src="https://www.openstreetmap.org/export/embed.html?bbox={parsed.coords.lon-0.1},{parsed.coords.lat-0.1},{parsed.coords.lon+0.1},{parsed.coords.lat+0.1}&marker={parsed.coords.lat},{parsed.coords.lon}"
221
- style="border: 2px solid #ddd; border-radius: 8px;">
222
- </iframe>
223
- <div style="margin-top: 10px; text-align: center;">
224
- <a href="https://www.google.com/maps?q={parsed.coords.lat},{parsed.coords.lon}" target="_blank" style="margin: 0 10px; color: #4285f4; text-decoration: none; font-weight: bold;">
225
- 🗺️ View on Google Maps
226
- </a>
227
- <span style="color: #666;">|</span>
228
- <a href="https://www.openstreetmap.org/?mlat={parsed.coords.lat}&mlon={parsed.coords.lon}#map=12/{parsed.coords.lat}/{parsed.coords.lon}" target="_blank" style="margin: 0 10px; color: #7ebc6f; text-decoration: none; font-weight: bold;">
229
- 🌍 View on OpenStreetMap
230
- </a>
231
  </div>
232
  </div>
233
- """
234
- else:
235
- map_html = "<div style='text-align: center; padding: 20px; color: #666;'>No valid coordinates found</div>"
236
-
237
- return output, map_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  # ============================================================================
240
  # Gradio Interface
241
  # ============================================================================
242
 
243
- with gr.Blocks(title="GeoVLM - AI Geolocation", theme=gr.themes.Soft()) as demo:
244
- gr.Markdown(
245
- """
246
- # 🌍 GeoVLM - AI-Powered Geolocation
247
-
248
- Upload any image and let AI predict where it was taken using vision-language models!
249
-
250
- ### How it works:
251
- - Analyzes visual features: architecture, vegetation, road signs, landscape
252
- - Uses state-of-the-art vision-language models (Qwen2-VL)
253
- - Predicts city, region, country, and GPS coordinates
254
-
255
- **Powered by [vlm-gym](https://github.com/sdan/vlm-gym)** | Model: Qwen2-VL-2B-Instruct
256
- """
257
- )
258
 
259
  with gr.Row():
260
  with gr.Column(scale=1):
@@ -263,47 +453,77 @@ with gr.Blocks(title="GeoVLM - AI Geolocation", theme=gr.themes.Soft()) as demo:
263
  label="📸 Upload Image",
264
  height=400
265
  )
266
- predict_btn = gr.Button("🔍 Predict Location", variant="primary", size="lg")
267
 
268
- gr.Markdown(
269
- """
270
- ### 💡 Tips:
271
- - Outdoor images work best
272
- - Street views are ideal
273
- - Clear photos with visible landmarks
274
- - Unique architectural or natural features help
275
- """
276
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
- with gr.Column(scale=1):
279
- output_text = gr.Markdown(label="Results")
280
- map_output = gr.HTML(label="Map")
 
 
 
 
 
 
 
 
281
 
282
- gr.Markdown(
283
- """
284
- ---
285
- ### 🎯 Use Cases:
286
- - **OSINT Research** - Verify photo locations for investigations
287
- - **GeoGuessr Training** - Practice location identification
288
- - **Education** - Learn about geographic features and cultures
289
- - **Travel Planning** - Identify interesting locations from photos
290
-
291
- ---
292
-
293
- **Note:** This is a demo. Predictions may not always be accurate. Use responsibly for educational and research purposes.
294
-
295
- Built with ❤️ using [Gradio](https://gradio.app) and [Hugging Face Transformers](https://huggingface.co/transformers)
296
- """
297
- )
 
 
 
 
 
 
 
 
 
 
298
 
299
- # Event handlers
300
  predict_btn.click(
301
  fn=predict_location,
302
  inputs=image_input,
303
- outputs=[output_text, map_output]
304
  )
305
 
306
  if __name__ == "__main__":
307
- print("🚀 Starting GeoVLM...")
308
  load_model()
309
- demo.launch()
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ GeoVLM with 3D Globe Visualization
4
+ Interactive 3D globe that flies to predicted locations
5
  """
6
 
7
  import gradio as gr
 
9
  from transformers import AutoProcessor, AutoModelForImageTextToText
10
  import torch
11
  import re
12
+ import json
13
  from dataclasses import dataclass
14
 
15
  # ============================================================================
16
+ # Geolocation Parser
17
  # ============================================================================
18
 
19
@dataclass(frozen=True)
class Coords:
    """Immutable latitude/longitude pair.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Latitude component of the position.
    lat: float
    # Longitude component of the position.
    lon: float
23
 
24
  @dataclass(frozen=True)
25
  class ParsedResponse:
 
26
  city: str | None
27
  region: str | None
28
  country: str | None
 
41
  )
42
 
43
  KEY_ALIASES = {
44
+ "city": "city", "country": "country", "region": "region",
45
+ "state": "region", "province": "region",
46
+ "latitude": "lat", "lat": "lat",
47
+ "longitude": "lon", "lon": "lon",
 
 
 
 
 
48
  }
49
 
50
  def parse_response(text: str) -> ParsedResponse:
51
  """Parse structured 5-line format"""
52
  parsed = {}
 
53
  if not text:
54
  return ParsedResponse(None, None, None, None, text, False)
55
 
 
56
  key_pattern = re.compile(
57
  r'^\s*(?:[-*+\u2022]\s*)?(?P<key>[A-Za-z][A-Za-z0-9\s\-/_.]*?)\s*:\s*(?P<value>.+)$'
58
  )
 
63
  continue
64
 
65
  key_raw = match.group("key").strip().lower()
66
+ key_raw = re.sub(r"\s+", " ", key_raw.strip("*_`\"' "))
 
67
  canonical = KEY_ALIASES.get(key_raw)
68
 
69
  if canonical is None:
70
  continue
71
 
72
+ value_raw = match.group("value").strip().strip("`\"' \t")
73
+ value_raw = re.sub(r"^[*_`]+|[*_`]+$", "", value_raw).strip()
 
 
 
74
 
75
  if canonical in {"city", "region", "country"}:
76
  if value_raw and canonical not in parsed:
 
84
  except ValueError:
85
  pass
86
 
 
87
  coords = None
88
  if "lat" in parsed and "lon" in parsed:
89
  try:
90
+ lat, lon = parsed["lat"], parsed["lon"]
 
91
  if -90 <= lat <= 90 and -180 <= lon <= 180:
92
  coords = Coords(lat=lat, lon=lon)
93
  except (ValueError, TypeError):
94
  pass
95
 
 
 
96
  return ParsedResponse(
97
  city=parsed.get("city"),
98
  region=parsed.get("region"),
99
  country=parsed.get("country"),
100
  coords=coords,
101
  raw_text=text,
102
+ format_valid=bool(len(parsed) >= 2),
103
  )
104
 
105
  # ============================================================================
 
111
  MODEL_NAME = "Qwen/Qwen2-VL-2B-Instruct"
112
 
113
  def load_model():
 
114
  global model, processor
115
  if model is None:
116
  print(f"Loading model: {MODEL_NAME}")
 
120
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
121
  device_map="auto" if torch.cuda.is_available() else "cpu"
122
  )
123
+ print("Model loaded!")
124
 
125
  def predict_location(image):
126
+ """Predict geolocation and return globe visualization data"""
127
  if image is None:
128
+ return "Please upload an image.", "", ""
129
 
 
130
  load_model()
131
 
 
132
  if not isinstance(image, Image.Image):
133
  image = Image.fromarray(image).convert("RGB")
134
  else:
135
  image = image.convert("RGB")
136
 
137
+ messages = [{
138
+ "role": "user",
139
+ "content": [
140
+ {"type": "image"},
141
+ {"type": "text", "text": PROMPT_TEMPLATE}
142
+ ]
143
+ }]
 
 
 
144
 
 
145
  text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
146
  inputs = processor(text=[text], images=[image], return_tensors="pt", padding=True)
 
 
147
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
148
 
 
149
  with torch.no_grad():
150
+ output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=False)
 
 
 
 
151
 
 
152
  generated_ids = output_ids[0][inputs['input_ids'].shape[1]:]
153
  response = processor.decode(generated_ids, skip_special_tokens=True).strip()
 
 
154
  parsed = parse_response(response)
155
 
156
  # Format output
157
  output = f"""
158
+ ## 🤖 AI Prediction
159
+
160
+ **📍 Location Details:**
161
+ - **City:** {parsed.city or "Unknown"}
162
+ - **Region:** {parsed.region or "Unknown"}
163
+ - **Country:** {parsed.country or "Unknown"}
164
+ - **Coordinates:** {f"{parsed.coords.lat:.6f}°, {parsed.coords.lon:.6f}°" if parsed.coords else "Not found"}
165
+
166
+ ---
167
+
168
+ ## 🔍 Raw Response:
169
  ```
170
  {response}
171
  ```
172
+ """
173
+
174
+ # Create globe HTML
175
+ globe_html = create_globe_html(parsed) if parsed.coords else "<div style='text-align:center; padding:50px; color:#666;'>No coordinates found</div>"
176
+
177
+ # Create info card
178
+ info_html = create_info_card(parsed)
179
+
180
+ return output, globe_html, info_html
181
 
182
def _js_html_text(value, fallback):
    """Return *value* (or *fallback*) as a JavaScript string literal safe to render as HTML."""
    import json
    from html import escape

    # HTML-escape first because the point label below interpolates these
    # fields into markup; json.dumps then yields a quoted, escaped JS literal
    # (non-ASCII characters become \uXXXX escapes).
    return json.dumps(escape(value or fallback))


def create_globe_html(parsed: "ParsedResponse") -> str:
    """Create the Globe.GL visualization with day/night toggle and country borders.

    Args:
        parsed: Parsed model output; its ``coords``, ``city``, ``region`` and
            ``country`` attributes are read.

    Returns:
        A complete HTML document as a string, or ``""`` when ``parsed.coords``
        is missing.
    """
    if not parsed.coords:
        return ""

    lat, lon = parsed.coords.lat, parsed.coords.lon

    # The place names come from model output, so they are embedded as escaped
    # JavaScript literals instead of being pasted between quotes.
    label_js = _js_html_text(parsed.city, "Location")
    city_js = _js_html_text(parsed.city, "Unknown")
    region_js = _js_html_text(parsed.region, "Unknown")
    country_js = _js_html_text(parsed.country, "Unknown")

    # Keep the decorative arc's end point inside the valid coordinate range.
    arc_end_lat = min(lat + 10, 90.0)
    arc_end_lon = lon + 10
    if arc_end_lon > 180:
        arc_end_lon -= 360

    # NOTE(review): this document relies on <script> tags running where it is
    # displayed; confirm the hosting component executes them (an
    # <iframe srcdoc=...> wrapper may be needed).
    document = f"""
<!DOCTYPE html>
<html>
<head>
    <style>
        body {{ margin: 0; padding: 0; overflow: hidden; background: #000; position: relative; }}
        #globeViz {{ width: 100%; height: 600px; }}
        .location-label {{
            color: white;
            font-size: 16px;
            font-family: Arial, sans-serif;
            background: rgba(0,0,0,0.7);
            padding: 8px 12px;
            border-radius: 4px;
            pointer-events: none;
        }}
        .controls {{
            position: absolute;
            top: 20px;
            right: 20px;
            z-index: 100;
            display: flex;
            gap: 10px;
        }}
        .control-btn {{
            background: rgba(255,255,255,0.9);
            border: none;
            padding: 10px 16px;
            border-radius: 6px;
            cursor: pointer;
            font-weight: bold;
            font-size: 14px;
            transition: all 0.3s;
            box-shadow: 0 2px 8px rgba(0,0,0,0.3);
        }}
        .control-btn:hover {{
            background: white;
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(0,0,0,0.4);
        }}
        .control-btn.active {{
            background: #667eea;
            color: white;
        }}
    </style>
</head>
<body>
    <div class="controls">
        <button class="control-btn active" id="dayBtn" onclick="setDayMode()">☀️ Day</button>
        <button class="control-btn" id="nightBtn" onclick="setNightMode()">🌙 Night</button>
        <button class="control-btn" id="bordersBtn" onclick="toggleBorders()">🗺️ Borders</button>
    </div>
    <div id="globeViz"></div>

    <script src="//unpkg.com/globe.gl"></script>
    <script>
        let showBorders = false;
        let currentMode = 'day';

        const myGlobe = Globe()
            .globeImageUrl('//unpkg.com/three-globe/example/img/earth-blue-marble.jpg')
            .bumpImageUrl('//unpkg.com/three-globe/example/img/earth-topology.png')
            .backgroundImageUrl('//unpkg.com/three-globe/example/img/night-sky.png')
            .pointOfView({{ lat: {lat}, lng: {lon}, altitude: 2.5 }}, 0)
            .atmosphereColor('lightskyblue')
            .atmosphereAltitude(0.15)
            (document.getElementById('globeViz'));

        // Load country borders
        fetch('//unpkg.com/world-atlas/countries-50m.json')
            .then(res => res.json())
            .then(countries => {{
                window.countriesData = countries;
            }});

        // Add marker point (text fields are pre-escaped for HTML rendering)
        const markerData = [{{
            lat: {lat},
            lng: {lon},
            size: 0.5,
            color: '#ff4444',
            label: {label_js},
            city: {city_js},
            region: {region_js},
            country: {country_js}
        }}];

        myGlobe
            .pointsData(markerData)
            .pointAltitude('size')
            .pointColor('color')
            .pointRadius(0.6)
            .pointLabel(d => `
                <div class="location-label">
                    <b>${{d.city}}</b><br/>
                    ${{d.region}}, ${{d.country}}<br/>
                    ${{d.lat.toFixed(4)}}°, ${{d.lng.toFixed(4)}}°
                </div>
            `);

        // Animate to location
        myGlobe.pointOfView({{ lat: {lat}, lng: {lon}, altitude: 1.5 }}, 3000);

        // Auto-rotate
        myGlobe.controls().autoRotate = true;
        myGlobe.controls().autoRotateSpeed = 0.3;

        // Add pulsing ring animation
        const ringData = [{{
            lat: {lat},
            lng: {lon},
            maxR: 10,
            propagationSpeed: 2,
            repeatPeriod: 1500
        }}];

        myGlobe
            .ringsData(ringData)
            .ringColor(() => 'rgba(255,68,68,0.5)')
            .ringMaxRadius('maxR')
            .ringPropagationSpeed('propagationSpeed')
            .ringRepeatPeriod('repeatPeriod');

        // Add arcs for visual effect
        const arcData = [{{
            startLat: {lat},
            startLng: {lon},
            endLat: {arc_end_lat},
            endLng: {arc_end_lon},
            color: ['rgba(255,68,68,0.4)', 'rgba(255,68,68,0.1)']
        }}];

        myGlobe
            .arcsData(arcData)
            .arcColor('color')
            .arcDashLength(0.4)
            .arcDashGap(0.2)
            .arcDashAnimateTime(2000)
            .arcStroke(0.5);

        // Mode switching functions
        function setDayMode() {{
            currentMode = 'day';
            myGlobe
                .globeImageUrl('//unpkg.com/three-globe/example/img/earth-blue-marble.jpg')
                .bumpImageUrl('//unpkg.com/three-globe/example/img/earth-topology.png');

            document.getElementById('dayBtn').classList.add('active');
            document.getElementById('nightBtn').classList.remove('active');
        }}

        function setNightMode() {{
            currentMode = 'night';
            myGlobe
                .globeImageUrl('//unpkg.com/three-globe/example/img/earth-night.jpg')
                .bumpImageUrl('//unpkg.com/three-globe/example/img/earth-topology.png');

            document.getElementById('nightBtn').classList.add('active');
            document.getElementById('dayBtn').classList.remove('active');
        }}

        function toggleBorders() {{
            const btn = document.getElementById('bordersBtn');
            // Only switch on once the border data and topojson are both
            // available, so the flag and the button never get out of sync.
            const ready = Boolean(window.countriesData) && typeof topojson !== 'undefined';
            showBorders = !showBorders && ready;

            if (showBorders) {{
                const countries = topojson.feature(window.countriesData, window.countriesData.objects.countries);
                myGlobe
                    .polygonsData(countries.features)
                    .polygonAltitude(0.01)
                    .polygonCapColor(() => 'rgba(200, 200, 200, 0.1)')
                    .polygonSideColor(() => 'rgba(200, 200, 200, 0.05)')
                    .polygonStrokeColor(() => '#ffffff')
                    .polygonLabel(({{ properties: d }}) => `
                        <div class="location-label">
                            <b>${{d.name}}</b>
                        </div>
                    `);
                btn.classList.add('active');
            }} else {{
                myGlobe.polygonsData([]);
                btn.classList.remove('active');
            }}
        }}
    </script>
    <script src="//unpkg.com/topojson-client"></script>
</body>
</html>
"""
    return document
379
+
380
def create_info_card(parsed: "ParsedResponse") -> str:
    """Create the information card with location details and external map links.

    Args:
        parsed: Parsed model output; its ``coords``, ``city``, ``region`` and
            ``country`` attributes are read.

    Returns:
        An HTML fragment as a string, or ``""`` when ``parsed.coords`` is
        missing.
    """
    from html import escape
    from urllib.parse import quote_plus

    if not parsed.coords:
        return ""

    lat, lon = parsed.coords.lat, parsed.coords.lon

    # The place names come from model output, so escape them before placing
    # them in markup.
    city = escape(parsed.city or "Unknown")
    region = escape(parsed.region or "Unknown")
    country = escape(parsed.country or "Unknown")

    # Build the search query from the parts that exist, so a missing field
    # never shows up as the literal text "None"; fall back to the coordinates.
    search_terms = " ".join(part for part in (parsed.city, parsed.country) if part)
    search_query = quote_plus(search_terms or f"{lat},{lon}")

    card = f"""
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            border-radius: 12px; padding: 24px; color: white; margin-top: 20px;">
    <h2 style="margin: 0 0 16px 0; font-size: 24px;">📍 Predicted Location</h2>

    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 20px;">
        <div style="background: rgba(255,255,255,0.1); padding: 12px; border-radius: 8px;">
            <div style="font-size: 12px; opacity: 0.8;">City</div>
            <div style="font-size: 18px; font-weight: bold;">{city}</div>
        </div>
        <div style="background: rgba(255,255,255,0.1); padding: 12px; border-radius: 8px;">
            <div style="font-size: 12px; opacity: 0.8;">Region</div>
            <div style="font-size: 18px; font-weight: bold;">{region}</div>
        </div>
        <div style="background: rgba(255,255,255,0.1); padding: 12px; border-radius: 8px;">
            <div style="font-size: 12px; opacity: 0.8;">Country</div>
            <div style="font-size: 18px; font-weight: bold;">{country}</div>
        </div>
        <div style="background: rgba(255,255,255,0.1); padding: 12px; border-radius: 8px;">
            <div style="font-size: 12px; opacity: 0.8;">Coordinates</div>
            <div style="font-size: 14px; font-weight: bold;">{lat:.4f}°, {lon:.4f}°</div>
        </div>
    </div>

    <div style="display: flex; gap: 12px; flex-wrap: wrap;">
        <a href="https://www.google.com/maps?q={lat},{lon}" target="_blank"
           style="background: #4285f4; color: white; padding: 10px 20px;
                  border-radius: 6px; text-decoration: none; font-weight: bold;">
            🗺️ Google Maps
        </a>
        <a href="https://www.openstreetmap.org/?mlat={lat}&mlon={lon}#map=12/{lat}/{lon}" target="_blank"
           style="background: #7ebc6f; color: white; padding: 10px 20px;
                  border-radius: 6px; text-decoration: none; font-weight: bold;">
            🌍 OpenStreetMap
        </a>
        <a href="https://www.google.com/search?q={search_query}" target="_blank"
           style="background: #ea4335; color: white; padding: 10px 20px;
                  border-radius: 6px; text-decoration: none; font-weight: bold;">
            🔍 Learn More
        </a>
    </div>
</div>
"""
    return card
431
 
432
  # ============================================================================
433
  # Gradio Interface
434
  # ============================================================================
435
 
436
+ with gr.Blocks(title="GeoVLM - 3D Globe", theme=gr.themes.Soft(), css="""
437
+ .gradio-container {max-width: 1400px !important;}
438
+ .globe-container {height: 600px !important;}
439
+ """) as demo:
440
+
441
+ gr.Markdown("""
442
+ # 🌍 GeoVLM - AI Geolocation with 3D Globe
443
+
444
+ Upload any image and watch the AI predict its location on an interactive 3D globe!
445
+
446
+ **Powered by:** [vlm-gym](https://github.com/sdan/vlm-gym) | Vision-Language Models | Three.js Globe
447
+ """)
 
 
 
448
 
449
  with gr.Row():
450
  with gr.Column(scale=1):
 
453
  label="📸 Upload Image",
454
  height=400
455
  )
 
456
 
457
+ predict_btn = gr.Button(
458
+ "🔍 Analyze & Locate",
459
+ variant="primary",
460
+ size="lg"
 
 
 
 
461
  )
462
+
463
+ gr.Markdown("""
464
+ ### 💡 Tips:
465
+ - Outdoor images work best
466
+ - Street views are ideal
467
+ - Landmarks help accuracy
468
+ - Clear, well-lit photos
469
+
470
+ ### 🎯 Features:
471
+ - 3D interactive globe
472
+ - Flies to predicted location
473
+ - Pulsing marker animation
474
+ - Auto-rotating globe
475
+ """)
476
 
477
+ with gr.Column(scale=2):
478
+ with gr.Tabs():
479
+ with gr.Tab("🌐 3D Globe"):
480
+ globe_output = gr.HTML(
481
+ label="Interactive Globe",
482
+ elem_classes=["globe-container"]
483
+ )
484
+
485
+ with gr.Tab("📊 Details"):
486
+ info_output = gr.HTML(label="Location Info")
487
+ output_text = gr.Markdown(label="Analysis")
488
 
489
+ gr.Markdown("""
490
+ ---
491
+
492
+ ### 🎮 How It Works:
493
+
494
+ 1. **Upload** any image with visible location clues
495
+ 2. **AI analyzes** architecture, vegetation, signs, landscape
496
+ 3. **Globe flies** to the predicted location in 3D
497
+ 4. **Explore** the area with interactive controls
498
+
499
+ ### 🔬 Technology:
500
+ - **Vision Model:** Qwen2-VL-2B-Instruct
501
+ - **Training:** Reinforcement learning on 5M geotagged images
502
+ - **Visualization:** Three.js Globe.GL
503
+ - **Dataset:** OSV5M (OpenStreetView 5M)
504
+
505
+ ### 🚀 Use Cases:
506
+ - **OSINT Research** - Verify photo locations
507
+ - **Education** - Learn world geography
508
+ - **Travel** - Discover new places
509
+ - **Training** - Practice geolocation skills
510
+
511
+ ---
512
+
513
+ Built with ❤️ by AceXRoux | [GitHub](https://github.com/sdan/vlm-gym) | [LinkedIn](https://linkedin.com/in/your-profile)
514
+ """)
515
 
 
516
  predict_btn.click(
517
  fn=predict_location,
518
  inputs=image_input,
519
+ outputs=[output_text, globe_output, info_output]
520
  )
521
 
522
  if __name__ == "__main__":
523
+ print("🌍 Starting GeoVLM with 3D Globe...")
524
  load_model()
525
+ demo.launch(
526
+ server_name="0.0.0.0",
527
+ server_port=7860,
528
+ share=False
529
+ )