ddecosmo committed on
Commit
3105e0c
·
verified ·
1 Parent(s): cbf2680

Upload app (5).py

Browse files
Files changed (1) hide show
  1. app (5).py +340 -0
app (5).py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Lanternfly Field Capture Space
3
+ A Gradio app for capturing photos with GPS coordinates and saving to Hugging Face datasets.
4
+ """
5
+
6
+ import gradio as gr
7
+ import os
8
+ import json
9
+ import uuid
10
+ from datetime import datetime
11
+ from PIL import Image
12
+ from huggingface_hub import HfApi, hf_hub_download, create_repo, file_exists, upload_file
13
+ import io
14
+
15
# Configuration
# The token may be supplied under either environment variable name.
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HF_TOKEN_SPACE")
# Target dataset repository; falls back to the project default.
DATASET_REPO = os.getenv("DATASET_REPO", "rlogh/lanternfly-data")

# `api` stays None whenever dataset saving is unavailable (test mode).
api = None
if not (HF_TOKEN and DATASET_REPO):
    print("⚠️ Running in test mode - no HF credentials (dataset saving disabled)")
else:
    api = HfApi(token=HF_TOKEN)
    try:
        # exist_ok=True lets this run on every start-up without failing
        # when the repo is already there.
        create_repo(DATASET_REPO, repo_type="dataset", exist_ok=True, token=HF_TOKEN)
        print("βœ… Hugging Face credentials found - dataset saving enabled")
    except Exception as exc:
        print(f"⚠️ Error creating dataset repo: {exc}")
        api = None

# Constants for file paths
METADATA_PATH = "metadata/entries.jsonl"
IMAGES_DIR = "images"
36
+
37
def get_current_time():
    """Return the current local date and time as an ISO 8601 string."""
    now = datetime.now()
    return now.isoformat()
40
+
41
def _append_jsonl_in_repo(new_row: dict) -> None:
    """
    Append one JSON line to metadata/entries.jsonl in the dataset repo.

    The current file (if any) is downloaded, the new row is added, and the
    whole file is uploaded back to the same path.

    Args:
        new_row: JSON-serializable mapping describing a single capture.

    Raises:
        Exception: Any error raised while checking for, downloading, or
            uploading the metadata file is propagated to the caller.
            Failures are deliberately NOT treated as "file does not exist":
            doing so would upload a file containing only the new row and
            overwrite every previously stored entry.
    """
    from datetime import timezone

    existing_lines = []
    if file_exists(DATASET_REPO, METADATA_PATH, repo_type="dataset", token=HF_TOKEN):
        local_path = hf_hub_download(
            repo_id=DATASET_REPO, filename=METADATA_PATH,
            repo_type="dataset", token=HF_TOKEN
        )
        with open(local_path, "r", encoding="utf-8") as f:
            # Skip blank lines so the rewritten file stays valid JSONL.
            existing_lines = [line for line in f.read().splitlines() if line.strip()]

    # Append our new row
    existing_lines.append(json.dumps(new_row, ensure_ascii=False))

    # One record per line, terminated by a newline.
    payload = ("\n".join(existing_lines) + "\n").encode("utf-8")

    # The "Z" suffix claims UTC, so the time must actually be taken in UTC.
    committed_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    # Upload to the same path (commit creates or updates the file)
    upload_file(
        path_or_fileobj=io.BytesIO(payload),
        path_in_repo=METADATA_PATH,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message=f"Append 1 entry at {committed_at}Z",
    )
78
+
79
def _save_image_to_repo(pil_img: Image.Image, dest_rel_path: str) -> None:
    """
    Encode a PIL image as JPEG and upload it into the dataset repo.

    Args:
        pil_img: Image to store. Modes the JPEG encoder cannot write
            (for example RGBA or palette images) are converted to RGB
            first, which discards any alpha channel.
        dest_rel_path: Destination path of the file inside the repo.

    Raises:
        Exception: Errors from encoding or from the upload are propagated.
    """
    # JPEG has no alpha/palette support; saving such modes raises an error.
    if pil_img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        pil_img = pil_img.convert("RGB")

    img_bytes = io.BytesIO()
    pil_img.save(img_bytes, format="JPEG", quality=90)
    # Rewind so the upload reads the buffer from the start.
    img_bytes.seek(0)

    upload_file(
        path_or_fileobj=img_bytes,
        path_in_repo=dest_rel_path,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message=f"Upload image {dest_rel_path}",
    )
95
+
96
def handle_time_capture():
    """Capture the current time; return (status message, timestamp string)."""
    captured_at = get_current_time()
    return f"πŸ• **Time Captured**: {captured_at}", captured_at
101
+
102
def handle_gps_location(json_str):
    """
    Turn the GPS payload written by the browser into textbox values.

    Args:
        json_str: JSON text holding either an ``error`` key or the keys
            ``latitude``, ``longitude``, ``accuracy`` and ``timestamp``.

    Returns:
        tuple: (status_markdown, latitude, longitude, accuracy, timestamp).
        On any failure the four data values are empty strings, so error
        text is only ever shown in the status message and never written
        into the coordinate fields.
    """
    try:
        data = json.loads(json_str)
        if not isinstance(data, dict):
            raise ValueError("GPS payload must be a JSON object")

        if 'error' in data:
            status_msg = f"❌ **GPS Error**: {data['error']}"
            return status_msg, "", "", "", ""

        lat = str(data.get('latitude', ''))
        lon = str(data.get('longitude', ''))
        accuracy = str(data.get('accuracy', ''))
        timestamp = data.get('timestamp', '')

        # Convert timestamp to ISO string if it's a number
        # (the division by 1000 treats the number as epoch milliseconds).
        if timestamp and isinstance(timestamp, (int, float)):
            timestamp = datetime.fromtimestamp(timestamp / 1000).isoformat()

        # Coordinates are shortened to 8 characters for the status line only.
        status_msg = f"βœ… **GPS Captured**: {lat[:8]}, {lon[:8]} (accuracy: {accuracy}m)"
        return status_msg, lat, lon, accuracy, timestamp

    except Exception as e:
        status_msg = f"❌ **Error**: {str(e)}"
        return status_msg, "", "", "", ""
126
+
127
def get_gps_js():
    """Return the browser-side snippet that captures the GPS position.

    The snippet writes a JSON payload (position data or an ``error`` key)
    into the textarea under ``#hidden_gps_input`` and fires an ``input``
    event on it.
    """
    script = """
    () => {
        // find the textarea element inside Gradio textbox by its elem_id
        const textarea = document.querySelector('#hidden_gps_input textarea');
        if (!textarea) {
            console.log("Hidden GPS textbox not found");
            return;
        }
        if (!navigator.geolocation) {
            textarea.value = JSON.stringify({error: "Geolocation not supported"});
            textarea.dispatchEvent(new Event('input', { bubbles: true }));
            return;
        }
        navigator.geolocation.getCurrentPosition(
            function(position) {
                const data = {
                    latitude: position.coords.latitude,
                    longitude: position.coords.longitude,
                    accuracy: position.coords.accuracy,
                    timestamp: new Date().toISOString()
                };
                textarea.value = JSON.stringify(data);
                // dispatch 'input' event so Gradio notices the change
                textarea.dispatchEvent(new Event('input', { bubbles: true }));
            },
            function(err) {
                textarea.value = JSON.stringify({ error: err.message });
                textarea.dispatchEvent(new Event('input', { bubbles: true }));
            },
            { enableHighAccuracy: true, timeout: 10000 }
        );
    }
    """
    return script
162
+
163
def _parse_numeric_field(raw, label):
    """Convert a textbox value to a finite float; blank or None becomes None."""
    import math

    if raw is None or str(raw).strip() == "":
        return None
    try:
        number = float(raw)
    except (TypeError, ValueError):
        raise ValueError(f"{label} must be a number, got {raw!r}") from None
    if not math.isfinite(number):
        # NaN/Infinity would be serialized as invalid JSON in the metadata.
        raise ValueError(f"{label} must be a finite number, got {raw!r}")
    return number


def save_to_dataset(image, lat, lon, accuracy_m, device_ts):
    """
    Save image and metadata to Hugging Face dataset

    All inputs are validated before anything is uploaded, so invalid
    coordinates can no longer leave an image in the repo without a
    metadata row.

    Args:
        image: PIL Image object (a NumPy array is converted to one)
        lat: latitude as string
        lon: longitude as string
        accuracy_m: accuracy in meters as string (may be blank)
        device_ts: device timestamp as string (may be blank)

    Returns:
        tuple: (status_markdown, preview_json). ``preview_json`` is a JSON
        string on success and ``None`` on failure. ``None`` is used rather
        than an empty string because the value feeds a JSON output
        component, and an empty string is not valid JSON.
    """
    from datetime import timezone

    try:
        # Validate inputs
        if image is None:
            return "❌ **Error**: No image captured. Please take a photo first.", None

        # Ensure PIL.Image; NumPy is optional and only needed for arrays.
        try:
            import numpy as _np
        except ImportError:
            _np = None
        if _np is not None and isinstance(image, _np.ndarray):
            from PIL import Image as _PILImage
            image = _PILImage.fromarray(image)

        missing_gps = "❌ **Error**: GPS coordinates missing. Please click 'Get GPS' first."
        if not lat or not lon:
            return missing_gps, None

        # Parse every numeric field up front, before any upload happens.
        try:
            latitude = _parse_numeric_field(lat, "Latitude")
            longitude = _parse_numeric_field(lon, "Longitude")
            accuracy = _parse_numeric_field(accuracy_m, "Accuracy")
        except ValueError as e:
            return f"❌ **Error**: {e}", None
        if latitude is None or longitude is None:
            # Whitespace-only input counts as missing.
            return missing_gps, None

        # Build a unique ID and paths with timestamp for better visibility
        sample_id = str(uuid.uuid4())
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
        image_rel_path = f"lanternfly_{timestamp_str}_{sample_id[:8]}.jpg"

        # Server UTC timestamp: taken in UTC so the "Z" suffix is truthful.
        server_ts_utc = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

        # Construct metadata row (identical in test mode and real mode)
        row = {
            "id": sample_id,
            "image": image_rel_path,  # This will be visible in dataset viewer
            "latitude": latitude,
            "longitude": longitude,
            "accuracy_m": accuracy,
            "device_timestamp": device_ts if device_ts else None,
            "server_timestamp_utc": server_ts_utc,
            "location": f"{lat}, {lon}",  # Human-readable location
            "notes": "",  # placeholder for future labels
        }
        preview = json.dumps(row, indent=2)

        # Check if running in test mode
        if not api:
            # Test mode - just preview the data without saving
            status = f"πŸ” **Test Mode**: Data validated successfully! Sample {sample_id[:8]}"
            return status, preview

        # Save image first
        try:
            _save_image_to_repo(image, image_rel_path)
        except Exception as e:
            return f"❌ **Error**: Failed to upload image: {e}", None

        # Append metadata row
        try:
            _append_jsonl_in_repo(row)
        except Exception as e:
            return f"❌ **Error**: Image uploaded, but failed to append metadata: {e}", None

        # Return success message and preview
        status = (
            "βœ… **Success!** Saved to dataset!\n\n"
            f"- Image: `{image_rel_path}`\n"
            f"- Lat/Lon: {row['latitude']}, {row['longitude']} (Β±{row['accuracy_m']} m)\n"
            f"- Server time (UTC): {server_ts_utc}"
        )
        return status, preview

    except Exception as e:
        # Last-resort boundary so the UI always gets a status message.
        error_msg = f"❌ **Error**: {str(e)}"
        return error_msg, None
263
+
264
# Create Gradio interface
# Layout: one row with two equal columns. The left column holds the inputs
# (photo, GPS fields, buttons); the right column holds the status and preview.
with gr.Blocks(title="Lanternfly Field Capture") as app:
    gr.Markdown("# πŸ¦‹ Lanternfly Field Capture")
    gr.Markdown("Capture photos with GPS coordinates for field research data collection.")

    with gr.Row():
        with gr.Column(scale=1):
            # Camera input
            # type="pil" means save_to_dataset receives a PIL image.
            camera = gr.Image(
                streaming=False,
                height=380,
                label="πŸ“· Upload Photo (or use camera)",
                type="pil",
                sources=["webcam", "upload"]
            )

            # GPS data capture
            gr.Markdown("### πŸ“ GPS Coordinates")
            gr.Markdown("Click the button below to automatically capture your location and timestamp.")

            # GPS capture button
            gps_btn = gr.Button("πŸ“ Get GPS", variant="primary")

            # Hidden input for GPS data
            # The browser snippet from get_gps_js() looks this element up
            # through '#hidden_gps_input textarea'.
            # NOTE(review): confirm that the installed Gradio version still
            # renders a visible=False textbox into the DOM; otherwise the
            # snippet logs "Hidden GPS textbox not found" and does nothing.
            hidden_gps_input = gr.Textbox(visible=False, elem_id="hidden_gps_input")

            # Editable fields, so values can also be typed in by hand.
            with gr.Row():
                lat_box = gr.Textbox(label="Latitude", interactive=True, elem_id="lat")
                lon_box = gr.Textbox(label="Longitude", interactive=True, elem_id="lon")

            with gr.Row():
                accuracy_box = gr.Textbox(label="Accuracy (meters)", interactive=True, elem_id="accuracy")
                device_ts_box = gr.Textbox(label="Device Timestamp", interactive=True, elem_id="device_ts")

            # Time capture button
            time_btn = gr.Button("πŸ• Get Current Time", variant="secondary")

            # Save button
            save_btn = gr.Button("πŸ’Ύ Save to Dataset", variant="secondary")

        with gr.Column(scale=1):
            # Status display
            status = gr.Markdown("πŸ”„ **Ready to capture data...** Click 'Get GPS' to start or upload a photo.")

            # Preview JSON
            preview = gr.JSON(label="Preview JSON", visible=True)

    # Event handlers
    # fn=None: no Python function is attached to this click; only the
    # JavaScript snippet is supplied, which writes its result into the
    # hidden textbox.
    gps_btn.click(
        fn=None,
        inputs=[],
        outputs=[],
        js=get_gps_js()
    )

    # When the hidden GPS input changes, populate the visible fields
    # Output order matches the tuple returned by handle_gps_location.
    hidden_gps_input.change(
        fn=handle_gps_location,
        inputs=[hidden_gps_input],
        outputs=[status, lat_box, lon_box, accuracy_box, device_ts_box]
    )

    # Fills the device timestamp from the Python process's clock
    # (get_current_time), not from the browser.
    time_btn.click(
        fn=handle_time_capture,
        inputs=[],
        outputs=[status, device_ts_box]
    )

    # Input order matches the save_to_dataset parameter order.
    save_btn.click(
        fn=save_to_dataset,
        inputs=[camera, lat_box, lon_box, accuracy_box, device_ts_box],
        outputs=[status, preview]
    )

# Launch the app
if __name__ == "__main__":
    app.launch()