File size: 12,848 Bytes
3105e0c
0e56c4d
3105e0c
0e56c4d
 
 
3105e0c
 
 
 
 
 
 
 
 
 
0e56c4d
 
3105e0c
 
0e56c4d
 
 
3105e0c
 
 
 
 
0e56c4d
 
3105e0c
 
 
0e56c4d
3105e0c
 
0e56c4d
3105e0c
 
 
 
 
0e56c4d
 
3105e0c
0e56c4d
3105e0c
 
7cc7006
 
 
 
 
 
3105e0c
0e56c4d
 
 
3105e0c
 
0e56c4d
 
 
 
 
 
 
 
 
 
 
 
 
 
3105e0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e56c4d
 
 
3105e0c
 
 
 
 
 
 
 
 
 
 
 
 
 
0e56c4d
 
 
 
 
 
 
 
3105e0c
 
0e56c4d
3105e0c
0e56c4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3105e0c
 
 
0e56c4d
3105e0c
0e56c4d
 
 
 
 
 
3105e0c
0e56c4d
3105e0c
0e56c4d
 
 
 
 
3105e0c
 
0e56c4d
3105e0c
 
0e56c4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3105e0c
 
 
 
0e56c4d
3105e0c
 
 
0e56c4d
3105e0c
0e56c4d
 
3105e0c
 
 
 
 
0e56c4d
 
 
 
3105e0c
 
 
 
0e56c4d
 
3105e0c
 
0e56c4d
 
 
 
3105e0c
0e56c4d
3105e0c
0e56c4d
 
 
 
3105e0c
0e56c4d
 
3105e0c
0e56c4d
 
 
3105e0c
 
 
0e56c4d
3105e0c
 
 
0e56c4d
3105e0c
0e56c4d
 
3105e0c
 
0e56c4d
 
3105e0c
0e56c4d
 
3105e0c
0e56c4d
 
 
3105e0c
 
 
 
 
 
0e56c4d
3105e0c
 
 
 
0e56c4d
 
3105e0c
 
 
 
 
 
 
 
0e56c4d
 
3105e0c
 
0e56c4d
 
 
 
 
 
 
 
 
 
3105e0c
 
0e56c4d
 
 
 
 
3105e0c
 
 
 
 
 
0e56c4d
 
3105e0c
 
 
 
 
0e56c4d
 
3105e0c
 
 
 
 
0e56c4d
 
3105e0c
 
 
 
 
 
 
 
0e56c4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
"""
Lanternfly Field Capture Space - Resilient GPS (V9)
A Gradio app for capturing photos with GPS coordinates and saving to Hugging Face datasets.

This version incorporates all debugging fixes: safe handling of empty input, 
resilient component selection, and relaxed GPS timeout settings.
"""

import gradio as gr
import os
import json
import uuid
from datetime import datetime
from PIL import Image
from huggingface_hub import HfApi, hf_hub_download, create_repo, file_exists, upload_file
import io
import time
import requests

# --- Configuration ---
# The access token is read from HF_TOKEN, falling back to HF_TOKEN_SPACE. Set one of
# them in Colab or in your Hugging Face Space settings.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HF_TOKEN_SPACE")
DATASET_REPO = os.environ.get("DATASET_REPO", "rlogh/lanternfly-data")

# Locations used inside the dataset repo
METADATA_PATH = "metadata/entries.jsonl"
IMAGES_DIR = "images"

# `api` doubles as the "saving enabled" flag: it stays None when credentials are
# missing or when the client / repo setup fails.
api = None
if not (HF_TOKEN and DATASET_REPO):
    print("⚠️ Running in test mode - no HF credentials (dataset saving disabled)")
else:
    try:
        api = HfApi(token=HF_TOKEN)
        # exist_ok=True makes this a no-op when the dataset repo already exists
        create_repo(DATASET_REPO, repo_type="dataset", exist_ok=True, token=HF_TOKEN)
        print("βœ… Hugging Face credentials found - dataset saving enabled")
    except Exception as init_error:
        print(f"⚠️ Error initializing HF API: {init_error}")
        api = None

# --- Utility Functions ---

def get_current_time():
    """Return the current time (naive, as given by ``datetime.now()``) as an ISO 8601 string."""
    now = datetime.now()
    return now.isoformat()

def handle_time_capture():
    """Capture the current time for the UI.

    Returns:
        A ``(status_markdown, timestamp)`` pair; the timestamp is the string
        produced by ``get_current_time()`` and is also embedded in the status.
    """
    captured_at = get_current_time()
    return f"πŸ• **Time Captured**: {captured_at}", captured_at

def _append_jsonl_in_repo(new_row: dict) -> None:
    """Append one JSON line to ``METADATA_PATH`` in the dataset repo.

    The current metadata file (if any) is downloaded, ``new_row`` is serialized
    and added as the last line, and the whole file is uploaded again. Does
    nothing when the module-level ``api`` is not initialized.

    Args:
        new_row: JSON-serializable mapping describing one captured sample.

    Raises:
        Exception: whatever the Hub client raised on the third failed attempt
            to read the existing file, or any error raised by the upload.

    NOTE(review): this is a read-modify-write with no locking visible here, so
    two concurrent saves could overwrite each other's entry - confirm whether
    that matters for this deployment.
    """
    if not api:
        return

    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    max_attempts = 3
    existing_lines = []

    for attempt in range(1, max_attempts + 1):
        try:
            if file_exists(DATASET_REPO, METADATA_PATH, repo_type="dataset", token=HF_TOKEN):
                local_path = hf_hub_download(
                    repo_id=DATASET_REPO, filename=METADATA_PATH,
                    repo_type="dataset", token=HF_TOKEN
                )
                with open(local_path, "r", encoding="utf-8") as f:
                    existing_lines = f.read().splitlines()
            # Reaching this point means the lookup succeeded, whether or not the
            # file exists yet. Stop here instead of repeating the same successful
            # check on every remaining attempt.
            break
        except Exception:
            if attempt == max_attempts:
                raise  # bare raise keeps the original traceback
            time.sleep(attempt)  # linear backoff: 1s, then 2s

    existing_lines.append(json.dumps(new_row, ensure_ascii=False))
    payload = io.BytesIO("\n".join(existing_lines).encode("utf-8"))

    # The trailing "Z" denotes UTC, so the timestamp must actually be taken in UTC.
    utc_stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    upload_file(
        path_or_fileobj=payload,
        path_in_repo=METADATA_PATH,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message=f"Append 1 entry at {utc_stamp}Z",
    )

def _save_image_to_repo(pil_img: Image.Image, dest_rel_path: str) -> None:
    """Encode a PIL image as JPEG and upload it into the dataset repo.

    Does nothing when the module-level ``api`` is not initialized.

    Args:
        pil_img: Image to upload. Modes that JPEG cannot store (for example
            RGBA or palette images) are converted to RGB first.
        dest_rel_path: Destination path of the file inside the dataset repo.
    """
    if not api:
        return

    # JPEG has no alpha channel or palette support, and Pillow raises when asked
    # to write such modes. Convert anything outside the plainly supported set.
    if pil_img.mode not in ("RGB", "L", "CMYK"):
        pil_img = pil_img.convert("RGB")

    img_bytes = io.BytesIO()
    pil_img.save(img_bytes, format="JPEG", quality=90)
    img_bytes.seek(0)  # rewind so the upload reads from the start of the buffer

    upload_file(
        path_or_fileobj=img_bytes,
        path_in_repo=dest_rel_path,
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
        commit_message=f"Upload image {dest_rel_path}",
    )

def handle_gps_location(json_str):
    """
    Turn the JSON payload written by the browser GPS script into UI values.

    Args:
        json_str: JSON text from the hidden GPS textbox. A success payload
            carries ``latitude``, ``longitude``, ``accuracy`` and ``timestamp``;
            a failure payload carries ``error`` and ``code``.

    Returns:
        A 5-tuple for the (status, latitude, longitude, accuracy, device
        timestamp) outputs. For empty input every element is a no-op
        ``gr.update()`` so the current UI values are left untouched. For a
        geolocation error the coordinate fields are "N/A"; if the payload
        cannot be processed they are "Error".
    """
    # Ignore the empty value delivered by the initial component change event.
    # gr.update() with no arguments leaves the target component unchanged.
    if not json_str:
        return tuple(gr.update() for _ in range(5))

    try:
        data = json.loads(json_str)

        if 'error' in data:
            # Map Geolocation API error codes for better user feedback
            error_map = {
                1: "Permission Denied (Check browser settings)",
                2: "Position Unavailable (Poor signal/network)",
                3: "Timeout Expired (Fix took too long)",
                0: "Geolocation not supported",
                'N/A': "Unknown Geolocation Error"
            }
            error_code = data.get('code', 'N/A')
            # Unknown codes fall back to the message supplied by the browser
            error_msg = error_map.get(error_code, data.get('error', 'Unknown Error'))

            gr.Warning(f"GPS Error: Code {error_code} ({error_msg})")

            status_msg = f"❌ **GPS Error (Code {error_code})**: {error_msg}"
            return status_msg, "N/A", "N/A", "N/A", get_current_time()

        lat = str(data.get('latitude', ''))
        lon = str(data.get('longitude', ''))
        accuracy = str(data.get('accuracy', ''))
        timestamp = str(data.get('timestamp', ''))

        # Accuracy is only reformatted for display; the raw string is returned
        try:
            acc_display = f"{float(accuracy):.1f}"
        except ValueError:
            acc_display = "N/A"

        status_msg = f"βœ… **GPS Captured**: {lat[:8]}, {lon[:8]} (accuracy: {acc_display}m)"
        return status_msg, lat, lon, accuracy, timestamp

    except Exception as e:
        # Boundary handler for malformed or unexpected payloads. gr.Error is an
        # exception class and has no effect unless raised; raising would discard
        # the return values, so show a toast with gr.Warning instead.
        status_msg = f"❌ **Error**: Failed to process GPS JSON: {str(e)}"
        gr.Warning(f"Failed to process GPS JSON: {e}")
        return status_msg, "Error", "Error", "Error", "Error"


def get_gps_js():
    """Return the browser-side script used for manually triggered GPS capture.

    The returned text is a JavaScript arrow function. It looks up the textarea
    inside the element with id ``hidden_gps_input``, calls
    ``navigator.geolocation.getCurrentPosition`` and writes the outcome into
    that textarea as JSON, followed by a bubbling ``input`` event. On success
    the JSON holds latitude, longitude, accuracy and timestamp; on failure it
    holds an error message and code.
    """
    geolocation_script = """
    () => {
        // Find the specific hidden textarea within the component container 
        const container = document.querySelector('#hidden_gps_input');
        let textarea = null;

        if (container) {
            // Find the actual textarea element inside the Gradio wrapper
            textarea = container.querySelector('textarea');
        }

        if (!textarea) {
            console.error("DEBUG: Fatal: Hidden GPS textbox cannot be found.");
            return;
        }
        
        if (!navigator.geolocation) {
            console.error("DEBUG: Geolocation not supported by browser.");
            textarea.value = JSON.stringify({error: "Geolocation not supported", code: 0});
            textarea.dispatchEvent(new Event('input', { bubbles: true }));
            return;
        }
        
        console.log("DEBUG: Starting Geolocation request (60s timeout, low accuracy preferred).");

        navigator.geolocation.getCurrentPosition(
            function(position) {
                console.log("DEBUG: Geolocation SUCCESS.", position.coords);
                const data = {
                    latitude: position.coords.latitude,
                    longitude: position.coords.longitude,
                    accuracy: position.coords.accuracy,
                    timestamp: new Date(position.timestamp).toISOString()
                };
                
                textarea.value = JSON.stringify(data);
                textarea.dispatchEvent(new Event('input', { bubbles: true }));
            },
            function(err) {
                // Pass the error code back to Python
                console.error(`DEBUG: Geolocation FAILURE. Code: ${err.code}, Message: ${err.message}`);
                textarea.value = JSON.stringify({ error: err.message, code: err.code });
                textarea.dispatchEvent(new Event('input', { bubbles: true }));
            },
            // Options: enableHighAccuracy: false for faster fix, maximumAge for caching, 60s timeout
            { enableHighAccuracy: false, timeout: 60000, maximumAge: 5000 }
        );
    }
    """
    return geolocation_script

def _parse_finite_float(value):
    """Return ``value`` as a finite float, or None when it is not a usable number."""
    import math  # local import: `math` is not imported at module level

    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if math.isfinite(number) else None


def save_to_dataset(image, lat, lon, accuracy_m, device_ts):
    """Validate a capture and save the image plus its metadata row.

    All inputs are validated and the metadata row is built *before* anything is
    uploaded, so a bad field can no longer leave an image in the repo without a
    matching metadata entry.

    Args:
        image: Captured PIL image, or None when no photo was taken.
        lat: Latitude text from the UI (for example "40.44", "N/A" or "Error").
        lon: Longitude text from the UI.
        accuracy_m: Accuracy text from the UI; stored as None when it is not a
            finite number.
        device_ts: Device timestamp text, stored as-is.

    Returns:
        A ``(status_markdown, preview)`` pair. ``preview`` is the row serialized
        as indented JSON on success (including test mode, when the module-level
        ``api`` is not initialized) and None on any failure.
    """
    # Failure paths return None (not "") for the preview, since the preview is
    # wired to a JSON output component and "" is not valid JSON text.
    if image is None:
        return "❌ **Error**: No image captured. Please take a photo first.", None

    # Reject every non-numeric coordinate ("N/A", "", "Error", NaN, ...), not
    # only the literal "N/A" placeholder.
    latitude = _parse_finite_float(lat)
    longitude = _parse_finite_float(lon)
    if latitude is None or longitude is None:
        return "❌ **Error**: GPS coordinates missing. Please click 'Get GPS' first.", None

    # Accuracy is optional: manually typed coordinates may come without it.
    accuracy = _parse_finite_float(accuracy_m)

    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    try:
        # The field name and "Z" suffix promise UTC, so take the time in UTC.
        server_ts_utc = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        sample_id = str(uuid.uuid4())
        timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Test mode: no Hub client, so only validate and preview the payload.
        if not api:
            row = {
                "id": sample_id,
                "image": f"test_{timestamp_str}_{sample_id[:8]}.jpg",
                "latitude": latitude,
                "longitude": longitude,
                "accuracy_m": accuracy,
                "device_timestamp": device_ts,
                "server_timestamp_utc": server_ts_utc,
                "notes": "",
            }
            status = f"πŸ” **Test Mode**: Data validated successfully! Sample {sample_id[:8]}"
            return status, json.dumps(row, indent=2)

        image_rel_path = f"{IMAGES_DIR}/lanternfly_{timestamp_str}_{sample_id[:8]}.jpg"
        row = {
            "id": sample_id,
            "image": image_rel_path,
            "latitude": latitude,
            "longitude": longitude,
            "accuracy_m": accuracy,
            "device_timestamp": device_ts,
            "server_timestamp_utc": server_ts_utc,
            "location": f"{lat}, {lon}",
            "notes": "",
        }

        _save_image_to_repo(image, image_rel_path)
        _append_jsonl_in_repo(row)

        accuracy_text = "N/A" if accuracy is None else accuracy
        status = (
            "βœ… **Success!** Saved to dataset!\n\n"
            f"- Image: `{image_rel_path}`\n"
            f"- Lat/Lon: {row['latitude']}, {row['longitude']} (Β±{accuracy_text} m)"
        )
        return status, json.dumps(row, indent=2)

    except Exception as e:
        # UI boundary: report the failure instead of crashing the event handler.
        # gr.Error only has an effect when raised, which would discard the
        # status text, so surface a toast with gr.Warning instead.
        error_msg = f"❌ **Critical Save Error**: {str(e)}"
        gr.Warning(error_msg)
        return error_msg, None

# --- Gradio Interface ---

# Build the UI. Components appear in the page in the order they are created inside
# the Blocks / Row / Column context managers, so statement order here is significant.
with gr.Blocks(title="Lanternfly Field Capture") as app:
    gr.Markdown("# πŸ¦‹ Lanternfly Field Capture (Resilient GPS)")
    gr.Markdown("Click **'πŸ“ Get GPS'** to capture location. **You must allow location permission** in your browser.")
    
    # Hidden input for GPS data - MUST retain this ID for the JavaScript selector to work.
    # The script from get_gps_js() writes its JSON result into this textbox, and the
    # .change handler registered below passes that value to handle_gps_location().
    # NOTE(review): the script queries '#hidden_gps_input textarea' in the page; confirm
    # that the installed Gradio version still renders visible=False components in the DOM.
    hidden_gps_input = gr.Textbox(visible=False, elem_id="hidden_gps_input")

    with gr.Row():
        # Left column: photo input and the three action buttons
        with gr.Column(scale=1):
            # Camera input (type="pil" means handlers receive a PIL image)
            camera = gr.Image(
                streaming=False,
                height=380,
                label="πŸ“· Capture or Upload Photo",
                type="pil",
                sources=["webcam", "upload"]
            )
            
            # --- GPS Capture Section ---
            gr.Markdown("### πŸ“ Location Capture")
            
            # GPS capture button
            gps_btn = gr.Button("πŸ“ Get GPS", variant="primary")
            
            # Time capture button
            time_btn = gr.Button("πŸ• Get Current Time", variant="secondary")
            
            # Save button
            save_btn = gr.Button("πŸ’Ύ Save Photo and Data to Dataset", variant="stop")

        # Right column: status line, editable data fields and the payload preview
        with gr.Column(scale=1):
            # Status display
            status = gr.Markdown("πŸ”„ **Ready to capture data...**")
            
            # Location Data Fields. They start as "N/A" and are interactive, so the
            # values can also be typed in by hand.
            gr.Markdown("### Captured Data")
            with gr.Row():
                lat_box = gr.Textbox(label="Latitude", interactive=True, value="N/A")
                lon_box = gr.Textbox(label="Longitude", interactive=True, value="N/A")
            with gr.Row():
                accuracy_box = gr.Textbox(label="Accuracy (meters)", interactive=True, value="N/A")
                device_ts_box = gr.Textbox(label="Device Timestamp", interactive=True, value="N/A")
            
            # Preview JSON (second output of save_to_dataset)
            preview = gr.JSON(label="Preview Data Payload", visible=True)

    # --- Event Handlers ---

    # 1. GPS Button Click triggers JavaScript injection to run Geolocation API.
    #    fn=None: there is no Python callback for this click, only the script passed
    #    via js=; the result reaches Python through hidden_gps_input (handler 2).
    gps_btn.click(
        fn=None,
        inputs=[],
        outputs=[],
        js=get_gps_js()
    )

    # 2. Hidden GPS input change triggers Python backend processing (safe from empty string crash).
    #    The five outputs match the 5-tuple returned by handle_gps_location.
    hidden_gps_input.change(
        fn=handle_gps_location,
        inputs=[hidden_gps_input],
        outputs=[status, lat_box, lon_box, accuracy_box, device_ts_box]
    )

    # 3. Time Button Click: overwrites the status line and the device timestamp field
    time_btn.click(
        fn=handle_time_capture,
        inputs=[],
        outputs=[status, device_ts_box]
    )

    # 4. Save button click: sends the photo and the current field values to save_to_dataset
    save_btn.click(
        fn=save_to_dataset,
        inputs=[camera, lat_box, lon_box, accuracy_box, device_ts_box],
        outputs=[status, preview]
    )

# Launch the app only when executed as a script (not when imported)
if __name__ == "__main__":
    # In Colab, share=True is mandatory for HTTPS
    app.launch(share=True)