alyex committed on
Commit
8fd2b13
·
verified ·
1 Parent(s): dbd477d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -731
app.py CHANGED
@@ -1,747 +1,55 @@
 
1
  import gradio as gr
2
- import json
3
  import os
4
- import pickle
 
5
  from PIL import Image
6
- import tempfile
7
- import time
8
- from pathlib import Path
9
- import hashlib
10
  import io
11
- import shutil
12
-
13
- # Hugging Face imports
14
- from datasets import load_dataset
15
- import rarfile
16
 
17
- # ========== CONFIGURATION ==========
18
- HF_TOKEN = os.getenv("HF_TOKEN") # Set in Space secrets
19
- HF_DATASET_REPO = "alyex/karnak-data-app" # Your new dataset with images
20
 
21
- # Directories
22
- HF_CACHE_DIR = os.path.join(tempfile.gettempdir(), "karnak_cache")
23
- HF_ANNOTATIONS_DIR = "/data/annotations"
24
- HF_HARD_NEGATIVES_DIR = "/data/hard_negatives"
25
- SEGMENT_EXTRACT_DIR = "/data/kiu_segment_metadata"
26
-
27
- os.makedirs(HF_CACHE_DIR, exist_ok=True)
28
- os.makedirs(HF_ANNOTATIONS_DIR, exist_ok=True)
29
- os.makedirs(HF_HARD_NEGATIVES_DIR, exist_ok=True)
30
- os.makedirs(SEGMENT_EXTRACT_DIR, exist_ok=True)
31
-
32
- MANIFEST_FILE = os.path.join(HF_CACHE_DIR, "manifest.pkl")
33
-
34
- class AnnotationState:
35
- def __init__(self):
36
- self.manifest = []
37
- self.current_index = 0
38
- self.history = []
39
- self.karnak_dataset = None
40
- self.segment_data = {} # Will hold kiu_segment_metadata
41
-
42
- # Load datasets
43
- self.load_datasets()
44
- # Load manifest
45
- self.load_manifest()
46
 
47
- def load_datasets(self):
48
- """Load both datasets"""
49
- print("Loading Karnak dataset...")
50
- try:
51
- self.karnak_dataset = load_dataset(HF_DATASET_REPO, streaming=True)
52
- print(f"✅ Karnak dataset loaded")
53
- except Exception as e:
54
- print(f"❌ Error loading dataset: {e}")
55
- self.karnak_dataset = None
56
-
57
- print("Loading segment metadata from local Space...")
58
- self.load_segment_metadata_local()
59
 
60
- def load_segment_metadata_local(self):
61
- """Load segment metadata from local RAR file in Space repo"""
62
- RAR_FILENAME = "kiu_segment_metadata.rar"
63
- JSON_FILENAME = "kiu_segment_metadata.json"
64
-
65
- print(f"Looking for {RAR_FILENAME} in current directory...")
66
-
67
- # Check if RAR exists
68
- if not os.path.exists(RAR_FILENAME):
69
- print(f"❌ {RAR_FILENAME} not found in Space files.")
70
- print(f"Available files: {os.listdir('.')}")
71
- return
72
-
73
- print(f"✅ Found {RAR_FILENAME}")
74
-
75
- # Check if already extracted
76
- json_path = os.path.join(SEGMENT_EXTRACT_DIR, JSON_FILENAME)
77
- if os.path.exists(json_path):
78
- print(f"Loading cached metadata from {json_path}")
79
- try:
80
- with open(json_path, 'r') as f:
81
- self.segment_data = json.load(f)
82
- print(f"✅ Loaded {len(self.segment_data)} segment entries")
83
- return
84
- except Exception as e:
85
- print(f"Error loading cached JSON: {e}")
86
 
87
- # Extract from RAR
88
  try:
89
- print(f"Extracting {RAR_FILENAME}...")
90
- with rarfile.RarFile(RAR_FILENAME) as rf:
91
- # Find JSON file in RAR
92
- json_file_in_rar = None
93
- for file_info in rf.infolist():
94
- if 'kiu_segment_metadata' in file_info.filename and file_info.filename.endswith('.json'):
95
- json_file_in_rar = file_info.filename
96
- break
97
-
98
- if json_file_in_rar:
99
- print(f"Found JSON file: {json_file_in_rar}")
100
- rf.extract(json_file_in_rar, SEGMENT_EXTRACT_DIR)
101
-
102
- extracted_path = os.path.join(SEGMENT_EXTRACT_DIR, json_file_in_rar)
103
-
104
- # Load the data
105
- with open(extracted_path, 'r') as f:
106
- self.segment_data = json.load(f)
107
 
108
- print(f"✅ Loaded {len(self.segment_data)} segment entries")
 
 
109
 
110
- # Rename to standard name
111
- if extracted_path != json_path:
112
- shutil.move(extracted_path, json_path)
113
- else:
114
- print("❌ No JSON file found in RAR archive")
115
- print("Files in RAR:")
116
- for file_info in rf.infolist():
117
- print(f" - {file_info.filename}")
118
-
119
- except Exception as e:
120
- print(f"❌ Error extracting RAR: {e}")
121
- import traceback
122
- traceback.print_exc()
123
-
124
- def load_manifest(self):
125
- """Build manifest from segment metadata"""
126
- if not self.segment_data:
127
- print("❌ No segment data available")
128
- self.manifest = []
129
- return
130
-
131
- print("Building manifest from segment metadata...")
132
- self.manifest = []
133
-
134
- # The segment_data is a dict where keys are like "KIU_00001_segments"
135
- for key, segment_info in self.segment_data.items():
136
- if not isinstance(segment_info, dict):
137
- continue
138
-
139
- kiu_id = segment_info.get('kiu_id')
140
- if not kiu_id:
141
- # Try to extract from filename
142
- if 'KIU_' in key:
143
- try:
144
- kiu_id = key.split('_')[1] # Extract number from "KIU_00001_segments"
145
- except:
146
- continue
147
-
148
- instances = segment_info.get('instances', [])
149
-
150
- for instance in instances:
151
- if instance.get('class') == 'Line':
152
- manifest_item = {
153
- 'kiu_id': str(kiu_id).zfill(5),
154
- 'instance_id': instance.get('instance_id'),
155
- 'crop_coords': instance.get('crop_coords', [0, 0, 100, 100]),
156
- 'direction': instance.get('direction', ''),
157
- 'annotated': instance.get('annotated', False),
158
- 'line_number': instance.get('line_number'),
159
- 'segment_key': key,
160
- 'segment_info': segment_info
161
- }
162
- self.manifest.append(manifest_item)
163
-
164
- print(f"✅ Built manifest with {len(self.manifest)} instances")
165
-
166
- # Save to cache
167
- with open(MANIFEST_FILE, 'wb') as f:
168
- pickle.dump(self.manifest, f)
169
-
170
- self.current_index = self.find_first_unannotated()
171
-
172
- def get_kiu_data(self, kiu_id):
173
- """Get HTML and image for a KIU from the dataset"""
174
- if not self.karnak_dataset:
175
- return None, None
176
-
177
- kiu_id_str = str(kiu_id).zfill(5)
178
-
179
- # Search in the streaming dataset
180
- try:
181
- for item in self.karnak_dataset["train"]:
182
- if item["kiu_id"] == kiu_id_str:
183
- # Convert image bytes to PIL Image
184
- if item["image"]:
185
- try:
186
- image = Image.open(io.BytesIO(item["image"]))
187
- return item["html"], image
188
- except Exception as e:
189
- print(f"Error converting image for KIU {kiu_id_str}: {e}")
190
- return item["html"], None
191
- except Exception as e:
192
- print(f"Error streaming dataset for KIU {kiu_id_str}: {e}")
193
-
194
- return None, None
195
-
196
- def needs_annotation(self, item):
197
- """Check if item needs annotation"""
198
- if not item.get('annotated', False):
199
- return True
200
-
201
- direction = item.get('direction')
202
- if direction == 'TTB_NEEDS_RECLASSIFICATION':
203
- return True
204
-
205
- return False
206
-
207
- def find_first_unannotated(self):
208
- """Find first item that needs annotation"""
209
- for idx, item in enumerate(self.manifest):
210
- if self.needs_annotation(item):
211
- return idx
212
- return 0
213
-
214
- def find_next_unannotated(self, from_index=None):
215
- """Find next item that needs annotation"""
216
- if not self.manifest:
217
- return 0
218
-
219
- start_idx = from_index if from_index is not None else self.current_index
220
-
221
- # Search forward
222
- for idx in range(start_idx + 1, len(self.manifest)):
223
- if self.needs_annotation(self.manifest[idx]):
224
- return idx
225
-
226
- # Wrap around
227
- for idx in range(0, start_idx):
228
- if self.needs_annotation(self.manifest[idx]):
229
- return idx
230
-
231
- return start_idx
232
-
233
- def get_statistics(self):
234
- """Calculate statistics"""
235
- if not self.manifest:
236
- return {'total': 0, 'remaining': 0, 'processed': 0, 'progress_pct': 0}
237
-
238
- total = len(self.manifest)
239
- remaining = sum(1 for item in self.manifest if self.needs_annotation(item))
240
- processed = total - remaining
241
- progress_pct = (processed / total * 100) if total > 0 else 0
242
-
243
- # Count by direction
244
- ltr = sum(1 for item in self.manifest if item.get('direction') == 'LTR')
245
- rtl = sum(1 for item in self.manifest if item.get('direction') == 'RTL')
246
- ttb_ltr = sum(1 for item in self.manifest if item.get('direction') == 'TTB_LTR')
247
- ttb_rtl = sum(1 for item in self.manifest if item.get('direction') == 'TTB_RTL')
248
- skip = sum(1 for item in self.manifest if item.get('direction') == 'Skip')
249
- unclear = sum(1 for item in self.manifest if item.get('direction') == 'Unclear')
250
- ttb_reclass = sum(1 for item in self.manifest if item.get('direction') == 'TTB_NEEDS_RECLASSIFICATION')
251
-
252
- return {
253
- 'total': total,
254
- 'remaining': remaining,
255
- 'processed': processed,
256
- 'progress_pct': progress_pct,
257
- 'ltr': ltr,
258
- 'rtl': rtl,
259
- 'ttb_ltr': ttb_ltr,
260
- 'ttb_rtl': ttb_rtl,
261
- 'skip': skip,
262
- 'unclear': unclear,
263
- 'ttb_reclass': ttb_reclass
264
- }
265
-
266
- def get_annotation_file_path(self, item):
267
- """Get path for annotation file"""
268
- kiu_id = item['kiu_id']
269
- instance_id = item['instance_id']
270
- filename = f"KIU_{kiu_id}_instance_{instance_id}.json"
271
- return os.path.join(HF_ANNOTATIONS_DIR, filename)
272
-
273
- def save_annotation(self, item, direction, line_number):
274
- """Save annotation to persistent storage"""
275
- annotation_file = self.get_annotation_file_path(item)
276
-
277
- annotation_data = {
278
- 'kiu_id': item['kiu_id'],
279
- 'instance_id': item['instance_id'],
280
- 'direction': direction,
281
- 'annotated': True,
282
- 'line_number': line_number if line_number and line_number.strip() else None,
283
- 'timestamp': time.time(),
284
- 'crop_coords': item['crop_coords'],
285
- 'segment_key': item.get('segment_key', '')
286
- }
287
-
288
- with open(annotation_file, 'w', encoding='utf-8') as f:
289
- json.dump(annotation_data, f, indent=2, ensure_ascii=False)
290
-
291
- return annotation_file
292
-
293
- # Global state
294
- state = AnnotationState()
295
-
296
- def load_current_instance():
297
- """Load and display current instance"""
298
- if not state.manifest or state.current_index >= len(state.manifest):
299
- return None, "", "No items available", ""
300
-
301
- item = state.manifest[state.current_index]
302
- kiu_id = item['kiu_id']
303
-
304
- # Get HTML and image from dataset
305
- html_content, full_image = state.get_kiu_data(kiu_id)
306
-
307
- # Crop the image
308
- cropped = None
309
- if full_image:
310
- try:
311
- x1, y1, x2, y2 = map(int, item['crop_coords'])
312
- x1, y1 = max(0, x1), max(0, y1)
313
- x2, y2 = min(full_image.width, x2), min(full_image.height, y2)
314
 
315
- if x2 > x1 and y2 > y1:
316
- cropped = full_image.crop((x1, y1, x2, y2))
317
- if cropped.height > 800:
318
- ratio = 800 / cropped.height
319
- new_width = int(cropped.width * ratio)
320
- cropped = cropped.resize((new_width, 800), Image.LANCZOS)
321
  except Exception as e:
322
- print(f"Error cropping image: {e}")
323
-
324
- # Format HTML display
325
- if html_content:
326
- html_display = f"""
327
- <div style="padding: 10px; background: #f5f5f5; border-radius: 5px; max-height: 300px; overflow-y: auto;">
328
- <strong>KIU {kiu_id}</strong>
329
- <hr style="margin: 5px 0;">
330
- <div style="font-size: 12px;">
331
- {html_content[:2000]}{'...' if len(html_content) > 2000 else ''}
332
- </div>
333
- </div>
334
- """
335
- else:
336
- html_display = f"<div style='padding: 10px; background: #fff3cd;'>No HTML for KIU {kiu_id}</div>"
337
-
338
- # Load existing annotation
339
- annotation_file = state.get_annotation_file_path(item)
340
- line_num = ""
341
- if os.path.exists(annotation_file):
342
- try:
343
- with open(annotation_file, 'r') as f:
344
- annotation = json.load(f)
345
- line_num = str(annotation.get('line_number', '')) if annotation.get('line_number') else ""
346
- except:
347
- pass
348
 
349
- # Info display
350
- stats = state.get_statistics()
351
- direction = item.get('direction', '')
352
- annotated = item.get('annotated', False)
353
-
354
- # Determine status
355
- if annotated:
356
- if direction in ['LTR', 'RTL', 'TTB_LTR', 'TTB_RTL']:
357
- status = f"✅ {direction}"
358
- elif direction == 'TTB_NEEDS_RECLASSIFICATION':
359
- status = "⚠️ NEEDS RECLASS"
360
- elif direction == 'Skip':
361
- status = "⏭️ Skipped"
362
- elif direction == 'Unclear':
363
- status = "❓ Unclear"
364
- else:
365
- status = f"✅ {direction}"
366
- else:
367
- status = "⏳ Pending"
368
-
369
- line_info = f" | Line #{line_num}" if line_num else ""
370
-
371
- info = f"""
372
- **{state.current_index + 1} / {stats['total']}** | KIU {kiu_id} | Instance {item['instance_id']}{line_info} | {status}
373
-
374
- **Progress:** {stats['processed']}/{stats['total']} ({stats['progress_pct']:.1f}%) | **Left:** {stats['remaining']}
375
 
376
- **Counts:** LTR: {stats['ltr']} | RTL: {stats['rtl']} | TTB+LTR: {stats['ttb_ltr']} | TTB+RTL: {stats['ttb_rtl']}
377
- Skipped: {stats['skip']} | Unclear: {stats['unclear']} | Needs Reclass: {stats['ttb_reclass']}
378
- """
379
-
380
- return cropped, html_display, info, line_num
381
-
382
- def save_annotation_to_state(direction, line_number):
383
- """Save annotation"""
384
- if not state.manifest or state.current_index >= len(state.manifest):
385
- return False
386
-
387
- item = state.manifest[state.current_index]
388
-
389
- try:
390
- # Save to persistent storage
391
- annotation_file = state.save_annotation(item, direction, line_number)
392
-
393
- # Update manifest
394
- item['direction'] = direction
395
- item['annotated'] = True
396
- if line_number and line_number.strip():
397
- try:
398
- item['line_number'] = int(line_number.strip())
399
- except:
400
- pass
401
-
402
- # Update manifest cache
403
- with open(MANIFEST_FILE, 'wb') as f:
404
- pickle.dump(state.manifest, f)
405
-
406
- print(f"✅ Annotation saved to {annotation_file}")
407
- return True
408
- except Exception as e:
409
- print(f"Error saving: {e}")
410
- return False
411
-
412
- def annotate_and_next(direction, line_number):
413
- """Annotate current instance and jump to next unannotated"""
414
- if not state.manifest:
415
- return None, "", "No items available", ""
416
-
417
- # Save to history
418
- state.history.append({
419
- 'index': state.current_index,
420
- 'item': state.manifest[state.current_index].copy()
421
- })
422
-
423
- # Save annotation
424
- save_annotation_to_state(direction, line_number)
425
-
426
- # Jump to next unannotated
427
- next_idx = state.find_next_unannotated()
428
- state.current_index = next_idx
429
-
430
- return load_current_instance()
431
-
432
- def undo_last():
433
- """Undo last annotation"""
434
- if not state.manifest or not state.history:
435
- return load_current_instance()
436
-
437
- last = state.history.pop()
438
- state.current_index = last['index']
439
-
440
- item = state.manifest[state.current_index]
441
-
442
- try:
443
- # Get annotation file path
444
- annotation_file = state.get_annotation_file_path(item)
445
-
446
- if os.path.exists(annotation_file):
447
- os.remove(annotation_file)
448
-
449
- # Restore manifest
450
- state.manifest[state.current_index] = last['item']
451
-
452
- # Update manifest cache
453
- with open(MANIFEST_FILE, 'wb') as f:
454
- pickle.dump(state.manifest, f)
455
-
456
- except Exception as e:
457
- print(f"Error undoing: {e}")
458
-
459
- return load_current_instance()
460
-
461
- def flag_hard_negative(line_number):
462
- """Flag current instance as hard negative"""
463
- if not state.manifest or state.current_index >= len(state.manifest):
464
- return load_current_instance()
465
-
466
- item = state.manifest[state.current_index]
467
-
468
- try:
469
- # Create hard negative directory
470
- instance_dir_name = f"KIU_{item['kiu_id']}_instance_{item['instance_id']}"
471
- instance_dir = os.path.join(HF_HARD_NEGATIVES_DIR, instance_dir_name)
472
- os.makedirs(instance_dir, exist_ok=True)
473
-
474
- # Get artifact image
475
- html, full_image = state.get_kiu_data(item['kiu_id'])
476
- if full_image:
477
- # Crop the full image
478
- x1, y1, x2, y2 = map(int, item['crop_coords'])
479
- x1, y1 = max(0, x1), max(0, y1)
480
- x2, y2 = min(full_image.width, x2), min(full_image.height, y2)
481
-
482
- if x2 > x1 and y2 > y1:
483
- cropped = full_image.crop((x1, y1, x2, y2))
484
- cropped.save(os.path.join(instance_dir, "hard_negative.jpg"), "JPEG")
485
-
486
- # Save metadata
487
- metadata = {
488
- 'kiu_id': item['kiu_id'],
489
- 'instance_id': item['instance_id'],
490
- 'crop_coords': item['crop_coords'],
491
- 'flagged_as_hard_negative': True,
492
- 'timestamp': time.time()
493
- }
494
-
495
- with open(os.path.join(instance_dir, "metadata.json"), 'w', encoding='utf-8') as f:
496
- json.dump(metadata, f, indent=2, ensure_ascii=False)
497
-
498
- # Mark as hard negative in annotations
499
- save_annotation_to_state("HardNegative", line_number)
500
-
501
- # Update manifest
502
- item['direction'] = 'HardNegative'
503
- item['annotated'] = True
504
-
505
- # Update manifest cache
506
- with open(MANIFEST_FILE, 'wb') as f:
507
- pickle.dump(state.manifest, f)
508
-
509
- except Exception as e:
510
- print(f"Error flagging: {e}")
511
-
512
- # Move to next unannotated
513
- next_idx = state.find_next_unannotated()
514
- state.current_index = next_idx
515
-
516
- return load_current_instance()
517
-
518
- # Navigation functions
519
- def go_back():
520
- if not state.manifest:
521
- return None, "", "No items available", ""
522
- state.current_index = max(0, state.current_index - 1)
523
- return load_current_instance()
524
-
525
- def go_forward():
526
- if not state.manifest:
527
- return None, "", "No items available", ""
528
- state.current_index = min(len(state.manifest) - 1, state.current_index + 1)
529
- return load_current_instance()
530
-
531
- def jump_to_next_unannotated():
532
- if not state.manifest:
533
- return None, "", "No items available", ""
534
- next_idx = state.find_next_unannotated()
535
- state.current_index = next_idx
536
- return load_current_instance()
537
-
538
- def jump_to_index(target):
539
- if not state.manifest:
540
- return None, "", "No items available", ""
541
- try:
542
- idx = int(target) - 1
543
- if 0 <= idx < len(state.manifest):
544
- state.current_index = idx
545
- except:
546
- pass
547
- return load_current_instance()
548
-
549
- def jump_to_kiu(kiu_id):
550
- if not state.manifest:
551
- return None, "", "No items available", ""
552
- kiu_id = kiu_id.strip().zfill(5)
553
- for idx, item in enumerate(state.manifest):
554
- if str(item['kiu_id']).zfill(5) == kiu_id:
555
- state.current_index = idx
556
- break
557
- return load_current_instance()
558
-
559
- def jump_to_old_ttb():
560
- """Jump to next TTB that needs reclassification"""
561
- if not state.manifest:
562
- return None, "", "No items available", ""
563
-
564
- for idx in range(state.current_index + 1, len(state.manifest)):
565
- if state.manifest[idx].get('direction') == 'TTB_NEEDS_RECLASSIFICATION':
566
- state.current_index = idx
567
- return load_current_instance()
568
-
569
- for idx in range(0, state.current_index):
570
- if state.manifest[idx].get('direction') == 'TTB_NEEDS_RECLASSIFICATION':
571
- state.current_index = idx
572
- return load_current_instance()
573
-
574
- return load_current_instance()
575
-
576
- def export_annotations():
577
- """Export all annotations as a single JSON file"""
578
- annotations = []
579
-
580
- # Scan annotation directory
581
- for filename in os.listdir(HF_ANNOTATIONS_DIR):
582
- if filename.endswith('.json'):
583
- filepath = os.path.join(HF_ANNOTATIONS_DIR, filename)
584
- try:
585
- with open(filepath, 'r', encoding='utf-8') as f:
586
- annotations.append(json.load(f))
587
- except:
588
- pass
589
-
590
- # Create export file
591
- export_file = os.path.join(HF_ANNOTATIONS_DIR, "all_annotations.json")
592
- with open(export_file, 'w', encoding='utf-8') as f:
593
- json.dump(annotations, f, indent=2, ensure_ascii=False)
594
-
595
- return f"✅ Exported {len(annotations)} annotations to `{export_file}`"
596
-
597
- # Build Gradio Interface
598
- with gr.Blocks(title="⚡ Hieroglyph Annotation Tool - Hugging Face") as app:
599
-
600
- gr.HTML("""
601
- <style>
602
- .gr-button {
603
- margin: 2px !important;
604
- min-width: 80px !important;
605
- height: 36px !important;
606
- }
607
- .gradio-container {
608
- max-width: 1400px !important;
609
- }
610
- h1 {
611
- color: #2c3e50;
612
- margin-bottom: 10px;
613
- }
614
- .info-box {
615
- background: #f8f9fa;
616
- padding: 10px;
617
- border-radius: 5px;
618
- border-left: 4px solid #007bff;
619
- margin-bottom: 10px;
620
- }
621
- .space-info {
622
- background: #e3f2fd;
623
- padding: 10px;
624
- border-radius: 5px;
625
- margin-bottom: 15px;
626
- }
627
- </style>
628
- """)
629
-
630
- gr.Markdown("# ⚡ Hieroglyph Direction Annotation - Hugging Face Space")
631
-
632
- # Display status
633
- stats = state.get_statistics()
634
-
635
- gr.HTML(f"""
636
- <div class="space-info">
637
- <strong>📊 Space Status:</strong><br>
638
- • Dataset: <code>{HF_DATASET_REPO}</code><br>
639
- • Manifest: {stats['total']} instances | {stats['remaining']} remaining<br>
640
- • Progress: {stats['progress_pct']:.1f}%<br>
641
- </div>
642
- """)
643
-
644
- with gr.Row():
645
- with gr.Column(scale=2):
646
- image_display = gr.Image(label="Line Instance", type="pil", height=500)
647
-
648
- line_number_input = gr.Textbox(
649
- label="Line Number (optional)",
650
- placeholder="1, 2, 3...",
651
- max_lines=1
652
- )
653
-
654
- with gr.Row():
655
- ltr_btn = gr.Button("➡️ LTR", variant="primary")
656
- rtl_btn = gr.Button("⬅️ RTL", variant="primary")
657
- ttb_ltr_btn = gr.Button("⬇️➡️ TTB+LTR", variant="primary")
658
- ttb_rtl_btn = gr.Button("⬇️⬅️ TTB+RTL", variant="primary")
659
-
660
- with gr.Row():
661
- skip_btn = gr.Button("⏭️ Skip", variant="secondary")
662
- unclear_btn = gr.Button("❓ Unclear", variant="secondary")
663
- hard_neg_btn = gr.Button("🚫 Bad", variant="secondary")
664
-
665
- with gr.Column(scale=1):
666
- html_display = gr.HTML(label="Reference")
667
-
668
- info_display = gr.Markdown()
669
-
670
- with gr.Row():
671
- back_btn = gr.Button("⬅️ Back")
672
- forward_btn = gr.Button("➡️ Forward")
673
- undo_btn = gr.Button("↩️ Undo")
674
- next_unann_btn = gr.Button("⏭️ Next Unannotated", variant="primary")
675
- export_btn = gr.Button("💾 Export", variant="secondary")
676
-
677
- with gr.Row():
678
- jump_input = gr.Textbox(label="Jump to Index", placeholder="123", scale=2)
679
- jump_btn = gr.Button("Go", scale=1)
680
- kiu_input = gr.Textbox(label="Find KIU", placeholder="00001", scale=2)
681
- kiu_btn = gr.Button("Find", scale=1)
682
- old_ttb_btn = gr.Button("Find TTB Reclass", scale=1, variant="secondary")
683
-
684
- export_output = gr.Markdown()
685
-
686
- # Event handlers
687
- ltr_btn.click(
688
- lambda ln: annotate_and_next("LTR", ln),
689
- inputs=[line_number_input],
690
- outputs=[image_display, html_display, info_display, line_number_input]
691
- )
692
- rtl_btn.click(
693
- lambda ln: annotate_and_next("RTL", ln),
694
- inputs=[line_number_input],
695
- outputs=[image_display, html_display, info_display, line_number_input]
696
- )
697
- ttb_ltr_btn.click(
698
- lambda ln: annotate_and_next("TTB_LTR", ln),
699
- inputs=[line_number_input],
700
- outputs=[image_display, html_display, info_display, line_number_input]
701
- )
702
- ttb_rtl_btn.click(
703
- lambda ln: annotate_and_next("TTB_RTL", ln),
704
- inputs=[line_number_input],
705
- outputs=[image_display, html_display, info_display, line_number_input]
706
- )
707
- skip_btn.click(
708
- lambda ln: annotate_and_next("Skip", ln),
709
- inputs=[line_number_input],
710
- outputs=[image_display, html_display, info_display, line_number_input]
711
- )
712
- unclear_btn.click(
713
- lambda ln: annotate_and_next("Unclear", ln),
714
- inputs=[line_number_input],
715
- outputs=[image_display, html_display, info_display, line_number_input]
716
- )
717
- hard_neg_btn.click(
718
- flag_hard_negative,
719
- inputs=[line_number_input],
720
- outputs=[image_display, html_display, info_display, line_number_input]
721
- )
722
-
723
- back_btn.click(go_back, outputs=[image_display, html_display, info_display, line_number_input])
724
- forward_btn.click(go_forward, outputs=[image_display, html_display, info_display, line_number_input])
725
- undo_btn.click(undo_last, outputs=[image_display, html_display, info_display, line_number_input])
726
- next_unann_btn.click(jump_to_next_unannotated, outputs=[image_display, html_display, info_display, line_number_input])
727
- export_btn.click(export_annotations, outputs=[export_output])
728
-
729
- jump_btn.click(jump_to_index, inputs=[jump_input], outputs=[image_display, html_display, info_display, line_number_input])
730
- kiu_btn.click(jump_to_kiu, inputs=[kiu_input], outputs=[image_display, html_display, info_display, line_number_input])
731
- old_ttb_btn.click(jump_to_old_ttb, outputs=[image_display, html_display, info_display, line_number_input])
732
-
733
- # Load initial state
734
- app.load(load_current_instance, outputs=[image_display, html_display, info_display, line_number_input])
735
-
736
- if __name__ == "__main__":
737
- stats = state.get_statistics()
738
- print("\n" + "="*80)
739
- print("⚡ Hieroglyph Annotation Tool - Hugging Face Space")
740
- print("="*80)
741
- print(f"📊 Statistics:")
742
- print(f" Total instances: {stats['total']:,}")
743
- print(f" Remaining: {stats['remaining']:,}")
744
- print(f" Progress: {stats['progress_pct']:.1f}%")
745
- print("="*80)
746
-
747
- app.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # app_minimal.py - TEST VERSION
2
  import gradio as gr
 
3
  import os
4
+ import json
5
+ from datasets import load_dataset
6
  from PIL import Image
 
 
 
 
7
  import io
 
 
 
 
 
8
 
9
+ HF_DATASET_REPO = "alyex/karnak-data-app"
 
 
10
 
11
+ with gr.Blocks(title="⚡ Minimal Test") as demo:
12
+ gr.Markdown("# 🧪 Testing Dataset Access")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ kiu_input = gr.Textbox(label="Enter KIU ID", value="00001")
15
+ fetch_btn = gr.Button("Fetch Data")
 
 
 
 
 
 
 
 
 
 
16
 
17
+ image_output = gr.Image(label="Image")
18
+ html_output = gr.HTML(label="HTML Preview")
19
+ status = gr.Markdown()
20
+
21
+ def fetch_data(kiu_id):
22
+ kiu_id = kiu_id.zfill(5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
24
  try:
25
+ # Load dataset
26
+ dataset = load_dataset(HF_DATASET_REPO, streaming=True)
27
+
28
+ # Find the KIU
29
+ found = False
30
+ html_content = ""
31
+ image_data = None
32
+
33
+ for item in dataset["train"].take(100): # Limit search
34
+ if item["kiu_id"] == kiu_id:
35
+ found = True
36
+ html_content = item.get("html", "No HTML")
 
 
 
 
 
 
37
 
38
+ if item.get("image"):
39
+ image = Image.open(io.BytesIO(item["image"]))
40
+ image_data = image
41
 
42
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ if found:
45
+ html_display = f"<div style='padding:10px;background:#f0f0f0'>{html_content[:500]}...</div>"
46
+ return image_data, html_display, f"✅ Found KIU {kiu_id}"
47
+ else:
48
+ return None, "Not found", f"❌ KIU {kiu_id} not found in first 100 items"
49
+
50
  except Exception as e:
51
+ return None, f"Error: {str(e)}", f"❌ Failed: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
+ fetch_btn.click(fetch_data, inputs=[kiu_input], outputs=[image_output, html_output, status])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ demo.launch(server_name="0.0.0.0", server_port=7860)