alyex committed on
Commit
ea1d4b9
·
verified ·
1 Parent(s): f6a4475

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -51
app.py CHANGED
@@ -54,83 +54,154 @@ class DataManager:
54
  self.dataset = None
55
 
56
  def _load_segment_metadata(self):
57
- """Load segment metadata from RAR file"""
58
  RAR_FILE = "kiu_segment_metadata.rar"
59
- JSON_FILE = os.path.join(SEGMENT_EXTRACT_DIR, "kiu_segment_metadata.json")
60
 
61
  # Check if already extracted
62
- if os.path.exists(JSON_FILE):
63
- print(f"Loading cached segment metadata...")
64
- with open(JSON_FILE, 'r') as f:
65
- self.segment_data = json.load(f)
66
- print(f"βœ… Loaded {len(self.segment_data)} segment entries")
67
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Extract from RAR
70
  if not os.path.exists(RAR_FILE):
71
  print(f"❌ {RAR_FILE} not found in Space root")
 
 
72
  return
73
 
74
  print(f"Extracting {RAR_FILE}...")
 
75
  try:
 
 
 
76
  with rarfile.RarFile(RAR_FILE) as rf:
77
- # Find JSON file
78
- json_in_rar = None
79
- for file_info in rf.infolist():
80
- if file_info.filename.endswith('.json') and 'segment_metadata' in file_info.filename:
81
- json_in_rar = file_info.filename
82
- break
83
 
84
- if not json_in_rar:
85
- print("❌ No JSON file found in RAR")
 
 
 
86
  return
87
 
88
- # Extract
89
- rf.extract(json_in_rar, SEGMENT_EXTRACT_DIR)
90
- extracted = os.path.join(SEGMENT_EXTRACT_DIR, json_in_rar)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # Load
93
- with open(extracted, 'r') as f:
94
- self.segment_data = json.load(f)
95
 
96
- # Rename to standard location
97
- if extracted != JSON_FILE:
98
- shutil.move(extracted, JSON_FILE)
99
 
100
- print(f"βœ… Extracted and loaded {len(self.segment_data)} entries")
 
 
 
 
 
 
 
 
 
101
 
 
 
 
 
 
 
 
102
  except Exception as e:
103
- print(f"❌ Error extracting RAR: {e}")
104
  import traceback
105
  traceback.print_exc()
106
 
107
  def _build_manifest(self):
108
- """Build manifest of all line instances"""
109
  if not self.segment_data:
110
  print("❌ No segment data - cannot build manifest")
111
  return
112
 
113
- print("Building manifest...")
114
  self.manifest = []
115
 
 
 
 
 
116
  for key, segment_info in self.segment_data.items():
117
  if not isinstance(segment_info, dict):
118
  continue
119
 
120
- # Extract KIU ID
121
- kiu_id = segment_info.get('kiu_id')
122
- if not kiu_id and 'KIU_' in key:
123
  try:
124
- kiu_id = int(key.split('_')[1])
 
 
 
 
 
125
  except:
126
- continue
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  # Process line instances
129
  instances = segment_info.get('instances', [])
 
130
  for instance in instances:
131
  if instance.get('class') == 'Line':
132
  self.manifest.append({
133
- 'kiu_id': str(kiu_id).zfill(5),
134
  'instance_id': instance.get('instance_id'),
135
  'crop_coords': instance.get('crop_coords', [0, 0, 100, 100]),
136
  'direction': instance.get('direction', ''),
@@ -594,28 +665,60 @@ Skipped: {stats['skip']} | Unclear: {stats['unclear']} | Needs Reclass: {stats['
594
  app_state = AnnotationApp()
595
 
596
  # Build Gradio UI
597
- with gr.Blocks(title="⚑ Hieroglyph Annotation", theme=gr.themes.Soft()) as demo:
598
 
599
  gr.Markdown("# ⚑ Hieroglyph Direction Annotation Tool")
600
 
 
601
  stats = app_state.get_statistics()
 
602
 
603
- gr.HTML(f"""
604
- <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
605
- color: white; padding: 15px; border-radius: 10px; margin-bottom: 20px;">
606
- <h3 style="margin: 0 0 10px 0;">πŸ“Š System Status</h3>
607
- <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;">
608
- <div><strong>Dataset:</strong> {HF_DATASET_REPO}</div>
609
- <div><strong>Total Instances:</strong> {stats['total']:,}</div>
610
- <div><strong>Remaining:</strong> {stats['remaining']:,}</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
  </div>
612
- <div style="margin-top: 10px; background: rgba(255,255,255,0.2);
613
- padding: 8px; border-radius: 5px;">
614
- <strong>Progress:</strong> {stats['progress_pct']:.1f}%
615
- ({stats['processed']:,} / {stats['total']:,} annotated)
 
 
 
 
 
 
 
 
 
 
 
 
616
  </div>
617
- </div>
618
- """)
619
 
620
  with gr.Row():
621
  with gr.Column(scale=2):
@@ -713,4 +816,4 @@ if __name__ == "__main__":
713
  print(f"Progress: {stats['progress_pct']:.1f}%")
714
  print("="*80 + "\n")
715
 
716
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
54
  self.dataset = None
55
 
56
  def _load_segment_metadata(self):
57
+ """Load segment metadata from RAR file containing multiple JSON files"""
58
  RAR_FILE = "kiu_segment_metadata.rar"
59
+ EXTRACT_DIR = SEGMENT_EXTRACT_DIR
60
 
61
  # Check if already extracted
62
+ if os.path.exists(EXTRACT_DIR) and os.listdir(EXTRACT_DIR):
63
+ json_files = [f for f in os.listdir(EXTRACT_DIR) if f.endswith('.json')]
64
+ if json_files:
65
+ print(f"Loading cached segment metadata from {len(json_files)} files...")
66
+ self.segment_data = {}
67
+
68
+ for json_file in json_files:
69
+ filepath = os.path.join(EXTRACT_DIR, json_file)
70
+ try:
71
+ with open(filepath, 'r') as f:
72
+ data = json.load(f)
73
+ # Use filename as key (e.g., "KIU_00001_segments")
74
+ key = json_file.replace('.json', '')
75
+ self.segment_data[key] = data
76
+ except Exception as e:
77
+ print(f"⚠️ Error loading {json_file}: {e}")
78
+
79
+ print(f"βœ… Loaded {len(self.segment_data)} segment entries from cache")
80
+ return
81
 
82
  # Extract from RAR
83
  if not os.path.exists(RAR_FILE):
84
  print(f"❌ {RAR_FILE} not found in Space root")
85
+ print(f"Current directory: {os.getcwd()}")
86
+ print(f"Files available: {os.listdir('.')}")
87
  return
88
 
89
  print(f"Extracting {RAR_FILE}...")
90
+
91
  try:
92
+ # Set rarfile to use unrar
93
+ rarfile.UNRAR_TOOL = "unrar"
94
+
95
  with rarfile.RarFile(RAR_FILE) as rf:
96
+ # List all files
97
+ all_files = [f.filename for f in rf.infolist()]
98
+ json_files = [f for f in all_files if f.endswith('.json')]
 
 
 
99
 
100
+ print(f"Found {len(json_files)} JSON files in RAR")
101
+
102
+ if not json_files:
103
+ print(f"❌ No JSON files found in RAR")
104
+ print(f"Files in RAR: {all_files[:10]}...") # Show first 10
105
  return
106
 
107
+ # Extract all JSON files
108
+ os.makedirs(EXTRACT_DIR, exist_ok=True)
109
+
110
+ for json_file in json_files:
111
+ try:
112
+ rf.extract(json_file, EXTRACT_DIR)
113
+ except Exception as e:
114
+ print(f"⚠️ Error extracting {json_file}: {e}")
115
+ # Try reading directly
116
+ try:
117
+ with rf.open(json_file) as f:
118
+ content = f.read()
119
+ # Get just the filename without path
120
+ filename = os.path.basename(json_file)
121
+ output_path = os.path.join(EXTRACT_DIR, filename)
122
+ with open(output_path, 'wb') as out:
123
+ out.write(content)
124
+ except Exception as e2:
125
+ print(f"❌ Failed to extract {json_file}: {e2}")
126
+ continue
127
 
128
+ # Now load all extracted JSON files
129
+ self.segment_data = {}
130
+ extracted_files = [f for f in os.listdir(EXTRACT_DIR) if f.endswith('.json')]
131
 
132
+ print(f"Loading {len(extracted_files)} extracted JSON files...")
 
 
133
 
134
+ for json_file in extracted_files:
135
+ filepath = os.path.join(EXTRACT_DIR, json_file)
136
+ try:
137
+ with open(filepath, 'r') as f:
138
+ data = json.load(f)
139
+ # Use filename as key (e.g., "KIU_00001_segments")
140
+ key = json_file.replace('.json', '')
141
+ self.segment_data[key] = data
142
+ except Exception as e:
143
+ print(f"⚠️ Error loading {json_file}: {e}")
144
 
145
+ print(f"βœ… Extracted and loaded {len(self.segment_data)} segment entries")
146
+
147
+ except rarfile.RarCannotExec as e:
148
+ print(f"❌ RAR tool not available: {e}")
149
+ print("⚠️ SOLUTION: Ensure packages.txt contains 'unrar'")
150
+ print(" Or extract manually and upload JSON files to:")
151
+ print(f" {EXTRACT_DIR}/")
152
  except Exception as e:
153
+ print(f"❌ Error with RAR file: {e}")
154
  import traceback
155
  traceback.print_exc()
156
 
157
  def _build_manifest(self):
158
+ """Build manifest of all line instances from multiple JSON files"""
159
  if not self.segment_data:
160
  print("❌ No segment data - cannot build manifest")
161
  return
162
 
163
+ print("Building manifest from segment metadata...")
164
  self.manifest = []
165
 
166
+ # self.segment_data is now a dict where:
167
+ # key = "KIU_00001_segments" (filename without .json)
168
+ # value = the JSON content for that KIU
169
+
170
  for key, segment_info in self.segment_data.items():
171
  if not isinstance(segment_info, dict):
172
  continue
173
 
174
+ # Extract KIU ID from key (e.g., "KIU_00001_segments" -> "00001")
175
+ kiu_id = None
176
+ if 'KIU_' in key:
177
  try:
178
+ # Extract the numeric part after KIU_
179
+ parts = key.split('_')
180
+ for part in parts:
181
+ if part.isdigit() or (part.startswith('0') and part[1:].isdigit()):
182
+ kiu_id = part
183
+ break
184
  except:
185
+ pass
186
+
187
+ # Also check if kiu_id is in the data itself
188
+ if not kiu_id and 'kiu_id' in segment_info:
189
+ kiu_id = str(segment_info['kiu_id'])
190
+
191
+ if not kiu_id:
192
+ print(f"⚠️ Could not extract KIU ID from key: {key}")
193
+ continue
194
+
195
+ # Ensure it's zero-padded to 5 digits
196
+ kiu_id = str(kiu_id).zfill(5)
197
 
198
  # Process line instances
199
  instances = segment_info.get('instances', [])
200
+
201
  for instance in instances:
202
  if instance.get('class') == 'Line':
203
  self.manifest.append({
204
+ 'kiu_id': kiu_id,
205
  'instance_id': instance.get('instance_id'),
206
  'crop_coords': instance.get('crop_coords', [0, 0, 100, 100]),
207
  'direction': instance.get('direction', ''),
 
665
  app_state = AnnotationApp()
666
 
667
  # Build Gradio UI
668
+ with gr.Blocks(title="⚑ Hieroglyph Annotation") as demo:
669
 
670
  gr.Markdown("# ⚑ Hieroglyph Direction Annotation Tool")
671
 
672
+ # Check if data is loaded
673
  stats = app_state.get_statistics()
674
+ data_loaded = len(app_state.data_mgr.manifest) > 0
675
 
676
+ if not data_loaded:
677
+ gr.HTML(f"""
678
+ <div style="background: #fff3cd; padding: 20px; border-radius: 10px; margin-bottom: 20px; border-left: 5px solid #ffc107;">
679
+ <h3 style="margin-top: 0; color: #856404;">⚠️ No Segment Data Loaded</h3>
680
+ <p><strong>The segment metadata could not be loaded from the RAR file.</strong></p>
681
+
682
+ <h4>Solution Options:</h4>
683
+ <ol>
684
+ <li><strong>Install unrar tool:</strong>
685
+ <br>Add to your Space's Dockerfile or requirements:
686
+ <pre style="background: #f8f9fa; padding: 10px; border-radius: 5px; margin: 10px 0;">RUN apt-get update && apt-get install -y unrar</pre>
687
+ </li>
688
+ <li><strong>Upload extracted JSON directly:</strong>
689
+ <br>Extract <code>kiu_segment_metadata.json</code> locally and upload to:
690
+ <pre style="background: #f8f9fa; padding: 10px; border-radius: 5px; margin: 10px 0;">{SEGMENT_EXTRACT_DIR}/kiu_segment_metadata.json</pre>
691
+ </li>
692
+ <li><strong>Use a different format:</strong>
693
+ <br>Upload the JSON file as a regular file in your Space repo instead of RAR.
694
+ </li>
695
+ </ol>
696
+
697
+ <p><strong>Current Status:</strong></p>
698
+ <ul>
699
+ <li>Dataset: {HF_DATASET_REPO} - {'βœ… Loaded' if app_state.data_mgr.dataset else '❌ Failed'}</li>
700
+ <li>Segment Metadata: ❌ Not loaded</li>
701
+ <li>Manifest: {len(app_state.data_mgr.manifest)} instances</li>
702
+ </ul>
703
  </div>
704
+ """)
705
+ else:
706
+ gr.HTML(f"""
707
+ <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
708
+ color: white; padding: 15px; border-radius: 10px; margin-bottom: 20px;">
709
+ <h3 style="margin: 0 0 10px 0;">πŸ“Š System Status</h3>
710
+ <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;">
711
+ <div><strong>Dataset:</strong> {HF_DATASET_REPO}</div>
712
+ <div><strong>Total Instances:</strong> {stats['total']:,}</div>
713
+ <div><strong>Remaining:</strong> {stats['remaining']:,}</div>
714
+ </div>
715
+ <div style="margin-top: 10px; background: rgba(255,255,255,0.2);
716
+ padding: 8px; border-radius: 5px;">
717
+ <strong>Progress:</strong> {stats['progress_pct']:.1f}%
718
+ ({stats['processed']:,} / {stats['total']:,} annotated)
719
+ </div>
720
  </div>
721
+ """)
 
722
 
723
  with gr.Row():
724
  with gr.Column(scale=2):
 
816
  print(f"Progress: {stats['progress_pct']:.1f}%")
817
  print("="*80 + "\n")
818
 
819
+ demo.launch(server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft())