Spaces:
Running
on
Zero
Running
on
Zero
wjm55
committed on
Commit
·
52222e9
1
Parent(s):
0f72e2b
fixed the issue with entities and text output
Browse files
app.py
CHANGED
|
@@ -140,10 +140,17 @@ def run_example(image, model_id="Qwen/Qwen2-VL-7B-Instruct", run_ner=False, ner_
|
|
| 140 |
if last_end < len(ocr_text):
|
| 141 |
highlighted_text.append((ocr_text[last_end:], None))
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
return highlighted_text
|
| 144 |
|
| 145 |
# If NER is disabled, return the text without highlighting
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
css = """
|
| 149 |
/* Overall app styling */
|
|
@@ -265,37 +272,23 @@ with gr.Blocks(css=css) as demo:
|
|
| 265 |
if isinstance(image, np.ndarray):
|
| 266 |
image = Image.fromarray(image)
|
| 267 |
|
| 268 |
-
# Create a temporary directory
|
| 269 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 270 |
# Save image
|
| 271 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
| 272 |
image.save(img_path)
|
| 273 |
|
| 274 |
-
#
|
| 275 |
-
|
| 276 |
-
entities = []
|
| 277 |
-
current_pos = 0
|
| 278 |
-
|
| 279 |
-
# Process the highlighted text data
|
| 280 |
-
for segment, label in text_data:
|
| 281 |
-
full_text += segment
|
| 282 |
-
if label: # If this segment has a label (is an entity)
|
| 283 |
-
entities.append({
|
| 284 |
-
"text": segment,
|
| 285 |
-
"label": label,
|
| 286 |
-
"start": current_pos,
|
| 287 |
-
"end": current_pos + len(segment)
|
| 288 |
-
})
|
| 289 |
-
current_pos += len(segment)
|
| 290 |
|
| 291 |
# Save text
|
| 292 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
| 293 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
| 294 |
-
f.write(
|
| 295 |
|
| 296 |
# Create JSON with text and entities
|
| 297 |
json_data = {
|
| 298 |
-
"text":
|
| 299 |
"entities": entities,
|
| 300 |
"image_file": f"{fname}.png"
|
| 301 |
}
|
|
|
|
| 140 |
if last_end < len(ocr_text):
|
| 141 |
highlighted_text.append((ocr_text[last_end:], None))
|
| 142 |
|
| 143 |
+
# Store the original text and entities as attributes of the highlighted_text list
|
| 144 |
+
highlighted_text.original_text = ocr_text
|
| 145 |
+
highlighted_text.entities = ner_results
|
| 146 |
+
|
| 147 |
return highlighted_text
|
| 148 |
|
| 149 |
# If NER is disabled, return the text without highlighting
|
| 150 |
+
result = [(ocr_text, None)]
|
| 151 |
+
result.original_text = ocr_text
|
| 152 |
+
result.entities = []
|
| 153 |
+
return result
|
| 154 |
|
| 155 |
css = """
|
| 156 |
/* Overall app styling */
|
|
|
|
| 272 |
if isinstance(image, np.ndarray):
|
| 273 |
image = Image.fromarray(image)
|
| 274 |
|
|
|
|
| 275 |
with tempfile.TemporaryDirectory() as temp_dir:
|
| 276 |
# Save image
|
| 277 |
img_path = os.path.join(temp_dir, f"{fname}.png")
|
| 278 |
image.save(img_path)
|
| 279 |
|
| 280 |
+
# Get the original OCR text and entities
|
| 281 |
+
original_text = getattr(text_data, 'original_text', '')
|
| 282 |
+
entities = getattr(text_data, 'entities', [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
# Save text
|
| 285 |
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
| 286 |
with open(txt_path, 'w', encoding='utf-8') as f:
|
| 287 |
+
f.write(original_text)
|
| 288 |
|
| 289 |
# Create JSON with text and entities
|
| 290 |
json_data = {
|
| 291 |
+
"text": original_text,
|
| 292 |
"entities": entities,
|
| 293 |
"image_file": f"{fname}.png"
|
| 294 |
}
|