Spaces:
Paused
Paused
Update inference.py
Browse files- inference.py +53 -4
inference.py
CHANGED
|
@@ -362,18 +362,67 @@ def extract_part_json(image, part_name, model, processor, device):
|
|
| 362 |
}
|
| 363 |
|
| 364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
def merge_page_results(page_results):
|
| 366 |
-
final_json = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
for item in page_results:
|
| 369 |
if item["status"] != "success" or not item["parsed"]:
|
| 370 |
continue
|
| 371 |
|
|
|
|
| 372 |
parsed = item["parsed"]
|
| 373 |
-
for key, value in parsed.items():
|
| 374 |
-
final_json[key] = value
|
| 375 |
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
|
| 379 |
def process_document(file_path):
|
|
|
|
| 362 |
}
|
| 363 |
|
| 364 |
|
| 365 |
+
# def merge_page_results(page_results):
|
| 366 |
+
# final_json = {}
|
| 367 |
+
|
| 368 |
+
# for item in page_results:
|
| 369 |
+
# if item["status"] != "success" or not item["parsed"]:
|
| 370 |
+
# continue
|
| 371 |
+
|
| 372 |
+
# parsed = item["parsed"]
|
| 373 |
+
# for key, value in parsed.items():
|
| 374 |
+
# final_json[key] = value
|
| 375 |
+
|
| 376 |
+
# return final_json
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# Added to emit the merged JSON in a structured, per-part format (new code spans lines 381 to 425).
|
| 380 |
+
|
| 381 |
def merge_page_results(page_results):
    """Combine per-page extraction results into one JSON object keyed by part.

    Args:
        page_results: Iterable of dicts. Each entry is read through its
            "status", "parsed" and "part" keys. Entries whose status is not
            "success", or whose parsed payload is empty, are skipped.

    Returns:
        A dict mapping each part label to the merged payload of all pages
        reported for that part. "PART-1" through "PART-6" appear first, in
        that order; any part whose merged payload is empty is omitted.
    """
    # Pre-seed the known parts so the output order does not depend on the
    # order in which pages were processed.
    final_json = {
        "PART-1": {},
        "PART-2": {},
        "PART-3": {},
        "PART-4": {},
        "PART-5": {},
        "PART-6": {},
    }

    for item in page_results:
        if item["status"] != "success" or not item["parsed"]:
            continue

        part = item["part"]
        parsed = item["parsed"]

        # Look the part up with a default: a label outside the pre-seeded set
        # used to raise KeyError here and abort the merge of every page.
        final_json[part] = _merge_values(final_json.get(part, {}), parsed)

    # Drop parts for which nothing was extracted.
    return {key: value for key, value in final_json.items() if value}
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
def _merge_values(old_value, new_value):
|
| 405 |
+
if old_value is None:
|
| 406 |
+
return new_value
|
| 407 |
+
|
| 408 |
+
if isinstance(old_value, list) and isinstance(new_value, list):
|
| 409 |
+
return old_value + new_value
|
| 410 |
+
|
| 411 |
+
if isinstance(old_value, dict) and isinstance(new_value, dict):
|
| 412 |
+
merged = dict(old_value)
|
| 413 |
+
|
| 414 |
+
for key, value in new_value.items():
|
| 415 |
+
if key in merged:
|
| 416 |
+
merged[key] = _merge_values(merged[key], value)
|
| 417 |
+
else:
|
| 418 |
+
merged[key] = value
|
| 419 |
+
|
| 420 |
+
return merged
|
| 421 |
+
|
| 422 |
+
if old_value in ("", None, [], {}):
|
| 423 |
+
return new_value
|
| 424 |
+
|
| 425 |
+
return old_value
|
| 426 |
|
| 427 |
|
| 428 |
def process_document(file_path):
|