Update app.py
Browse files
app.py
CHANGED
|
@@ -1531,19 +1531,79 @@ def get_latex_from_base64(base64_string: str) -> str:
|
|
| 1531 |
|
| 1532 |
|
| 1533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1534 |
def run_yolo_detection_and_count(
|
| 1535 |
image: np.ndarray, model: YOLO, page_num: int,
|
| 1536 |
current_eq_count: int, current_fig_count: int
|
| 1537 |
) -> Tuple[List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]], int, int]:
|
| 1538 |
"""
|
| 1539 |
-
Performs YOLO detection and returns a list of detected item dictionaries
|
| 1540 |
-
|
| 1541 |
"""
|
| 1542 |
-
|
| 1543 |
eq_counter = current_eq_count
|
| 1544 |
fig_counter = current_fig_count
|
| 1545 |
|
| 1546 |
-
detected_items
|
| 1547 |
yolo_detections = []
|
| 1548 |
|
| 1549 |
try:
|
|
@@ -1562,9 +1622,11 @@ def run_yolo_detection_and_count(
|
|
| 1562 |
logging.error(f"ERROR: YOLO inference failed on page {page_num}: {e}")
|
| 1563 |
return [], eq_counter, fig_counter
|
| 1564 |
|
|
|
|
| 1565 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 1566 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 1567 |
|
|
|
|
| 1568 |
for det in final_detections:
|
| 1569 |
bbox = det["coords"]
|
| 1570 |
crop_pil = crop_and_convert_to_pil(image, bbox)
|
|
@@ -1573,7 +1635,7 @@ def run_yolo_detection_and_count(
|
|
| 1573 |
"type": det["class"],
|
| 1574 |
"coords": bbox,
|
| 1575 |
"pil_image": crop_pil,
|
| 1576 |
-
"page_num": page_num,
|
| 1577 |
}
|
| 1578 |
|
| 1579 |
if det["class"] == "equation":
|
|
@@ -1592,11 +1654,6 @@ def run_yolo_detection_and_count(
|
|
| 1592 |
|
| 1593 |
|
| 1594 |
|
| 1595 |
-
|
| 1596 |
-
|
| 1597 |
-
|
| 1598 |
-
|
| 1599 |
-
|
| 1600 |
# ============================================================================
|
| 1601 |
# --- MAIN DOCUMENT PROCESSING FUNCTION (Retained Logic) ---
|
| 1602 |
# ============================================================================
|
|
|
|
| 1531 |
|
| 1532 |
|
| 1533 |
|
| 1534 |
+
# def run_yolo_detection_and_count(
|
| 1535 |
+
# image: np.ndarray, model: YOLO, page_num: int,
|
| 1536 |
+
# current_eq_count: int, current_fig_count: int
|
| 1537 |
+
# ) -> Tuple[List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]], int, int]:
|
| 1538 |
+
# """
|
| 1539 |
+
# Performs YOLO detection and returns a list of detected item dictionaries
|
| 1540 |
+
# and the updated total counters.
|
| 1541 |
+
# """
|
| 1542 |
+
|
| 1543 |
+
# eq_counter = current_eq_count
|
| 1544 |
+
# fig_counter = current_fig_count
|
| 1545 |
+
|
| 1546 |
+
# detected_items: List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]] = []
|
| 1547 |
+
# yolo_detections = []
|
| 1548 |
+
|
| 1549 |
+
# try:
|
| 1550 |
+
# results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
|
| 1551 |
+
# if results and results[0].boxes:
|
| 1552 |
+
# for box in results[0].boxes.data.tolist():
|
| 1553 |
+
# x1, y1, x2, y2, conf, cls_id = box
|
| 1554 |
+
# cls_name = model.names[int(cls_id)]
|
| 1555 |
+
# if cls_name in TARGET_CLASSES:
|
| 1556 |
+
# yolo_detections.append({
|
| 1557 |
+
# 'coords': (x1, y1, x2, y2),
|
| 1558 |
+
# 'class': cls_name,
|
| 1559 |
+
# 'conf': conf
|
| 1560 |
+
# })
|
| 1561 |
+
# except Exception as e:
|
| 1562 |
+
# logging.error(f"ERROR: YOLO inference failed on page {page_num}: {e}")
|
| 1563 |
+
# return [], eq_counter, fig_counter
|
| 1564 |
+
|
| 1565 |
+
# merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 1566 |
+
# final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 1567 |
+
|
| 1568 |
+
# for det in final_detections:
|
| 1569 |
+
# bbox = det["coords"]
|
| 1570 |
+
# crop_pil = crop_and_convert_to_pil(image, bbox)
|
| 1571 |
+
|
| 1572 |
+
# item = {
|
| 1573 |
+
# "type": det["class"],
|
| 1574 |
+
# "coords": bbox,
|
| 1575 |
+
# "pil_image": crop_pil,
|
| 1576 |
+
# "page_num": page_num, # ← ADD THIS LINE
|
| 1577 |
+
# }
|
| 1578 |
+
|
| 1579 |
+
# if det["class"] == "equation":
|
| 1580 |
+
# eq_counter += 1
|
| 1581 |
+
# item["id"] = f"EQUATION{eq_counter}"
|
| 1582 |
+
# item["latex"] = ""
|
| 1583 |
+
# elif det["class"] == "figure":
|
| 1584 |
+
# fig_counter += 1
|
| 1585 |
+
# item["id"] = f"FIGURE{fig_counter}"
|
| 1586 |
+
# item["latex"] = "[FIGURE - No LaTeX]"
|
| 1587 |
+
|
| 1588 |
+
# detected_items.append(item)
|
| 1589 |
+
|
| 1590 |
+
# return detected_items, eq_counter, fig_counter
|
| 1591 |
+
|
| 1592 |
+
|
| 1593 |
+
|
| 1594 |
+
|
| 1595 |
def run_yolo_detection_and_count(
|
| 1596 |
image: np.ndarray, model: YOLO, page_num: int,
|
| 1597 |
current_eq_count: int, current_fig_count: int
|
| 1598 |
) -> Tuple[List[Dict[str, Union[Image.Image, str, Tuple[float,...]]]], int, int]:
|
| 1599 |
"""
|
| 1600 |
+
Performs YOLO detection and returns detected items with counters.
|
| 1601 |
+
This version is for the Gradio script.
|
| 1602 |
"""
|
|
|
|
| 1603 |
eq_counter = current_eq_count
|
| 1604 |
fig_counter = current_fig_count
|
| 1605 |
|
| 1606 |
+
detected_items = []
|
| 1607 |
yolo_detections = []
|
| 1608 |
|
| 1609 |
try:
|
|
|
|
| 1622 |
logging.error(f"ERROR: YOLO inference failed on page {page_num}: {e}")
|
| 1623 |
return [], eq_counter, fig_counter
|
| 1624 |
|
| 1625 |
+
# CRITICAL: Use exact same processing order
|
| 1626 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 1627 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 1628 |
|
| 1629 |
+
# Create items from final detections
|
| 1630 |
for det in final_detections:
|
| 1631 |
bbox = det["coords"]
|
| 1632 |
crop_pil = crop_and_convert_to_pil(image, bbox)
|
|
|
|
| 1635 |
"type": det["class"],
|
| 1636 |
"coords": bbox,
|
| 1637 |
"pil_image": crop_pil,
|
| 1638 |
+
"page_num": page_num,
|
| 1639 |
}
|
| 1640 |
|
| 1641 |
if det["class"] == "equation":
|
|
|
|
| 1654 |
|
| 1655 |
|
| 1656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1657 |
# ============================================================================
|
| 1658 |
# --- MAIN DOCUMENT PROCESSING FUNCTION (Retained Logic) ---
|
| 1659 |
# ============================================================================
|