Spaces:
Paused
Paused
reset previous changes
Browse files
- src/converter.py +18 -33
- src/parser_factory.py +1 -41
- src/ui.py +1 -1
src/converter.py
CHANGED
|
@@ -11,28 +11,15 @@ from parser_factory import ParserFactory
|
|
| 11 |
# Import all parsers to ensure they're registered
|
| 12 |
import parsers
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Global cancellation flag
|
| 19 |
-
_cancellation_flag = None
|
| 20 |
|
| 21 |
def set_cancellation_flag(flag):
|
| 22 |
-
"""Set the
|
| 23 |
-
global _cancellation_flag
|
| 24 |
-
|
| 25 |
-
logger.info("Cancellation flag set in converter module")
|
| 26 |
|
| 27 |
-
def is_cancelled():
|
| 28 |
-
"""Check if cancellation has been requested."""
|
| 29 |
-
global _cancellation_flag
|
| 30 |
-
if _cancellation_flag is None:
|
| 31 |
-
return False
|
| 32 |
-
cancelled = _cancellation_flag.is_set()
|
| 33 |
-
if cancelled:
|
| 34 |
-
logger.info("Cancellation detected in converter")
|
| 35 |
-
return cancelled
|
| 36 |
|
| 37 |
def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
| 38 |
"""
|
|
@@ -47,14 +34,15 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
| 47 |
Returns:
|
| 48 |
tuple: (content, download_file_path)
|
| 49 |
"""
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
logger.info("Conversion cancelled before starting parser")
|
| 53 |
-
return "Conversion cancelled.", None
|
| 54 |
-
|
| 55 |
if not file_path:
|
| 56 |
return "Please upload a file.", None
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# Create a temporary file with English filename
|
| 59 |
try:
|
| 60 |
original_ext = Path(file_path).suffix
|
|
@@ -66,9 +54,8 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
| 66 |
except Exception as e:
|
| 67 |
return f"Error creating temporary file: {e}", None
|
| 68 |
|
| 69 |
-
# Check for cancellation
|
| 70 |
-
if is_cancelled():
|
| 71 |
-
logger.info("Conversion cancelled after parser creation")
|
| 72 |
# Clean up temp file
|
| 73 |
try:
|
| 74 |
os.unlink(temp_input.name)
|
|
@@ -89,15 +76,14 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
| 89 |
parser_name=parser_name,
|
| 90 |
ocr_method_name=ocr_method_name,
|
| 91 |
output_format=output_format.lower(),
|
| 92 |
-
cancellation_flag=_cancellation_flag
|
| 93 |
)
|
| 94 |
|
| 95 |
duration = time.time() - start
|
| 96 |
logging.info(f"Processed in {duration:.2f} seconds.")
|
| 97 |
|
| 98 |
# Check for cancellation after processing
|
| 99 |
-
if is_cancelled():
|
| 100 |
-
logger.info("Conversion cancelled after parsing")
|
| 101 |
# Clean up temp file
|
| 102 |
try:
|
| 103 |
os.unlink(temp_input.name)
|
|
@@ -125,9 +111,8 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
|
| 125 |
else:
|
| 126 |
ext = ".txt"
|
| 127 |
|
| 128 |
-
# Check for cancellation
|
| 129 |
-
if is_cancelled():
|
| 130 |
-
logger.info("Conversion cancelled after formatting")
|
| 131 |
# Clean up temp file
|
| 132 |
try:
|
| 133 |
os.unlink(temp_input.name)
|
|
|
|
| 11 |
# Import all parsers to ensure they're registered
|
| 12 |
import parsers
|
| 13 |
|
| 14 |
+
# Reference to the cancellation flag from ui.py
|
| 15 |
+
# This will be set by the UI when the cancel button is clicked
|
| 16 |
+
conversion_cancelled = None
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def set_cancellation_flag(flag):
|
| 19 |
+
"""Set the reference to the cancellation flag from ui.py"""
|
| 20 |
+
global conversion_cancelled
|
| 21 |
+
conversion_cancelled = flag
|
|
|
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
def convert_file(file_path, parser_name, ocr_method_name, output_format):
|
| 25 |
"""
|
|
|
|
| 34 |
Returns:
|
| 35 |
tuple: (content, download_file_path)
|
| 36 |
"""
|
| 37 |
+
global conversion_cancelled
|
| 38 |
+
|
|
|
|
|
|
|
|
|
|
| 39 |
if not file_path:
|
| 40 |
return "Please upload a file.", None
|
| 41 |
|
| 42 |
+
# Check for cancellation
|
| 43 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
| 44 |
+
return "Conversion cancelled.", None
|
| 45 |
+
|
| 46 |
# Create a temporary file with English filename
|
| 47 |
try:
|
| 48 |
original_ext = Path(file_path).suffix
|
|
|
|
| 54 |
except Exception as e:
|
| 55 |
return f"Error creating temporary file: {e}", None
|
| 56 |
|
| 57 |
+
# Check for cancellation again
|
| 58 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
|
|
|
| 59 |
# Clean up temp file
|
| 60 |
try:
|
| 61 |
os.unlink(temp_input.name)
|
|
|
|
| 76 |
parser_name=parser_name,
|
| 77 |
ocr_method_name=ocr_method_name,
|
| 78 |
output_format=output_format.lower(),
|
| 79 |
+
cancellation_flag=conversion_cancelled # Pass the flag to parsers
|
| 80 |
)
|
| 81 |
|
| 82 |
duration = time.time() - start
|
| 83 |
logging.info(f"Processed in {duration:.2f} seconds.")
|
| 84 |
|
| 85 |
# Check for cancellation after processing
|
| 86 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
|
|
|
| 87 |
# Clean up temp file
|
| 88 |
try:
|
| 89 |
os.unlink(temp_input.name)
|
|
|
|
| 111 |
else:
|
| 112 |
ext = ".txt"
|
| 113 |
|
| 114 |
+
# Check for cancellation again
|
| 115 |
+
if conversion_cancelled and conversion_cancelled.is_set():
|
|
|
|
| 116 |
# Clean up temp file
|
| 117 |
try:
|
| 118 |
os.unlink(temp_input.name)
|
src/parser_factory.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from typing import Optional, Dict, Any, Union
|
| 2 |
from pathlib import Path
|
| 3 |
import threading
|
| 4 |
-
from docling.datamodel.document import Document
|
| 5 |
|
| 6 |
from parser_interface import DocumentParser
|
| 7 |
from parser_registry import ParserRegistry
|
|
@@ -63,43 +62,4 @@ class ParserFactory:
|
|
| 63 |
|
| 64 |
# Parse the document, passing the cancellation flag
|
| 65 |
kwargs['cancellation_flag'] = cancellation_flag
|
| 66 |
-
return parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
|
| 67 |
-
|
| 68 |
-
class BaseParser:
|
| 69 |
-
def __init__(self):
|
| 70 |
-
self._cancellation_check = lambda: False
|
| 71 |
-
|
| 72 |
-
def set_cancellation_check(self, check_func):
|
| 73 |
-
"""Set a function that will be called to check for cancellation."""
|
| 74 |
-
self._cancellation_check = check_func
|
| 75 |
-
|
| 76 |
-
def is_cancelled(self):
|
| 77 |
-
"""Check if processing should be cancelled."""
|
| 78 |
-
return self._cancellation_check()
|
| 79 |
-
|
| 80 |
-
def parse(self, file_path):
|
| 81 |
-
"""Parse the file and return the content."""
|
| 82 |
-
# Initialize document
|
| 83 |
-
document = Document()
|
| 84 |
-
|
| 85 |
-
# Check for cancellation
|
| 86 |
-
if self.is_cancelled():
|
| 87 |
-
return document
|
| 88 |
-
|
| 89 |
-
# Open the file
|
| 90 |
-
# ...
|
| 91 |
-
|
| 92 |
-
# Process each page
|
| 93 |
-
for page_num in range(num_pages):
|
| 94 |
-
# Check for cancellation before processing each page
|
| 95 |
-
if self.is_cancelled():
|
| 96 |
-
return document
|
| 97 |
-
|
| 98 |
-
# Process page
|
| 99 |
-
# ...
|
| 100 |
-
|
| 101 |
-
# Check for cancellation after processing each page
|
| 102 |
-
if self.is_cancelled():
|
| 103 |
-
return document
|
| 104 |
-
|
| 105 |
-
return document
|
|
|
|
| 1 |
from typing import Optional, Dict, Any, Union
|
| 2 |
from pathlib import Path
|
| 3 |
import threading
|
|
|
|
| 4 |
|
| 5 |
from parser_interface import DocumentParser
|
| 6 |
from parser_registry import ParserRegistry
|
|
|
|
| 62 |
|
| 63 |
# Parse the document, passing the cancellation flag
|
| 64 |
kwargs['cancellation_flag'] = cancellation_flag
|
| 65 |
+
return parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ui.py
CHANGED
|
@@ -265,4 +265,4 @@ def launch_ui(server_name="0.0.0.0", server_port=7860, share=False):
|
|
| 265 |
root_path="",
|
| 266 |
show_error=True,
|
| 267 |
share=share
|
| 268 |
-
)
|
|
|
|
| 265 |
root_path="",
|
| 266 |
show_error=True,
|
| 267 |
share=share
|
| 268 |
+
)
|