AnseMin committed on
Commit
66e859e
·
1 Parent(s): f4bb2aa

reset previous changes

Browse files
Files changed (3) hide show
  1. src/converter.py +18 -33
  2. src/parser_factory.py +1 -41
  3. src/ui.py +1 -1
src/converter.py CHANGED
@@ -11,28 +11,15 @@ from parser_factory import ParserFactory
11
  # Import all parsers to ensure they're registered
12
  import parsers
13
 
14
- # Configure logging
15
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
16
- logger = logging.getLogger(__name__)
17
-
18
- # Global cancellation flag
19
- _cancellation_flag = None
20
 
21
  def set_cancellation_flag(flag):
22
- """Set the cancellation flag to be used by the converter."""
23
- global _cancellation_flag
24
- _cancellation_flag = flag
25
- logger.info("Cancellation flag set in converter module")
26
 
27
- def is_cancelled():
28
- """Check if cancellation has been requested."""
29
- global _cancellation_flag
30
- if _cancellation_flag is None:
31
- return False
32
- cancelled = _cancellation_flag.is_set()
33
- if cancelled:
34
- logger.info("Cancellation detected in converter")
35
- return cancelled
36
 
37
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
38
  """
@@ -47,14 +34,15 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
47
  Returns:
48
  tuple: (content, download_file_path)
49
  """
50
- # Check for cancellation at the start
51
- if is_cancelled():
52
- logger.info("Conversion cancelled before starting parser")
53
- return "Conversion cancelled.", None
54
-
55
  if not file_path:
56
  return "Please upload a file.", None
57
 
 
 
 
 
58
  # Create a temporary file with English filename
59
  try:
60
  original_ext = Path(file_path).suffix
@@ -66,9 +54,8 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
66
  except Exception as e:
67
  return f"Error creating temporary file: {e}", None
68
 
69
- # Check for cancellation after parser creation
70
- if is_cancelled():
71
- logger.info("Conversion cancelled after parser creation")
72
  # Clean up temp file
73
  try:
74
  os.unlink(temp_input.name)
@@ -89,15 +76,14 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
89
  parser_name=parser_name,
90
  ocr_method_name=ocr_method_name,
91
  output_format=output_format.lower(),
92
- cancellation_flag=is_cancelled # Pass the flag to parsers
93
  )
94
 
95
  duration = time.time() - start
96
  logging.info(f"Processed in {duration:.2f} seconds.")
97
 
98
  # Check for cancellation after processing
99
- if is_cancelled():
100
- logger.info("Conversion cancelled after parsing")
101
  # Clean up temp file
102
  try:
103
  os.unlink(temp_input.name)
@@ -125,9 +111,8 @@ def convert_file(file_path, parser_name, ocr_method_name, output_format):
125
  else:
126
  ext = ".txt"
127
 
128
- # Check for cancellation after formatting
129
- if is_cancelled():
130
- logger.info("Conversion cancelled after formatting")
131
  # Clean up temp file
132
  try:
133
  os.unlink(temp_input.name)
 
11
  # Import all parsers to ensure they're registered
12
  import parsers
13
 
14
+ # Reference to the cancellation flag from ui.py
15
+ # This will be set by the UI when the cancel button is clicked
16
+ conversion_cancelled = None
 
 
 
17
 
18
  def set_cancellation_flag(flag):
19
+ """Set the reference to the cancellation flag from ui.py"""
20
+ global conversion_cancelled
21
+ conversion_cancelled = flag
 
22
 
 
 
 
 
 
 
 
 
 
23
 
24
  def convert_file(file_path, parser_name, ocr_method_name, output_format):
25
  """
 
34
  Returns:
35
  tuple: (content, download_file_path)
36
  """
37
+ global conversion_cancelled
38
+
 
 
 
39
  if not file_path:
40
  return "Please upload a file.", None
41
 
42
+ # Check for cancellation
43
+ if conversion_cancelled and conversion_cancelled.is_set():
44
+ return "Conversion cancelled.", None
45
+
46
  # Create a temporary file with English filename
47
  try:
48
  original_ext = Path(file_path).suffix
 
54
  except Exception as e:
55
  return f"Error creating temporary file: {e}", None
56
 
57
+ # Check for cancellation again
58
+ if conversion_cancelled and conversion_cancelled.is_set():
 
59
  # Clean up temp file
60
  try:
61
  os.unlink(temp_input.name)
 
76
  parser_name=parser_name,
77
  ocr_method_name=ocr_method_name,
78
  output_format=output_format.lower(),
79
+ cancellation_flag=conversion_cancelled # Pass the flag to parsers
80
  )
81
 
82
  duration = time.time() - start
83
  logging.info(f"Processed in {duration:.2f} seconds.")
84
 
85
  # Check for cancellation after processing
86
+ if conversion_cancelled and conversion_cancelled.is_set():
 
87
  # Clean up temp file
88
  try:
89
  os.unlink(temp_input.name)
 
111
  else:
112
  ext = ".txt"
113
 
114
+ # Check for cancellation again
115
+ if conversion_cancelled and conversion_cancelled.is_set():
 
116
  # Clean up temp file
117
  try:
118
  os.unlink(temp_input.name)
src/parser_factory.py CHANGED
@@ -1,7 +1,6 @@
1
  from typing import Optional, Dict, Any, Union
2
  from pathlib import Path
3
  import threading
4
- from docling.datamodel.document import Document
5
 
6
  from parser_interface import DocumentParser
7
  from parser_registry import ParserRegistry
@@ -63,43 +62,4 @@ class ParserFactory:
63
 
64
  # Parse the document, passing the cancellation flag
65
  kwargs['cancellation_flag'] = cancellation_flag
66
- return parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
67
-
68
- class BaseParser:
69
- def __init__(self):
70
- self._cancellation_check = lambda: False
71
-
72
- def set_cancellation_check(self, check_func):
73
- """Set a function that will be called to check for cancellation."""
74
- self._cancellation_check = check_func
75
-
76
- def is_cancelled(self):
77
- """Check if processing should be cancelled."""
78
- return self._cancellation_check()
79
-
80
- def parse(self, file_path):
81
- """Parse the file and return the content."""
82
- # Initialize document
83
- document = Document()
84
-
85
- # Check for cancellation
86
- if self.is_cancelled():
87
- return document
88
-
89
- # Open the file
90
- # ...
91
-
92
- # Process each page
93
- for page_num in range(num_pages):
94
- # Check for cancellation before processing each page
95
- if self.is_cancelled():
96
- return document
97
-
98
- # Process page
99
- # ...
100
-
101
- # Check for cancellation after processing each page
102
- if self.is_cancelled():
103
- return document
104
-
105
- return document
 
1
  from typing import Optional, Dict, Any, Union
2
  from pathlib import Path
3
  import threading
 
4
 
5
  from parser_interface import DocumentParser
6
  from parser_registry import ParserRegistry
 
62
 
63
  # Parse the document, passing the cancellation flag
64
  kwargs['cancellation_flag'] = cancellation_flag
65
+ return parser.parse(file_path, ocr_method=ocr_method_id, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ui.py CHANGED
@@ -265,4 +265,4 @@ def launch_ui(server_name="0.0.0.0", server_port=7860, share=False):
265
  root_path="",
266
  show_error=True,
267
  share=share
268
- )
 
265
  root_path="",
266
  show_error=True,
267
  share=share
268
+ )