AnseMin committed on
Commit
38d1b1a
·
1 Parent(s): 3416b72

change in UI

Browse files
Files changed (1) hide show
  1. src/ui/ui.py +23 -87
src/ui/ui.py CHANGED
@@ -35,44 +35,6 @@ def format_markdown_content(content):
35
  html_content = markdown.markdown(str(content), extensions=['tables'])
36
  return html_content
37
 
38
-
39
- def split_content_into_pages(content, chars_per_page=6000):
40
- if not content:
41
- return ["No content to display"]
42
-
43
- # Split by natural breaks (double newlines) first
44
- sections = str(content).split('\n\n')
45
- pages = []
46
- current_page = []
47
- current_length = 0
48
-
49
- for section in sections:
50
- section_length = len(section) + 2 # +2 for double newline
51
-
52
- if current_length + section_length > chars_per_page and current_page:
53
- # Format each page with markdown
54
- page_content = '\n\n'.join(current_page)
55
- pages.append(format_markdown_content(page_content))
56
- current_page = [section]
57
- current_length = section_length
58
- else:
59
- current_page.append(section)
60
- current_length += section_length
61
-
62
- if current_page:
63
- # Format the last page with markdown
64
- page_content = '\n\n'.join(current_page)
65
- pages.append(format_markdown_content(page_content))
66
-
67
- return pages
68
-
69
-
70
- def update_page_content(pages, page_number):
71
- if not pages or page_number < 1 or page_number > len(pages):
72
- return "Invalid page", page_number, "Page 0/0"
73
- return str(pages[page_number - 1]), page_number, f"Page {page_number}/{len(pages)}"
74
-
75
-
76
  # Function to run conversion in a separate thread
77
  def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format):
78
  """Run the conversion in a separate thread and return the thread object"""
@@ -100,7 +62,6 @@ def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format
100
 
101
  return thread, results
102
 
103
-
104
  def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_cancelled):
105
  """Handle file conversion."""
106
  global conversion_cancelled
@@ -108,7 +69,7 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
108
  # Check if we should cancel before starting
109
  if is_cancelled:
110
  logger.info("Conversion cancelled before starting")
111
- return "Conversion cancelled.", None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
112
 
113
  logger.info("Starting conversion with cancellation flag cleared")
114
 
@@ -129,14 +90,14 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
129
  thread.join(timeout=0.5)
130
  if thread.is_alive():
131
  logger.warning("Thread did not finish within timeout")
132
- return "Conversion cancelled.", None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
133
 
134
  # Sleep briefly to avoid busy waiting
135
  time.sleep(0.1)
136
 
137
  # Thread has completed, check results
138
  if results["error"]:
139
- return f"Error: {results['error']}", None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
140
 
141
  content = results["content"]
142
  download_file = results["download_file"]
@@ -144,32 +105,29 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
144
  # If conversion returned a cancellation message
145
  if content == "Conversion cancelled.":
146
  logger.info("Converter returned cancellation message")
147
- return content, None, [], 1, "", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
148
 
149
- # Process results
150
- pages = split_content_into_pages(str(content))
151
- page_info = f"Page 1/{len(pages)}"
152
 
153
  logger.info("Conversion completed successfully")
154
- return str(pages[0]) if pages else "", download_file, pages, 1, page_info, gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), None
155
-
156
-
157
- def handle_page_navigation(direction, current, pages):
158
- new_page = current + direction
159
- if new_page < 1:
160
- new_page = 1
161
- elif new_page > len(pages):
162
- new_page = len(pages)
163
- content, page_num, page_info = update_page_content(pages, new_page)
164
- return content, new_page, page_info
165
-
166
 
167
  def create_ui():
168
  with gr.Blocks(css="""
169
- .page-navigation { text-align: center; margin-top: 1rem; }
170
- .page-navigation button { margin: 0 0.5rem; }
171
- .page-info { display: inline-block; margin: 0 1rem; }
172
- .processing-controls { display: flex; justify-content: center; gap: 10px; margin-top: 10px; }
 
 
 
 
 
 
 
 
 
173
  """) as demo:
174
  gr.Markdown("Markit: Convert any documents to Markdown")
175
 
@@ -182,18 +140,8 @@ def create_ui():
182
  with gr.Tab("Upload and Convert"):
183
  file_input = gr.File(label="Upload PDF", type="filepath")
184
 
185
- # Content display with navigation
186
- content_pages = gr.State([])
187
- current_page = gr.State(1)
188
- file_display = gr.Markdown(label="Converted Markdown")
189
-
190
- with gr.Row(visible=False) as navigation_row:
191
- with gr.Column(scale=1):
192
- prev_btn = gr.Button("←", elem_classes=["page-navigation"])
193
- with gr.Column(scale=1):
194
- page_info = gr.Markdown("Page 1/1", elem_classes=["page-info"])
195
- with gr.Column(scale=1):
196
- next_btn = gr.Button("→", elem_classes=["page-navigation"])
197
 
198
  file_download = gr.File(label="Download File")
199
 
@@ -278,7 +226,7 @@ def create_ui():
278
  ).then(
279
  fn=handle_convert,
280
  inputs=[file_input, provider_dropdown, ocr_dropdown, output_format, cancel_requested],
281
- outputs=[file_display, file_download, content_pages, current_page, page_info, navigation_row, convert_button, cancel_button, conversion_thread]
282
  )
283
 
284
  # Handle cancel button click
@@ -289,18 +237,6 @@ def create_ui():
289
  queue=False # Execute immediately
290
  )
291
 
292
- prev_btn.click(
293
- fn=lambda curr, pages: handle_page_navigation(-1, curr, pages),
294
- inputs=[current_page, content_pages],
295
- outputs=[file_display, current_page, page_info]
296
- )
297
-
298
- next_btn.click(
299
- fn=lambda curr, pages: handle_page_navigation(1, curr, pages),
300
- inputs=[current_page, content_pages],
301
- outputs=[file_display, current_page, page_info]
302
- )
303
-
304
  file_display.change(
305
  lambda text: text,
306
  inputs=[file_display],
 
35
  html_content = markdown.markdown(str(content), extensions=['tables'])
36
  return html_content
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Function to run conversion in a separate thread
39
  def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format):
40
  """Run the conversion in a separate thread and return the thread object"""
 
62
 
63
  return thread, results
64
 
 
65
  def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_cancelled):
66
  """Handle file conversion."""
67
  global conversion_cancelled
 
69
  # Check if we should cancel before starting
70
  if is_cancelled:
71
  logger.info("Conversion cancelled before starting")
72
+ return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
73
 
74
  logger.info("Starting conversion with cancellation flag cleared")
75
 
 
90
  thread.join(timeout=0.5)
91
  if thread.is_alive():
92
  logger.warning("Thread did not finish within timeout")
93
+ return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
94
 
95
  # Sleep briefly to avoid busy waiting
96
  time.sleep(0.1)
97
 
98
  # Thread has completed, check results
99
  if results["error"]:
100
+ return f"Error: {results['error']}", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
101
 
102
  content = results["content"]
103
  download_file = results["download_file"]
 
105
  # If conversion returned a cancellation message
106
  if content == "Conversion cancelled.":
107
  logger.info("Converter returned cancellation message")
108
+ return content, None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
109
 
110
+ # Format the content
111
+ formatted_content = format_markdown_content(str(content))
 
112
 
113
  logger.info("Conversion completed successfully")
114
+ return formatted_content, download_file, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  def create_ui():
117
  with gr.Blocks(css="""
118
+ .markdown-display {
119
+ height: 600px;
120
+ overflow-y: auto;
121
+ border: 1px solid #ddd;
122
+ padding: 1rem;
123
+ margin-bottom: 1rem;
124
+ }
125
+ .processing-controls {
126
+ display: flex;
127
+ justify-content: center;
128
+ gap: 10px;
129
+ margin-top: 10px;
130
+ }
131
  """) as demo:
132
  gr.Markdown("Markit: Convert any documents to Markdown")
133
 
 
140
  with gr.Tab("Upload and Convert"):
141
  file_input = gr.File(label="Upload PDF", type="filepath")
142
 
143
+ # Single scrollable content display
144
+ file_display = gr.HTML(label="Converted Markdown", elem_classes=["markdown-display"])
 
 
 
 
 
 
 
 
 
 
145
 
146
  file_download = gr.File(label="Download File")
147
 
 
226
  ).then(
227
  fn=handle_convert,
228
  inputs=[file_input, provider_dropdown, ocr_dropdown, output_format, cancel_requested],
229
+ outputs=[file_display, file_download, convert_button, cancel_button, conversion_thread]
230
  )
231
 
232
  # Handle cancel button click
 
237
  queue=False # Execute immediately
238
  )
239
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  file_display.change(
241
  lambda text: text,
242
  inputs=[file_display],