Spaces:
Paused
Paused
change in UI
Browse files- src/ui/ui.py +23 -87
src/ui/ui.py
CHANGED
|
@@ -35,44 +35,6 @@ def format_markdown_content(content):
|
|
| 35 |
html_content = markdown.markdown(str(content), extensions=['tables'])
|
| 36 |
return html_content
|
| 37 |
|
| 38 |
-
|
| 39 |
-
def split_content_into_pages(content, chars_per_page=6000):
    """Split converted content into a list of rendered pages.

    The text is cut only at paragraph boundaries (blank lines), so a page may
    run past ``chars_per_page`` when a single paragraph is longer than the
    budget; paragraphs are never split in the middle.

    Args:
        content: The converted document. Any object is accepted; it is
            coerced with ``str()`` before splitting.
        chars_per_page: Soft upper bound on the number of characters
            accumulated into one page before a new page is started.

    Returns:
        A list with one entry per page, each being the result of
        ``format_markdown_content`` for that page's text. When ``content`` is
        empty, ``None`` or whitespace-only, the single-element list
        ``["No content to display"]`` is returned instead (unformatted).
    """
    text = str(content) if content else ""
    if not text.strip():
        # Nothing printable: show the placeholder rather than a blank page.
        return ["No content to display"]

    pages = []
    current_page = []
    current_length = 0

    # Split by natural breaks (double newlines) first
    for section in text.split('\n\n'):
        section_length = len(section) + 2  # +2 for the double newline separator

        # Only flush when the page already holds something; otherwise an
        # oversized first section would produce an empty page.
        if current_page and current_length + section_length > chars_per_page:
            pages.append(format_markdown_content('\n\n'.join(current_page)))
            current_page = [section]
            current_length = section_length
        else:
            current_page.append(section)
            current_length += section_length

    if current_page:
        # Format the last, partially filled page.
        pages.append(format_markdown_content('\n\n'.join(current_page)))

    return pages
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
def update_page_content(pages, page_number):
    """Look up one page and build the values shown by the page widgets.

    Args:
        pages: Sequence of page contents (may be ``None`` or empty).
        page_number: 1-based index of the page to show.

    Returns:
        A ``(content, page_number, page_info)`` tuple. For a valid request,
        ``content`` is ``str(pages[page_number - 1])`` and ``page_info`` is
        ``"Page <n>/<total>"``. When there are no pages, or ``page_number``
        is not an integer within ``1..len(pages)``, the tuple is
        ``("Invalid page", page_number, "Page 0/0")``.
    """
    is_valid = (
        bool(pages)
        # A None/non-integer page number cannot be range-checked or used as
        # an index; report it as invalid instead of raising TypeError.
        and isinstance(page_number, int)
        and 1 <= page_number <= len(pages)
    )
    if not is_valid:
        return "Invalid page", page_number, "Page 0/0"
    return str(pages[page_number - 1]), page_number, f"Page {page_number}/{len(pages)}"
|
| 74 |
-
|
| 75 |
-
|
| 76 |
# Function to run conversion in a separate thread
|
| 77 |
def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format):
|
| 78 |
"""Run the conversion in a separate thread and return the thread object"""
|
|
@@ -100,7 +62,6 @@ def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format
|
|
| 100 |
|
| 101 |
return thread, results
|
| 102 |
|
| 103 |
-
|
| 104 |
def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_cancelled):
|
| 105 |
"""Handle file conversion."""
|
| 106 |
global conversion_cancelled
|
|
@@ -108,7 +69,7 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
|
|
| 108 |
# Check if we should cancel before starting
|
| 109 |
if is_cancelled:
|
| 110 |
logger.info("Conversion cancelled before starting")
|
| 111 |
-
return "Conversion cancelled.", None,
|
| 112 |
|
| 113 |
logger.info("Starting conversion with cancellation flag cleared")
|
| 114 |
|
|
@@ -129,14 +90,14 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
|
|
| 129 |
thread.join(timeout=0.5)
|
| 130 |
if thread.is_alive():
|
| 131 |
logger.warning("Thread did not finish within timeout")
|
| 132 |
-
return "Conversion cancelled.", None,
|
| 133 |
|
| 134 |
# Sleep briefly to avoid busy waiting
|
| 135 |
time.sleep(0.1)
|
| 136 |
|
| 137 |
# Thread has completed, check results
|
| 138 |
if results["error"]:
|
| 139 |
-
return f"Error: {results['error']}", None,
|
| 140 |
|
| 141 |
content = results["content"]
|
| 142 |
download_file = results["download_file"]
|
|
@@ -144,32 +105,29 @@ def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_ca
|
|
| 144 |
# If conversion returned a cancellation message
|
| 145 |
if content == "Conversion cancelled.":
|
| 146 |
logger.info("Converter returned cancellation message")
|
| 147 |
-
return content, None,
|
| 148 |
|
| 149 |
-
#
|
| 150 |
-
|
| 151 |
-
page_info = f"Page 1/{len(pages)}"
|
| 152 |
|
| 153 |
logger.info("Conversion completed successfully")
|
| 154 |
-
return
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
def handle_page_navigation(direction, current, pages):
    """Move ``direction`` pages from ``current`` and return the new view.

    Args:
        direction: Signed step to apply (the visible callers pass ``-1`` for
            "previous" and ``1`` for "next").
        current: The 1-based page number currently displayed.
        pages: Sequence of page contents; ``None`` is treated as no pages.

    Returns:
        A ``(content, new_page, page_info)`` tuple, where ``new_page`` is the
        target page clamped to ``1..len(pages)`` and ``content``/``page_info``
        come from ``update_page_content``. With no pages, ``new_page`` is 1
        and ``update_page_content`` reports the page as invalid.
    """
    pages = pages or []  # tolerate a state that was never populated
    # Clamp the upper bound first, then the lower one, so an empty page list
    # always resolves to page 1 rather than 0 or 1 depending on the input.
    new_page = max(1, min(current + direction, len(pages)))
    content, _, page_info = update_page_content(pages, new_page)
    return content, new_page, page_info
|
| 165 |
-
|
| 166 |
|
| 167 |
def create_ui():
|
| 168 |
with gr.Blocks(css="""
|
| 169 |
-
.
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
""") as demo:
|
| 174 |
gr.Markdown("Markit: Convert any documents to Markdown")
|
| 175 |
|
|
@@ -182,18 +140,8 @@ def create_ui():
|
|
| 182 |
with gr.Tab("Upload and Convert"):
|
| 183 |
file_input = gr.File(label="Upload PDF", type="filepath")
|
| 184 |
|
| 185 |
-
#
|
| 186 |
-
|
| 187 |
-
current_page = gr.State(1)
|
| 188 |
-
file_display = gr.Markdown(label="Converted Markdown")
|
| 189 |
-
|
| 190 |
-
with gr.Row(visible=False) as navigation_row:
|
| 191 |
-
with gr.Column(scale=1):
|
| 192 |
-
prev_btn = gr.Button("←", elem_classes=["page-navigation"])
|
| 193 |
-
with gr.Column(scale=1):
|
| 194 |
-
page_info = gr.Markdown("Page 1/1", elem_classes=["page-info"])
|
| 195 |
-
with gr.Column(scale=1):
|
| 196 |
-
next_btn = gr.Button("→", elem_classes=["page-navigation"])
|
| 197 |
|
| 198 |
file_download = gr.File(label="Download File")
|
| 199 |
|
|
@@ -278,7 +226,7 @@ def create_ui():
|
|
| 278 |
).then(
|
| 279 |
fn=handle_convert,
|
| 280 |
inputs=[file_input, provider_dropdown, ocr_dropdown, output_format, cancel_requested],
|
| 281 |
-
outputs=[file_display, file_download,
|
| 282 |
)
|
| 283 |
|
| 284 |
# Handle cancel button click
|
|
@@ -289,18 +237,6 @@ def create_ui():
|
|
| 289 |
queue=False # Execute immediately
|
| 290 |
)
|
| 291 |
|
| 292 |
-
prev_btn.click(
|
| 293 |
-
fn=lambda curr, pages: handle_page_navigation(-1, curr, pages),
|
| 294 |
-
inputs=[current_page, content_pages],
|
| 295 |
-
outputs=[file_display, current_page, page_info]
|
| 296 |
-
)
|
| 297 |
-
|
| 298 |
-
next_btn.click(
|
| 299 |
-
fn=lambda curr, pages: handle_page_navigation(1, curr, pages),
|
| 300 |
-
inputs=[current_page, content_pages],
|
| 301 |
-
outputs=[file_display, current_page, page_info]
|
| 302 |
-
)
|
| 303 |
-
|
| 304 |
file_display.change(
|
| 305 |
lambda text: text,
|
| 306 |
inputs=[file_display],
|
|
|
|
| 35 |
html_content = markdown.markdown(str(content), extensions=['tables'])
|
| 36 |
return html_content
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Function to run conversion in a separate thread
|
| 39 |
def run_conversion_thread(file_path, parser_name, ocr_method_name, output_format):
|
| 40 |
"""Run the conversion in a separate thread and return the thread object"""
|
|
|
|
| 62 |
|
| 63 |
return thread, results
|
| 64 |
|
|
|
|
| 65 |
def handle_convert(file_path, parser_name, ocr_method_name, output_format, is_cancelled):
|
| 66 |
"""Handle file conversion."""
|
| 67 |
global conversion_cancelled
|
|
|
|
| 69 |
# Check if we should cancel before starting
|
| 70 |
if is_cancelled:
|
| 71 |
logger.info("Conversion cancelled before starting")
|
| 72 |
+
return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
|
| 73 |
|
| 74 |
logger.info("Starting conversion with cancellation flag cleared")
|
| 75 |
|
|
|
|
| 90 |
thread.join(timeout=0.5)
|
| 91 |
if thread.is_alive():
|
| 92 |
logger.warning("Thread did not finish within timeout")
|
| 93 |
+
return "Conversion cancelled.", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
|
| 94 |
|
| 95 |
# Sleep briefly to avoid busy waiting
|
| 96 |
time.sleep(0.1)
|
| 97 |
|
| 98 |
# Thread has completed, check results
|
| 99 |
if results["error"]:
|
| 100 |
+
return f"Error: {results['error']}", None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
|
| 101 |
|
| 102 |
content = results["content"]
|
| 103 |
download_file = results["download_file"]
|
|
|
|
| 105 |
# If conversion returned a cancellation message
|
| 106 |
if content == "Conversion cancelled.":
|
| 107 |
logger.info("Converter returned cancellation message")
|
| 108 |
+
return content, None, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
|
| 109 |
|
| 110 |
+
# Format the content
|
| 111 |
+
formatted_content = format_markdown_content(str(content))
|
|
|
|
| 112 |
|
| 113 |
logger.info("Conversion completed successfully")
|
| 114 |
+
return formatted_content, download_file, gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
def create_ui():
|
| 117 |
with gr.Blocks(css="""
|
| 118 |
+
.markdown-display {
|
| 119 |
+
height: 600px;
|
| 120 |
+
overflow-y: auto;
|
| 121 |
+
border: 1px solid #ddd;
|
| 122 |
+
padding: 1rem;
|
| 123 |
+
margin-bottom: 1rem;
|
| 124 |
+
}
|
| 125 |
+
.processing-controls {
|
| 126 |
+
display: flex;
|
| 127 |
+
justify-content: center;
|
| 128 |
+
gap: 10px;
|
| 129 |
+
margin-top: 10px;
|
| 130 |
+
}
|
| 131 |
""") as demo:
|
| 132 |
gr.Markdown("Markit: Convert any documents to Markdown")
|
| 133 |
|
|
|
|
| 140 |
with gr.Tab("Upload and Convert"):
|
| 141 |
file_input = gr.File(label="Upload PDF", type="filepath")
|
| 142 |
|
| 143 |
+
# Single scrollable content display
|
| 144 |
+
file_display = gr.HTML(label="Converted Markdown", elem_classes=["markdown-display"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
file_download = gr.File(label="Download File")
|
| 147 |
|
|
|
|
| 226 |
).then(
|
| 227 |
fn=handle_convert,
|
| 228 |
inputs=[file_input, provider_dropdown, ocr_dropdown, output_format, cancel_requested],
|
| 229 |
+
outputs=[file_display, file_download, convert_button, cancel_button, conversion_thread]
|
| 230 |
)
|
| 231 |
|
| 232 |
# Handle cancel button click
|
|
|
|
| 237 |
queue=False # Execute immediately
|
| 238 |
)
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
file_display.change(
|
| 241 |
lambda text: text,
|
| 242 |
inputs=[file_display],
|