Spaces:

rbughao
/

MarkdownMaker

Sleeping

App Files Community

rbughao committed on Jan 26

Commit

fe22f31

verified ·

1 Parent(s): 7f7bfa6

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -17

app.py CHANGED Viewed

@@ -8,47 +8,49 @@ from markitdown import MarkItDown
 md = MarkItDown()
 def convert_file(file, output_format):
-    """
-    Convert the uploaded file using MarkItDown and return the extracted text,
-    plus a downloadable file in the chosen format.
-    """
     if file is None:
         return gr.update(value="Please upload a file."), None
     try:
-        # MarkItDown accepts a path-like string; gradio gives a temp file object
         result = md.convert(file.name)
-        # Prefer text_content; fall back to markdown_content if available
         text = getattr(result, "text_content", None)
         if not text:
             text = getattr(result, "markdown_content", "")
         if not text:
             text = "No textual content extracted."
-        # Build downloadable content
         base = os.path.splitext(os.path.basename(file.name))[0]
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
         if output_format == "markdown":
             out_name = f"{base}_extracted_{timestamp}.md"
-            bytes_io = io.BytesIO(text.encode("utf-8"))
         else:
             out_name = f"{base}_extracted_{timestamp}.txt"
-            bytes_io = io.BytesIO(text.encode("utf-8"))
         bytes_io.seek(0)
         return text, (out_name, bytes_io)
     except Exception as e:
-        # Be user-friendly but retain the message for debugging in the UI
         return f"❌ Conversion failed: {e}", None
 with gr.Blocks(title="MarkItDown - Document Extractor") as demo:
     gr.Markdown(
         """
         # 📝 MarkItDown – Document Text Extractor
-        Upload a **PDF, DOCX, PPTX, EML, HTML**, or similar file and extract clean text using [MarkItDown](https://github.com/microsoft/markitdown).
         """
     )
@@ -68,11 +70,11 @@ with gr.Blocks(title="MarkItDown - Document Extractor") as demo:
     with gr.Row():
         convert_btn = gr.Button("Convert", variant="primary")
         clear_btn = gr.Button("Clear")
     text_output = gr.Textbox(
         label="Extracted Text",
-        lines=20,
-        show_copy_button=True
     )
     download_file = gr.File(
         label="Download Extracted File",
@@ -86,15 +88,15 @@ with gr.Blocks(title="MarkItDown - Document Extractor") as demo:
         api_name="convert"
     )
-    def clear():
-        return None, "",""
     clear_btn.click(
         fn=lambda: (None, "", None),
         inputs=[],
         outputs=[file_input, text_output, download_file]
     )
 if __name__ == "__main__":
-    # Spaces will call `demo.launch()` automatically, but this helps local runs.
     demo.launch()

 # Shared MarkItDown converter, created once at import time and reused by
 # convert_file for every request.
 md = MarkItDown()
 def convert_file(file, output_format):
     """Extract text from an uploaded file and prepare a downloadable copy.

     Args:
         file: The upload handed over by the file input. Either a path
             string or an object exposing the path as ``.name``; ``None``
             when nothing was uploaded.
         output_format: ``"markdown"`` yields a ``.md`` download; any other
             value yields ``.txt``.

     Returns:
         A ``(text, download_path)`` pair. ``download_path`` is the path of
         a UTF-8 encoded file holding the extracted text. It is ``None``
         when no file was uploaded (the first item is then a ``gr.update``
         carrying a prompt) or when conversion failed (the first item is
         then the error message).
     """
     import tempfile

     if file is None:
         return gr.update(value="Please upload a file."), None
     try:
         # Accept both a plain path string and a tempfile-like wrapper.
         src_path = file if isinstance(file, str) else file.name
         result = md.convert(src_path)
         text = getattr(result, "text_content", None)
         if not text:
             text = getattr(result, "markdown_content", "")
         if not text:
             text = "No textual content extracted."
         base = os.path.splitext(os.path.basename(src_path))[0]
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
         extension = ".md" if output_format == "markdown" else ".txt"
         out_name = f"{base}_extracted_{timestamp}{extension}"
         # A gr.File output is populated from a file path, not from an
         # in-memory (name, BytesIO) tuple, so persist the text first.
         # A fresh directory per call keeps the friendly file name without
         # clashing with earlier downloads.
         out_path = os.path.join(tempfile.mkdtemp(), out_name)
         with open(out_path, "wb") as fh:
             fh.write(text.encode("utf-8"))
         return text, out_path
     except Exception as e:
         # UI boundary: surface the failure in the text box instead of
         # crashing the request.
         return f"❌ Conversion failed: {e}", None
# JavaScript executed in the browser by the "Copy Text" button.
# It prefers the asynchronous Clipboard API and falls back to the legacy
# execCommand path when that API is missing or rejects. Success is reported
# only when something was actually copied.
# NOTE(review): this targets the first <textarea> in the document and assumes
# that is the "Extracted Text" box - confirm against the full layout.
copy_js = """
() => {
  const tb = document.querySelector('textarea');
  if (!tb || !tb.value) { alert('Nothing to copy'); return; }
  const legacyCopy = () => {
    tb.select();
    if (document.execCommand('copy')) {
      alert('Copied to clipboard');
    } else {
      alert('Copy failed');
    }
  };
  if (navigator.clipboard && navigator.clipboard.writeText) {
    navigator.clipboard.writeText(tb.value).then(
      () => { alert('Copied to clipboard'); },
      legacyCopy
    );
  } else {
    legacyCopy();
  }
}
"""
 with gr.Blocks(title="MarkItDown - Document Extractor") as demo:
     gr.Markdown(
         """
         # 📝 MarkItDown – Document Text Extractor
+        Upload a **PDF, DOCX, PPTX, EML, HTML**, or similar file and extract clean text using https://github.com/microsoft/markitdown.
         """
     )
     with gr.Row():
         convert_btn = gr.Button("Convert", variant="primary")
         clear_btn = gr.Button("Clear")
+        copy_btn = gr.Button("Copy Text")
     text_output = gr.Textbox(
         label="Extracted Text",
+        lines=20
     )
     download_file = gr.File(
         label="Download Extracted File",
         api_name="convert"
     )
     clear_btn.click(
         fn=lambda: (None, "", None),
         inputs=[],
         outputs=[file_input, text_output, download_file]
     )
+    # Client-side copy to clipboard
+    copy_btn.click(None, [], [], _js=copy_js)
 # Launch the Blocks app only when this file is run directly, not when it
 # is imported as a module.
 if __name__ == "__main__":
     demo.launch()