Spaces:

not-lain
/

utils

Sleeping

not-lain committed on Nov 29, 2024

Commit

5d2e8ec

1 Parent(s): 0772fb4

separate doc and docx

Files changed (1) hide show

app.py CHANGED Viewed

@@ -405,11 +405,23 @@ pdf_to_text = gr.Interface(
     api_name="pdf_to_text",
 )
-doc_or_docx_to_text = gr.Interface(
-    extract_text_from_doc_or_docx,
     gr.File(),
-    gr.Textbox(placeholder="Extracted text from DOC or DOCX will appear here"),
-    api_name="doc_or_docx_to_text",
 )
 # pptx_or_ppt_to_text = gr.Interface(
@@ -458,11 +470,12 @@ url_parser = gr.Interface(
     api_name="url_to_text",
 )
 demo = gr.TabbedInterface(
-    [pdf_to_img, pdf_to_text, doc_or_docx_to_text, ppt_to_text, pptx_to_text, url_parser, str_to_json],
     [
         "PDF to Image",
         "Extract PDF Text",
-        "Extract DOC/DOCX Text",
         "Extract PPT Text",
         "Extract PPTX Text",
         "Extract text from URL",

     api_name="pdf_to_text",
 )
+# doc_or_docx_to_text = gr.Interface(
+#     extract_text_from_doc_or_docx,
+#     gr.File(),
+#     gr.Textbox(placeholder="Extracted text from DOC or DOCX will appear here"),
+#     api_name="doc_or_docx_to_text",
+# )
+doc_to_txt = gr.Interface(
+    convert_doc_to_text,
+    gr.File(),
+    gr.Textbox(),
+    api_name="doc_to_txt"
+)
+docx_to_txt = gr.Interface(
+    extract_text_from_docx,
     gr.File(),
+    gr.Textbox(),
+    api_name="docx_to_txt"
 )
 # pptx_or_ppt_to_text = gr.Interface(
     api_name="url_to_text",
 )
 demo = gr.TabbedInterface(
+    [pdf_to_img, pdf_to_text, doc_to_txt, docx_to_txt , ppt_to_text, pptx_to_text, url_parser, str_to_json],
     [
         "PDF to Image",
         "Extract PDF Text",
+        "Extract DOC Text",
+        "Extract DOCX Text",
         "Extract PPT Text",
         "Extract PPTX Text",
         "Extract text from URL",