separate doc and docx
Browse files
app.py
CHANGED
|
@@ -405,11 +405,23 @@ pdf_to_text = gr.Interface(
|
|
| 405 |
api_name="pdf_to_text",
|
| 406 |
)
|
| 407 |
|
| 408 |
-
doc_or_docx_to_text = gr.Interface(
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
gr.File(),
|
| 411 |
-
gr.Textbox(placeholder="Extracted text from DOC or DOCX will appear here"),
|
| 412 |
-
api_name="doc_or_docx_to_text",
|
| 413 |
)
|
| 414 |
|
| 415 |
# pptx_or_ppt_to_text = gr.Interface(
|
|
@@ -458,11 +470,12 @@ url_parser = gr.Interface(
|
|
| 458 |
api_name="url_to_text",
|
| 459 |
)
|
| 460 |
demo = gr.TabbedInterface(
|
| 461 |
-
[pdf_to_img, pdf_to_text,
|
| 462 |
[
|
| 463 |
"PDF to Image",
|
| 464 |
"Extract PDF Text",
|
| 465 |
-
"Extract DOC
|
|
|
|
| 466 |
"Extract PPT Text",
|
| 467 |
"Extract PPTX Text",
|
| 468 |
"Extract text from URL",
|
|
|
|
| 405 |
api_name="pdf_to_text",
|
| 406 |
)
|
| 407 |
|
| 408 |
+
# doc_or_docx_to_text = gr.Interface(
|
| 409 |
+
# extract_text_from_doc_or_docx,
|
| 410 |
+
# gr.File(),
|
| 411 |
+
# gr.Textbox(placeholder="Extracted text from DOC or DOCX will appear here"),
|
| 412 |
+
# api_name="doc_or_docx_to_text",
|
| 413 |
+
# )
|
| 414 |
+
doc_to_txt = gr.Interface(
|
| 415 |
+
convert_doc_to_text,
|
| 416 |
+
gr.File(),
|
| 417 |
+
gr.Textbox(),
|
| 418 |
+
api_name="doc_to_txt"
|
| 419 |
+
)
|
| 420 |
+
docx_to_txt = gr.Interface(
|
| 421 |
+
extract_text_from_docx,
|
| 422 |
gr.File(),
|
| 423 |
+
gr.Textbox(),
|
| 424 |
+
api_name="docx_to_txt"
|
| 425 |
)
|
| 426 |
|
| 427 |
# pptx_or_ppt_to_text = gr.Interface(
|
|
|
|
| 470 |
api_name="url_to_text",
|
| 471 |
)
|
| 472 |
demo = gr.TabbedInterface(
|
| 473 |
+
[pdf_to_img, pdf_to_text, doc_to_txt, docx_to_txt , ppt_to_text, pptx_to_text, url_parser, str_to_json],
|
| 474 |
[
|
| 475 |
"PDF to Image",
|
| 476 |
"Extract PDF Text",
|
| 477 |
+
"Extract DOC Text",
|
| 478 |
+
"Extract DOCX Text",
|
| 479 |
"Extract PPT Text",
|
| 480 |
"Extract PPTX Text",
|
| 481 |
"Extract text from URL",
|