update app
Browse files
app.py
CHANGED
|
@@ -221,6 +221,7 @@ def extract_text_from_pptx(file_path):
|
|
| 221 |
|
| 222 |
def extract_text_from_ppt(file_path):
|
| 223 |
try:
|
|
|
|
| 224 |
# Convert PPT to PPTX using unoconv
|
| 225 |
pptx_file_path = os.path.splitext(file_path)[0] + ".pptx"
|
| 226 |
subprocess.run(["unoconv", "-f", "pptx", file_path], check=True)
|
|
@@ -417,6 +418,13 @@ pptx_or_ppt_to_text = gr.Interface(
|
|
| 417 |
api_name="pptx_or_ppt_to_text",
|
| 418 |
)
|
| 419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
str_to_json = gr.Interface(
|
| 421 |
sanitize_list_of_lists,
|
| 422 |
gr.Text(),
|
|
@@ -443,12 +451,13 @@ url_parser = gr.Interface(
|
|
| 443 |
api_name="url_to_text",
|
| 444 |
)
|
| 445 |
demo = gr.TabbedInterface(
|
| 446 |
-
[pdf_to_img, pdf_to_text, doc_or_docx_to_text, pptx_or_ppt_to_text, url_parser, str_to_json],
|
| 447 |
[
|
| 448 |
"PDF to Image",
|
| 449 |
"Extract PDF Text",
|
| 450 |
"Extract DOC/DOCX Text",
|
| 451 |
"Extract PPTX/PPT Text",
|
|
|
|
| 452 |
"Extract text from URL",
|
| 453 |
"Extract Json",
|
| 454 |
],
|
|
|
|
| 221 |
|
| 222 |
def extract_text_from_ppt(file_path):
|
| 223 |
try:
|
| 224 |
+
print("file_path = ",file_path)
|
| 225 |
# Convert PPT to PPTX using unoconv
|
| 226 |
pptx_file_path = os.path.splitext(file_path)[0] + ".pptx"
|
| 227 |
subprocess.run(["unoconv", "-f", "pptx", file_path], check=True)
|
|
|
|
| 418 |
api_name="pptx_or_ppt_to_text",
|
| 419 |
)
|
| 420 |
|
| 421 |
+
ppt_to_text = gr.Interface(
|
| 422 |
+
extract_text_from_ppt,
|
| 423 |
+
gr.File(),
|
| 424 |
+
gr.Textbox(),
|
| 425 |
+
api_name="ppt_to_text",
|
| 426 |
+
)
|
| 427 |
+
|
| 428 |
str_to_json = gr.Interface(
|
| 429 |
sanitize_list_of_lists,
|
| 430 |
gr.Text(),
|
|
|
|
| 451 |
api_name="url_to_text",
|
| 452 |
)
|
| 453 |
demo = gr.TabbedInterface(
|
| 454 |
+
[pdf_to_img, pdf_to_text, doc_or_docx_to_text, pptx_or_ppt_to_text, ppt_to_text, url_parser, str_to_json],
|
| 455 |
[
|
| 456 |
"PDF to Image",
|
| 457 |
"Extract PDF Text",
|
| 458 |
"Extract DOC/DOCX Text",
|
| 459 |
"Extract PPTX/PPT Text",
|
| 460 |
+
"Extract PPT Text",
|
| 461 |
"Extract text from URL",
|
| 462 |
"Extract Json",
|
| 463 |
],
|