Spaces:
Sleeping
Sleeping
| import fitz | |
| from pdf2docx import Converter | |
| import pypandoc | |
def export_to_word(pdf_path: str, output_path: str = "output.docx") -> str:
    """Convert a PDF file to DOCX with pdf2docx (layout-aware).

    Args:
        pdf_path: Path of the PDF file to convert.
        output_path: Destination path of the generated DOCX file.

    Returns:
        ``output_path``, so the call can be chained or logged by the caller.

    Raises:
        Whatever ``Converter`` or ``Converter.convert`` raises is propagated
        unchanged; the converter is still closed in that case.
    """
    cv = Converter(pdf_path)
    try:
        # start=0 / end=None: convert from the first page through the last.
        cv.convert(output_path, start=0, end=None)
    finally:
        # Always close the converter, even when the conversion fails,
        # so the opened PDF is not leaked.
        cv.close()
    return output_path
def export_to_text(pdf_path: str, output_path: str = "output.txt") -> str:
    """Export the selectable text of a PDF to a UTF-8 text file.

    The text of each page is obtained with ``page.get_text()`` and the pages
    are concatenated in document order with no extra separator.

    Args:
        pdf_path: Path of the PDF file to read.
        output_path: Destination path of the text file (overwritten if it
            already exists).

    Returns:
        ``output_path``.
    """
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string page by page, which can
        # degrade to quadratic time on long documents.
        text = "".join(page.get_text() for page in doc)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)
    return output_path
def export_text_to_markdown(text: str, output_path: str = "output.md") -> str:
    """Write already-extracted text to a Markdown file through pypandoc.

    The text is handed to ``pypandoc.convert_text`` with Markdown as both the
    input and the output format, and pandoc is run with ``--standalone``.

    Args:
        text: Text content to export.
        output_path: Destination path of the Markdown file.

    Returns:
        ``output_path``.
    """
    pandoc_flags = ["--standalone"]
    pypandoc.convert_text(
        text,
        "md",
        format="md",
        outputfile=output_path,
        extra_args=pandoc_flags,
    )
    return output_path