Spaces:
Paused
Paused
from smolagents import tool
import PyPDF2
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extracts all text content from a PDF file.

    Args:
        pdf_path: The file path to the PDF file to extract text from (e.g., '/tmp/document.pdf')

    Returns:
        The extracted text content from the PDF file. Each page's text is
        preceded by a "--- Page N ---" header (N starts at 1) and pages are
        joined with a blank line between them. If the file is missing or
        extraction fails, a string starting with "Error" is returned
        instead of an exception being raised.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # The reader was handed the open file object, so all page
            # extraction is done inside the `with` block, before the file
            # is closed.
            page_sections = [
                f"--- Page {page_number} ---\n{page.extract_text()}"
                for page_number, page in enumerate(pdf_reader.pages, start=1)
            ]
        return "\n\n".join(page_sections)
    except FileNotFoundError:
        return f"Error: PDF file not found at path: {pdf_path}"
    except Exception as exc:
        # Deliberately broad: every failure is reported to the caller as a
        # message string rather than propagated as an exception.
        return f"Error extracting text from PDF: {exc}"