Spaces:
Runtime error
Runtime error
| import fitz # PyMuPDF | |
| import pandas as pd | |
| from pptx import Presentation | |
def extract_text_from_file(v_file_path):
    """
    Extract text from a PDF, PPTX, or CSV file.

    The file type is chosen from the path's extension, compared
    case-insensitively:

    * ``.pdf``  -- the text of every page, concatenated in page order.
    * ``.pptx`` -- the text of every shape that has a text frame, each
      followed by a newline, in slide/shape order.
    * ``.csv``  -- the file is loaded with ``pandas.read_csv`` and rendered
      with ``DataFrame.to_string()``.

    Args:
        v_file_path: Path to the input file, as a ``str`` or an
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        The extracted text, or an empty string when the extension is not
        one of the supported types.
    """
    import os  # local import: only needed to normalise path-like inputs

    # Normalise once so path-like objects work and the extension test is
    # not recomputed for every branch.
    v_suffix_source = os.fspath(v_file_path).lower()

    if v_suffix_source.endswith('.pdf'):
        # The context manager closes the document even if a page fails to
        # render, which the manual close() call did not guarantee.
        with fitz.open(v_file_path) as obj_pdf:
            return "".join(obj_page.get_text() for obj_page in obj_pdf)

    if v_suffix_source.endswith('.pptx'):
        obj_ppt = Presentation(v_file_path)
        return "".join(
            obj_shape.text_frame.text + "\n"
            for obj_slide in obj_ppt.slides
            for obj_shape in obj_slide.shapes
            if obj_shape.has_text_frame
        )

    if v_suffix_source.endswith('.csv'):
        return pd.read_csv(v_file_path).to_string()

    # Unsupported extension: keep the original "no text" result.
    return ""