Spaces:
Runtime error
Runtime error
| import os | |
| from fastapi import UploadFile | |
| import mimetypes | |
| from app.parser.parsers import * | |
| from app.schemas.document import Document | |
async def get_document_from_file(file: UploadFile, temp_file_path="/tmp/temp_file"):
    """Parse an uploaded file into a ``Document``.

    The upload is written to ``temp_file_path``, handed to
    ``extract_text_with_mimetype`` and the temporary file is removed
    afterwards, whether or not parsing succeeded.

    Args:
        file: The uploaded file; its ``content_type`` selects the parser.
        temp_file_path: Where the upload is staged on disk while parsing.

    Returns:
        A ``Document`` whose ``text`` is the extracted text.

    Raises:
        Exception: "Couldn't get document from file" if extraction fails;
            the underlying error is attached as ``__cause__``.
    """
    # NOTE(review): the default staging path is a fixed name, so concurrent
    # calls that rely on the default write to the same file — callers that
    # run in parallel should pass a unique ``temp_file_path``.
    mimetype = file.content_type
    if mimetype is None and file.filename:
        # The staging path carries no extension, so guessing from it later
        # cannot succeed; the client-supplied filename is the only hint left.
        mimetype, _ = mimetypes.guess_type(file.filename)

    stream = await file.read()

    try:
        # A distinct name keeps the ``file`` argument from being shadowed.
        with open(temp_file_path, "wb") as temp_file:
            temp_file.write(stream)
        try:
            parsed_text = await extract_text_with_mimetype(temp_file_path, mimetype)
        except Exception as e:
            raise Exception("Couldn't get document from file") from e
    finally:
        # Single cleanup path: also covers a failed or partial write.
        try:
            os.remove(temp_file_path)
        except FileNotFoundError:
            # Nothing was created (e.g. open() itself failed).
            pass

    return Document(
        text=parsed_text,
    )
async def extract_text_with_mimetype(file_path, mimetype):
    """Extract the text of the file at ``file_path`` using a parser chosen by mimetype.

    Args:
        file_path: Path of the file to parse.
        mimetype: MIME type of the file, or ``None`` to guess it from
            ``file_path`` with ``mimetypes.guess_type``.

    Returns:
        Whatever the selected parser's ``parse`` returns for ``file_path``.

    Raises:
        Exception: "Unsupported file type" when the type cannot be
            determined or no parser is registered for it.
    """
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(file_path)
    if mimetype is None:
        raise Exception("Unsupported file type")

    # Content-Type values may carry parameters ("text/plain; charset=utf-8")
    # and are case-insensitive; compare on the bare, lower-cased type.
    mimetype = mimetype.split(";", 1)[0].strip().lower()

    if mimetype == "application/pdf":
        parsed_text = PdfParser.parse(file_path)
    elif mimetype == "text/plain":
        parsed_text = TxtParser.parse(file_path)
    elif (
        mimetype
        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ):
        parsed_text = DocxParser.parse(file_path)
    else:
        # Previously fell through to ``return parsed_text`` with the name
        # unbound, surfacing as an UnboundLocalError.
        raise Exception("Unsupported file type")
    return parsed_text