Spaces:
Runtime error
Runtime error
| import asyncio | |
| import io | |
| import os | |
| import time | |
| from pathlib import Path | |
| from typing import Dict, Tuple | |
| import nest_asyncio | |
| from fastapi import UploadFile | |
| from llama_parse import LlamaParse | |
# The API key is read from the environment; it is None when the variable is unset.
LLAMAPARSE_API_KEY = os.environ.get("LLAMAPARSE_API_KEY")

# Single module-level LlamaParse client reused by every DocumentParser call.
parser = LlamaParse(
    language="en",
    result_type="markdown",
    verbose=True,
    num_workers=4,
    api_key=LLAMAPARSE_API_KEY,
)
class DocumentParser:
    """Asynchronous context manager that parses the content of a document file.

    Parsing is delegated to the module-level ``parser`` object. Entering and
    leaving the context acquires and releases nothing; the protocol is
    implemented so callers can write ``async with DocumentParser() as dp:``.

    Methods:
        __aenter__() -> DocumentParser:
            Enter the runtime context and return this instance.
        __aexit__(exc_type, exc_val, exc_tb) -> None:
            Exit the runtime context without suppressing exceptions.
        parse_file_content(file_path: str) -> Tuple[Tuple[int, str], ...]:
            Parse a file and return ``(page_number, content)`` pairs.
    """

    async def __aenter__(self):
        """Enter the runtime context and return this instance."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the runtime context.

        Nothing is cleaned up. Returning ``None`` means an exception raised
        inside the ``async with`` body propagates to the caller.
        """
        return None

    async def parse_file_content(self, file_path: str) -> Tuple[Tuple[int, str], ...]:
        """Parse document content using the module-level parser.

        Args:
            file_path: Path to the file to parse.

        Returns:
            Tuple of ``(page_number, content)`` pairs. ``page_number`` is the
            1-based position of the item in the parser's result, so numbers
            of skipped items are not reused. ``content`` is the item's text
            with surrounding whitespace removed. Items that have no ``text``
            attribute, or whose text is empty or whitespace-only, are
            skipped. An empty tuple is returned when the parser yields
            nothing.
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the preferred way to obtain it (get_event_loop() has more complex,
        # partly deprecated fallback behaviour).
        loop = asyncio.get_running_loop()

        # load_data is invoked as a plain synchronous callable, so it is run
        # on the loop's default executor instead of on the event loop itself.
        documents = await loop.run_in_executor(None, parser.load_data, file_path)
        if not documents:
            return ()

        pages = []
        for page_number, document in enumerate(documents, start=1):
            text = getattr(document, "text", None)
            if not text:
                continue
            content = text.strip()
            # Test the stripped text so whitespace-only pages are dropped
            # rather than emitted as (page_number, "").
            if content:
                pages.append((page_number, content))
        return tuple(pages)