import asyncio
import io
import os
import time
from pathlib import Path
from typing import Dict, Tuple

import nest_asyncio
from fastapi import UploadFile
from llama_parse import LlamaParse

# API key for the LlamaParse service, read from the environment at import time.
# It is None when the variable is unset.
LLAMAPARSE_API_KEY = os.getenv("LLAMAPARSE_API_KEY")

# Single module-level parser instance used by every DocumentParser.
parser = LlamaParse(
    api_key=LLAMAPARSE_API_KEY,
    result_type="markdown",
    num_workers=4,
    verbose=True,
    language="en",
)


class DocumentParser:
    """Asynchronous context manager that parses document files.

    The context-manager hooks hold no resources: entering returns the
    instance itself and exiting does nothing (exceptions are not
    suppressed). The actual work is done by ``parse_file_content``, which
    delegates to the module-level ``parser``.

    Example:
        async with DocumentParser() as document_parser:
            pages = await document_parser.parse_file_content("report.pdf")
    """

    async def __aenter__(self):
        """Enter the async context and return this instance."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the async context; nothing to release, exceptions propagate."""
        pass

    async def parse_file_content(self, file_path: str) -> Tuple[Tuple[int, str], ...]:
        """Parse a document and return its non-empty pages.

        Args:
            file_path: Path to the file to parse.

        Returns:
            A tuple of ``(page_number, content)`` pairs. ``page_number`` is
            the 1-based position of the entry in the parser's result, so
            numbers are not renumbered when an entry without text is
            skipped. ``content`` is the entry's text with surrounding
            whitespace stripped. An empty tuple is returned when the parser
            yields nothing.
        """
        # We are inside a coroutine, so a loop is guaranteed to be running;
        # get_running_loop() is the supported way to obtain it here.
        loop = asyncio.get_running_loop()

        # parser.load_data is a synchronous call; hand it to the default
        # executor so the event loop is not blocked while it runs.
        documents = await loop.run_in_executor(None, parser.load_data, file_path)
        if not documents:
            return ()

        return tuple(
            (page_number, document.text.strip())
            for page_number, document in enumerate(documents, start=1)
            # Skip entries that have no ``text`` attribute or whose text is empty.
            if getattr(document, "text", None)
        )