Spaces:

sifars
/

mandala-for-us

Runtime error

File size: 1,741 Bytes

adb221d
040da4c
 
adb221d
040da4c
adb221d
 
 
040da4c
adb221d
040da4c
 
 
 
 
 
 
 
 
 
a540238
 
adb221d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a540238
 
 
 
 
 
 
b4a5816
a540238

import asyncio
import io
import os
import time
from pathlib import Path
from typing import Dict, Tuple

import nest_asyncio
from fastapi import UploadFile
from llama_parse import LlamaParse

# API key for the parsing service, read from the environment once at import
# time. The value is None when the variable is not set.
LLAMAPARSE_API_KEY = os.environ.get("LLAMAPARSE_API_KEY")

# Shared module-level parser instance; DocumentParser.parse_file_content
# calls its load_data method. Results are requested as markdown.
parser = LlamaParse(
    language="en",
    verbose=True,
    num_workers=4,
    result_type="markdown",
    api_key=LLAMAPARSE_API_KEY,
)


class DocumentParser:
    """Asynchronous context manager that parses a document file into text.

    The context-manager hooks acquire and release nothing; they only allow
    the class to be used with ``async with``.

    Args:
        backend: Optional object exposing ``load_data(file_path)``. When
            omitted, the module-level ``parser`` is used; it is looked up
            each time ``parse_file_content`` is called.

    Methods:
        __aenter__() -> DocumentParser:
            Enter the runtime context and return this instance.

        __aexit__(exc_type, exc_val, exc_tb) -> None:
            Exit the runtime context without suppressing exceptions.

        parse_file_content(file_path: str) -> Tuple[Tuple[int, str], ...]:
            Parse a file and return ``(page_number, content)`` pairs.
    """

    def __init__(self, backend=None):
        """Remember an optional parsing backend (see the class docstring)."""
        self._backend = backend

    async def __aenter__(self):
        """Enter the ``async with`` block and return this instance."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit the ``async with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        return None

    async def parse_file_content(self, file_path: str) -> Tuple[Tuple[int, str], ...]:
        """
        Parse document content using the parser library.

        The backend's ``load_data`` is a regular (non-async) callable, so it
        is run in the event loop's default executor instead of being called
        directly from the coroutine.

        Args:
            file_path: Path to the file to parse

        Returns:
            Tuple of (page_number, content) pairs. ``page_number`` is the
            1-based position of the item in the backend's result (assumes
            the backend yields one item per page -- TODO confirm). Items
            with no ``text`` attribute, or whose text is empty or only
            whitespace, are skipped; the numbering of the remaining items
            is not compacted. An empty result yields an empty tuple.
        """
        # Fall back to the module-level parser at call time so existing
        # ``DocumentParser()`` callers keep their behaviour.
        backend = self._backend if self._backend is not None else parser

        # A coroutine always has a running loop; get_running_loop() is the
        # documented way to obtain it from inside one.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, backend.load_data, file_path)

        if not result:
            return ()

        pages = []
        for page_number, page in enumerate(result, start=1):
            text = getattr(page, "text", None)
            if not text:
                continue
            # Check emptiness after stripping so whitespace-only text does
            # not produce an entry with empty content.
            content = text.strip()
            if content:
                pages.append((page_number, content))
        return tuple(pages)