Spaces:

sifars
/

mandala-for-us

Runtime error

mandala-for-us / src /utils /_document_parser.py

add documentation comments

adb221d over 1 year ago

1.74 kB

	import asyncio
	import io
	import os
	import time
	from pathlib import Path
	from typing import Dict, Tuple

	import nest_asyncio
	from fastapi import UploadFile
	from llama_parse import LlamaParse

	# API key is taken from the environment; the value is None when the
	# LLAMAPARSE_API_KEY variable is not set.
	LLAMAPARSE_API_KEY = os.environ.get("LLAMAPARSE_API_KEY")

	# Module-level LlamaParse client, constructed once at import time and used
	# by DocumentParser.parse_file_content below.
	parser = LlamaParse(
	    language="en",
	    result_type="markdown",
	    num_workers=4,
	    verbose=True,
	    api_key=LLAMAPARSE_API_KEY,
	)


	class DocumentParser:
	    """Asynchronous context manager that parses a document into page texts.

	    The class holds no state of its own: entering the context returns the
	    instance and leaving it does nothing. Parsing is delegated to the
	    module-level ``parser`` object.

	    Example:
	        async with DocumentParser() as document_parser:
	            pages = await document_parser.parse_file_content("report.pdf")
	    """

	    async def __aenter__(self):
	        """Enter the ``async with`` block and return this instance."""
	        return self

	    async def __aexit__(self, exc_type, exc_val, exc_tb):
	        """Leave the ``async with`` block.

	        There is nothing to release. Returning ``None`` means an exception
	        raised inside the block is not suppressed.
	        """
	        return None

	    async def parse_file_content(self, file_path: str) -> Tuple[Tuple[int, str], ...]:
	        """Parse a document and return its non-empty pages.

	        Args:
	            file_path: Path to the file to parse.

	        Returns:
	            Tuple of ``(page_number, content)`` pairs. Page numbers start at 1
	            and follow the position of each item in the parser result, so the
	            numbers of the remaining pages are unchanged when a page is
	            skipped. ``content`` is the page text with surrounding whitespace
	            removed. Items without usable text (missing, empty or
	            whitespace-only) are left out. An empty tuple is returned when
	            the parser yields nothing.
	        """
	        # get_running_loop() is the documented way to obtain the loop from
	        # inside a coroutine; get_event_loop() is meant for non-async callers.
	        loop = asyncio.get_running_loop()

	        # parser.load_data is a blocking call, so it runs in the default
	        # executor instead of on the event loop thread.
	        documents = await loop.run_in_executor(None, parser.load_data, file_path)

	        pages = []
	        for page_number, page in enumerate(documents or (), start=1):
	            text = getattr(page, "text", None)
	            if not text:
	                continue
	            content = text.strip()
	            # A page made only of whitespace is as empty as one with no text;
	            # skip it rather than emitting a (page_number, "") entry.
	            if content:
	                pages.append((page_number, content))
	        return tuple(pages)