import os
import html2text
from llama_parse import LlamaParse
import mammoth
from server.logger.logger_config import my_logger as logger

# Integer flag read from the environment: a non-zero value makes the loader use
# LlamaParse, zero makes it use the local mammoth + html2text conversion.
# `or 0` covers both an unset variable (os.getenv returns None) and an empty
# string, either of which would otherwise make int() raise at import time.
# A non-numeric value still raises ValueError so misconfiguration stays visible.
USE_LLAMA_PARSE = int(os.getenv('USE_LLAMA_PARSE') or 0)
# API key handed to LlamaParse; None when the variable is not set.
LLAMA_CLOUD_API_KEY = os.getenv('LLAMA_CLOUD_API_KEY')


class AsyncDocxLoader:
    """Load the text content of a .docx file.

    When the module-level ``USE_LLAMA_PARSE`` flag is truthy the file is parsed
    by LlamaParse with ``result_type="markdown"``; otherwise it is converted
    locally with mammoth (docx -> HTML) and html2text (HTML -> text).
    """

    def __init__(self, file_path: str) -> None:
        """Remember the path of the .docx file to load.

        Args:
            file_path: Path of the document; it is only stored here and is
                opened/parsed later by ``get_content``.
        """
        logger.info(f"[FILE LOADER] init docx, file_path: '{file_path}'")
        self.file_path = file_path

    async def get_content(self) -> str:
        """Return the document content as text.

        Returns:
            The extracted text, or ``''`` when nothing could be extracted or
            any exception was raised while loading (the exception is logged,
            not propagated).
        """
        try:
            if USE_LLAMA_PARSE:
                content = await self._load_with_llama_parse()
            else:
                content = self._load_with_mammoth()

            if not content:
                logger.warning(f"file_path: '{self.file_path}' is empty!")
            return content
        except Exception as e:
            # Best-effort loader: callers get an empty string instead of an error.
            logger.error(f"get_content is failed, exception: {e}")
            return ''

    async def _load_with_llama_parse(self) -> str:
        """Parse the file with LlamaParse and join the document texts."""
        parser = LlamaParse(
            api_key=LLAMA_CLOUD_API_KEY,
            result_type="markdown",
        )
        # Await the async API directly. Calling the blocking `load_data` from
        # inside a coroutine needs the running loop to be patched with
        # nest_asyncio and stalls the loop while the request is in flight.
        documents = await parser.aload_data(self.file_path)
        return "\n\n".join(doc.text for doc in documents)

    def _load_with_mammoth(self) -> str:
        """Convert the file to HTML with mammoth, then to text with html2text.

        NOTE: this conversion is synchronous and runs on the caller's thread.
        """
        with open(self.file_path, 'rb') as fd:
            result = mammoth.convert_to_html(fd)

        messages = result.messages
        if messages:
            # Conversion messages reported by mammoth are logged, not fatal.
            logger.warning(
                f"Read file_path: '{self.file_path}', messages: {messages}"
            )

        html_text = result.value
        if not html_text:
            logger.warning(
                f"file_path: '{self.file_path}', convert_to_html is empty!"
            )
            return ''

        converter = html2text.HTML2Text()
        converter.ignore_images = True  # drop image references from the output
        return converter.handle(html_text)