Spaces:
Sleeping
Sleeping
| import openai | |
| import os | |
| from dotenv import load_dotenv | |
| import yaml | |
| from langchain_openai import OpenAIEmbeddings | |
| from pyprojroot import here | |
| import shutil | |
| load_dotenv() | |
class LoadConfig:
    """
    Load application settings from 'configs/app_config.yml' and prepare directories.

    On construction the class reads the YAML configuration file (resolved through
    ``pyprojroot.here``), copies the LLM, retrieval, splitter, summarizer and memory
    settings onto instance attributes, pushes the OpenAI credentials found in the
    environment onto the ``openai`` module, makes sure the persist directory exists,
    and deletes the custom persist directory if one is left over.

    Attributes:
        llm_engine : str
            The language model engine specified in the configuration.
        llm_system_role : str
            The role of the language model system specified in the configuration.
        persist_directory : str
            The path to the persist directory, resolved with ``here`` and
            converted to ``str``.
        custom_persist_directory : str
            The path to the custom persist directory, resolved with ``here`` and
            converted to ``str``.
        embedding_model : OpenAIEmbeddings
            An OpenAIEmbeddings instance created with the OPENAI_API_KEY
            environment variable.
        data_directory : str
            The data directory exactly as written in the configuration (it is
            not resolved with ``here``).
        k : int
            The value of 'k' specified in the retrieval configuration.
        embedding_model_engine : str
            The engine specified in the embedding model configuration.
        chunk_size : int
            The chunk size specified in the splitter configuration.
        chunk_overlap : int
            The chunk overlap specified in the splitter configuration.
        max_final_token : int
            The maximum number of final tokens specified in the summarizer configuration.
        token_threshold : float
            The token threshold specified in the summarizer configuration.
        summarizer_llm_system_role : str
            The role of the summarizer language model system specified in the configuration.
        character_overlap
            The 'character_overlap' value from the summarizer configuration.
        final_summarizer_llm_system_role
            The 'final_summarizer_llm_system_role' value from the summarizer configuration.
        temperature : float
            The temperature specified in the LLM configuration.
        number_of_q_a_pairs : int
            The number of question-answer pairs specified in the memory configuration.

    Methods:
        load_openai_cfg():
            Copy OpenAI settings from environment variables onto the openai module.
        create_directory(directory_path):
            Create a directory if it does not exist.
        remove_directory(directory_path):
            Remove the specified directory, reporting the outcome on stdout.
    """

    def __init__(self) -> None:
        """
        Read the configuration file and populate every attribute.

        Raises:
            OSError: If the configuration file cannot be opened.
            KeyError: If an expected section or key is missing from the file.
        """
        # YAML is read as UTF-8 explicitly so the prompts stored in the file are
        # decoded the same way on every platform instead of using the locale default.
        # NOTE(review): FullLoader is kept for compatibility; the file is assumed to
        # be trusted. If it can ever come from an untrusted source, use yaml.safe_load.
        with open(here("configs/app_config.yml"), encoding="utf-8") as cfg:
            app_config = yaml.load(cfg, Loader=yaml.FullLoader)

        llm_cfg = app_config["llm_config"]
        directories = app_config["directories"]
        splitter_cfg = app_config["splitter_config"]
        summarizer_cfg = app_config["summarizer_config"]

        # LLM configs
        self.llm_engine = llm_cfg["engine"]
        self.llm_system_role = llm_cfg["llm_system_role"]
        # Needs to be a string: the chromadb backend concatenates it, i.e.
        # self._settings.require("persist_directory") + "/chroma.sqlite3"
        self.persist_directory = str(here(directories["persist_directory"]))
        self.custom_persist_directory = str(
            here(directories["custom_persist_directory"]))
        # NOTE(review): embedding_model_engine (loaded below) is not passed to
        # OpenAIEmbeddings here, so the library default model is used — confirm
        # this is intended.
        self.embedding_model = OpenAIEmbeddings(
            openai_api_key=os.getenv("OPENAI_API_KEY"))

        # Retrieval configs
        self.data_directory = directories["data_directory"]
        self.k = app_config["retrieval_config"]["k"]
        self.embedding_model_engine = app_config["embedding_model_config"]["engine"]
        self.chunk_size = splitter_cfg["chunk_size"]
        self.chunk_overlap = splitter_cfg["chunk_overlap"]

        # Summarizer config
        self.max_final_token = summarizer_cfg["max_final_token"]
        self.token_threshold = summarizer_cfg["token_threshold"]
        self.summarizer_llm_system_role = summarizer_cfg["summarizer_llm_system_role"]
        self.character_overlap = summarizer_cfg["character_overlap"]
        self.final_summarizer_llm_system_role = summarizer_cfg[
            "final_summarizer_llm_system_role"]
        self.temperature = llm_cfg["temperature"]

        # Memory
        self.number_of_q_a_pairs = app_config["memory"]["number_of_q_a_pairs"]

        # Load OpenAI credentials
        self.load_openai_cfg()

        # Make sure the main vectordb directory exists, and clean up the
        # upload-doc vectordb if one is left over.
        self.create_directory(self.persist_directory)
        self.remove_directory(self.custom_persist_directory)

    def load_openai_cfg(self) -> None:
        """
        Copy OpenAI settings from the environment onto the ``openai`` module.

        The API type, base URL, version and key are read from the environment
        variables OPENAI_API_TYPE, OPENAI_API_BASE, OPENAI_API_VERSION and
        OPENAI_API_KEY. A variable that is not set results in ``None`` being
        assigned.

        Note:
            NOTE(review): these module-level attribute names follow the older
            openai SDK style — confirm the installed openai version honors them.
        """
        openai.api_type = os.getenv("OPENAI_API_TYPE")
        openai.api_base = os.getenv("OPENAI_API_BASE")
        openai.api_version = os.getenv("OPENAI_API_VERSION")
        openai.api_key = os.getenv("OPENAI_API_KEY")

    def create_directory(self, directory_path: str) -> None:
        """
        Create a directory (including missing parents) if it does not exist.

        Parameters:
            directory_path (str): The path of the directory to be created.

        Raises:
            OSError: If the directory cannot be created, e.g. because the path
                already exists as a regular file.
        """
        # exist_ok avoids the check-then-create race of testing for existence first.
        os.makedirs(directory_path, exist_ok=True)

    def remove_directory(self, directory_path: str) -> None:
        """
        Remove the specified directory tree and report the outcome on stdout.

        This is a best-effort cleanup: a missing path or a failure during
        removal is printed, not raised.

        Parameters:
            directory_path (str): The path of the directory to be removed.

        Returns:
            None
        """
        if not os.path.exists(directory_path):
            print(f"The directory '{directory_path}' does not exist.")
            return

        try:
            shutil.rmtree(directory_path)
        except OSError as e:
            # Deliberately swallowed so a failed cleanup does not abort start-up.
            print(f"Error: {e}")
        else:
            print(
                f"The directory '{directory_path}' has been successfully removed.")