Spaces:
No application file
No application file
Create load_config.py
Browse files- src/utils/load_config.py +72 -0
src/utils/load_config.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
import yaml
|
| 5 |
+
from langchain_openai import OpenAIEmbeddings
|
| 6 |
+
from pyprojroot import here
|
| 7 |
+
import shutil
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
class LoadConfig:
    """Application settings read from ``configs/app_config.yml``.

    Instantiating the class reads the YAML file, copies the individual
    settings onto instance attributes, sets the OpenAI API key from the
    environment, and prepares the vector-store directories on disk.
    """

    def __init__(self) -> None:
        """Read the YAML config and populate all settings.

        Side effects: creates ``persist_directory`` if it is missing and
        deletes ``custom_persist_directory`` if it is present.

        Raises:
            OSError: If the config file cannot be opened.
            KeyError: If an expected section or key is absent from the file.
        """
        # YAML is read as UTF-8 explicitly so that non-ASCII text (for
        # example in the system-role prompts) does not depend on the
        # platform's default locale encoding.
        with open(here("configs/app_config.yml"), encoding="utf-8") as cfg:
            # NOTE(review): FullLoader can build more than plain scalars,
            # lists and dicts. The file is a local project config, so this
            # is kept as-is; consider yaml.safe_load if the config could
            # ever come from an untrusted source.
            app_config = yaml.load(cfg, Loader=yaml.FullLoader)

        llm_config = app_config["llm_config"]
        directories = app_config["directories"]

        # LLM configs
        self.llm_engine = llm_config["engine"]
        self.llm_system_role = llm_config["llm_system_role"]
        # Both persist directories are passed through `here` and stored as
        # plain strings.
        self.persist_directory = str(here(directories["persist_directory"]))
        self.custom_persist_directory = str(
            here(directories["custom_persist_directory"]))
        # The embedding model is an OpenAIEmbeddings instance built with its
        # defaults; the engine name from the config is stored separately in
        # `embedding_model_engine` below.
        self.embedding_model = OpenAIEmbeddings()

        # Retrieval configs
        # Unlike the persist directories, data_directory is stored exactly
        # as written in the config (it is not passed through `here`).
        self.data_directory = directories["data_directory"]
        self.k = app_config["retrieval_config"]["k"]
        self.embedding_model_engine = app_config["embedding_model_config"]["engine"]
        splitter_config = app_config["splitter_config"]
        self.chunk_size = splitter_config["chunk_size"]
        self.chunk_overlap = splitter_config["chunk_overlap"]

        # Summarizer config
        summarizer_config = app_config["summarizer_config"]
        self.max_final_token = summarizer_config["max_final_token"]
        self.token_threshold = summarizer_config["token_threshold"]
        self.summarizer_llm_system_role = summarizer_config["summarizer_llm_system_role"]
        self.character_overlap = summarizer_config["character_overlap"]
        self.final_summarizer_llm_system_role = summarizer_config[
            "final_summarizer_llm_system_role"]
        self.temperature = llm_config["temperature"]

        # Memory
        self.number_of_q_a_pairs = app_config["memory"]["number_of_q_a_pairs"]

        # Load OpenAI credentials
        self.load_openai_cfg()

        # Make sure the main vector store directory exists, and clean up the
        # upload doc vectordb if it exists.
        self.create_directory(self.persist_directory)
        self.remove_directory(self.custom_persist_directory)

    def load_openai_cfg(self) -> None:
        """Copy ``OPENAI_API_KEY`` from the environment onto ``OpenAI``.

        Only the API key is set here; the Azure-style settings
        (``OPENAI_API_TYPE``, ``OPENAI_API_BASE``, ``OPENAI_API_VERSION``)
        are intentionally not configured.
        """
        # NOTE(review): this assigns an attribute on the OpenAI client class
        # itself, not on a client instance. Confirm that the clients created
        # elsewhere actually pick the key up from here rather than from the
        # environment variable directly.
        OpenAI.api_key = os.getenv("OPENAI_API_KEY")

    def create_directory(self, directory_path: str) -> None:
        """Create ``directory_path`` (and missing parents) if needed.

        Calling this on a directory that already exists is a no-op.

        Args:
            directory_path: Path of the directory to create.

        Raises:
            FileExistsError: If the path exists but is not a directory.
        """
        # exist_ok avoids the check-then-create race of testing
        # os.path.exists first and calling makedirs afterwards.
        os.makedirs(directory_path, exist_ok=True)

    def remove_directory(self, directory_path: str) -> None:
        """Delete ``directory_path`` and everything below it.

        The outcome is reported on stdout. A missing path and a failed
        removal are both reported rather than raised, so cleanup problems
        never abort the caller.

        Args:
            directory_path: Path of the directory to remove.
        """
        if not os.path.exists(directory_path):
            print(f"The directory '{directory_path}' does not exist.")
            return

        try:
            shutil.rmtree(directory_path)
        except OSError as e:
            print(f"Error: {e}")
        else:
            print(
                f"The directory '{directory_path}' has been successfully removed.")
|