Multi-Rag / src /entity /config_entity.py
VashuTheGreat2's picture
Upload folder using huggingface_hub
9c90775 verified
Raw
History Blame Contribute Delete
1.54 kB
from dataclasses import dataclass, field
import uuid
from src.constants import *
import os
from typing import List
import time
# Root artifacts folder, suffixed with the current epoch timestamp.
# Evaluated once when this module is imported, so every config created
# afterwards in the same process resolves its default paths under it.
BASE_FOLDER_NAME = "artifacts/{}".format(time.time())
@dataclass
class DataIngestionConfig:
    """File locations for ingesting one input document.

    Attributes:
        input_file_path: Path of the file to ingest.
        save_file_path: Destination path for the ingested file. When left as
            ``None`` it is replaced after construction with
            ``BASE_FOLDER_NAME/INGESTION_FOLDER_NAME/<uuid4>.pdf``.
    """

    input_file_path: str
    save_file_path: str = None

    def __post_init__(self):
        """Fill in ``save_file_path`` when the caller did not supply one."""
        # A caller-provided destination is kept exactly as given.
        if self.save_file_path is not None:
            return

        # Random UUID file name for the ingested copy.
        generated_name = str(uuid.uuid4()) + ".pdf"
        self.save_file_path = os.path.join(
            BASE_FOLDER_NAME, INGESTION_FOLDER_NAME, generated_name
        )
@dataclass
class ContentEmbedderConfig:
    """Bundle of per-file ingestion configs handled together.

    Attributes:
        data_ingestion_configs: One ``DataIngestionConfig`` per input file.
    """

    data_ingestion_configs: List[DataIngestionConfig]
@dataclass
class DataTransformationConfig:
    """Location of the vector store written by the transformation step.

    Attributes:
        vector_store_path: Path of the vector store. When left as ``None`` it
            is replaced after construction with
            ``BASE_FOLDER_NAME/transformation/vector_store``.
    """

    vector_store_path: str = None

    def __post_init__(self):
        """Fill in ``vector_store_path`` when the caller did not supply one."""
        # A caller-provided path is kept exactly as given.
        if self.vector_store_path is not None:
            return

        path_parts = (BASE_FOLDER_NAME, "transformation", "vector_store")
        self.vector_store_path = os.path.join(*path_parts)
@dataclass
class ContentTransformationConfig:
    """Bundle of transformation configs handled together.

    Attributes:
        data_transformation_configs: The ``DataTransformationConfig`` entries
            in this bundle.
    """

    data_transformation_configs: List[DataTransformationConfig]
@dataclass
class RetreiverConfig:
    """Settings for the retriever.

    Attributes:
        vector_store_path: Path of the vector store to read. When left as
            ``None`` it is replaced after construction with
            ``BASE_FOLDER_NAME/transformation/vector_store`` (the same default
            location ``DataTransformationConfig`` uses).
        k: Defaults to 5. Presumably the number of results to retrieve;
            confirm against the retriever code.
        ensemble_weights: Defaults to a fresh ``[0.7, 0.3]`` list per
            instance. Presumably the weights of the combined retrievers;
            confirm against the retriever code.
        partition_strategy: Defaults to ``"hi_res"``.
        max_characters: Defaults to 3000.
        new_after_n_chars: Defaults to 2400.
        combine_text_under_n_chars: Defaults to 50.
    """

    vector_store_path: str = None
    k: int = 5
    # default_factory gives every instance its own list, not a shared one.
    ensemble_weights: List[float] = field(default_factory=lambda: [0.7, 0.3])
    # NOTE(review): the four settings below look like document
    # partitioning/chunking parameters; confirm where they are consumed.
    partition_strategy: str = "hi_res"
    max_characters: int = 3000
    new_after_n_chars: int = 2400
    combine_text_under_n_chars: int = 50

    def __post_init__(self):
        """Fill in ``vector_store_path`` when the caller did not supply one."""
        # A caller-provided path is kept exactly as given.
        if self.vector_store_path is not None:
            return

        path_parts = (BASE_FOLDER_NAME, "transformation", "vector_store")
        self.vector_store_path = os.path.join(*path_parts)