Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass, field | |
| import uuid | |
| from src.constants import * | |
| import os | |
| from typing import List | |
| import time | |
# Root folder for everything the config classes in this module write by
# default. The timestamp is taken once, when the module is first imported,
# so all configs created in the same process share one folder.
BASE_FOLDER_NAME = "artifacts/" + str(time.time())
@dataclass
class DataIngestionConfig:
    """Configuration for ingesting a single input file.

    The ``@dataclass`` decorator is required: without it no ``__init__`` is
    generated, ``save_file_path`` stays a raw ``dataclasses.Field`` object,
    and ``__post_init__`` is never invoked.
    """

    # Path of the file to ingest (required).
    input_file_path: str
    # Destination path for the ingested copy; when left as None a path is
    # generated in __post_init__.
    save_file_path: str = None

    def __post_init__(self):
        """Fill in ``save_file_path`` with a generated path when not given."""
        if self.save_file_path is None:
            # Random UUID-based file name with a ".pdf" extension.
            random_name = f"{uuid.uuid4()}.pdf"
            # INGESTION_FOLDER_NAME is not defined in this module;
            # presumably it comes from the `src.constants` star import.
            self.save_file_path = os.path.join(
                BASE_FOLDER_NAME, INGESTION_FOLDER_NAME, random_name
            )
@dataclass
class ContentEmbedderConfig:
    """Container for a list of ``DataIngestionConfig`` objects.

    Declared as a dataclass so the annotated field becomes a constructor
    argument; as a plain class the annotation alone creates no attribute
    and no ``__init__``.
    """

    # One ingestion config per input file (required).
    data_ingestion_configs: List[DataIngestionConfig]
@dataclass
class DataTransformationConfig:
    """Configuration holding the location of the vector store.

    The ``@dataclass`` decorator is required so that ``vector_store_path``
    becomes a real instance attribute and ``__post_init__`` runs to fill in
    the default path.
    """

    # Vector store location; when left as None a default under
    # BASE_FOLDER_NAME is assigned in __post_init__.
    vector_store_path: str = None

    def __post_init__(self):
        """Default ``vector_store_path`` to a folder under BASE_FOLDER_NAME."""
        if self.vector_store_path is None:
            self.vector_store_path = os.path.join(
                BASE_FOLDER_NAME, "transformation", "vector_store"
            )
@dataclass
class ContentTransformationConfig:
    """Container for a list of ``DataTransformationConfig`` objects.

    Declared as a dataclass so the annotated field becomes a constructor
    argument; as a plain class the annotation alone creates no attribute
    and no ``__init__``.
    """

    # Transformation configs to process (required).
    data_transformation_configs: List[DataTransformationConfig]
@dataclass
class RetreiverConfig:
    """Retriever settings plus the location of the vector store to read.

    The ``@dataclass`` decorator is required: without it ``ensemble_weights``
    and ``vector_store_path`` are raw ``dataclasses.Field`` objects rather
    than a list / path, and ``__post_init__`` never runs.

    NOTE(review): the class name is misspelled ("Retreiver"); it is kept
    as-is because callers elsewhere import it under this name.
    """

    # Vector store location; when left as None the same default used by the
    # transformation step is assigned in __post_init__.
    vector_store_path: str = None
    # Presumably the number of results to retrieve -- confirm against caller.
    k: int = 5
    # Presumably per-retriever weights for an ensemble of two retrievers --
    # confirm against caller. default_factory gives each instance its own list.
    ensemble_weights: List[float] = field(default_factory=lambda: [0.7, 0.3])
    # Presumably document partitioning/chunking parameters -- confirm against
    # the code that consumes this config.
    partition_strategy: str = "hi_res"
    max_characters: int = 3000
    new_after_n_chars: int = 2400
    combine_text_under_n_chars: int = 50

    def __post_init__(self):
        """Default ``vector_store_path`` to a folder under BASE_FOLDER_NAME."""
        if self.vector_store_path is None:
            self.vector_store_path = os.path.join(
                BASE_FOLDER_NAME, "transformation", "vector_store"
            )