datasciencesage's picture
Upload 6 files
65fc30a verified
import os
from pathlib import Path
from loguru import logger
from dotenv import load_dotenv
load_dotenv()
class Config:
"""app configuration"""
# API stuff
API_TITLE = "Document Intelligence: Bureau & GST Data Extraction"
API_VERSION = "1.0.0"
API_HOST = "127.0.0.1"
API_PORT = 8000
# OpenAI config
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# model settings
OPENAI_MODEL = "gpt-4o-mini" # for RAG
OPENAI_VISION_MODEL = "gpt-4o" # for vision extraction
OPENAI_EMBEDDING_MODEL = "text-embedding-3-large"
OPENAI_TEMPERATURE = 0.0 # deterministic extraction
# vision settings
USE_VISION = True # enable GPT-4 Vision
VISION_DPI = 200 # image quality for PDF conversion
VISION_PRIORITY = True # use vision as primary method
# RAG settings
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50
TOP_K_CHUNKS = 5
SIMILARITY_THRESHOLD = 0.3
# directories
BASE_DIR = Path(__file__).parent
STORAGE_DIR = BASE_DIR / "storage"
UPLOADS_DIR = STORAGE_DIR / "uploads"
LOGS_DIR = BASE_DIR / "logs"
# default file locations
DEFAULT_BUREAU_DIR = STORAGE_DIR / "bureau_reports"
DEFAULT_GST_DIR = STORAGE_DIR / "gst_returns"
DEFAULT_PARAMETERS_DIR = STORAGE_DIR / "parameters"
DEFAULT_PARAMETERS_EXCEL = DEFAULT_PARAMETERS_DIR / "Bureau-parameters-Report.xlsx"
@classmethod
def ensure_directories(cls):
"""create necessary directories"""
directories = [
cls.STORAGE_DIR,
cls.UPLOADS_DIR,
cls.LOGS_DIR,
cls.DEFAULT_BUREAU_DIR,
cls.DEFAULT_GST_DIR,
cls.DEFAULT_PARAMETERS_DIR
]
for directory in directories:
directory.mkdir(parents=True, exist_ok=True)
@classmethod
def validate_configuration(cls):
"""validate config and check for issues"""
issues = []
# check API key
if not cls.OPENAI_API_KEY:
issues.append("OPENAI_API_KEY not set")
# make sure directories exist
cls.ensure_directories()
# check vision settings
if cls.USE_VISION and not cls.OPENAI_VISION_MODEL:
issues.append("USE_VISION=True but OPENAI_VISION_MODEL not set")
# log any issues
for issue in issues:
logger.error(f"Config issue: {issue}")
if len(issues) == 0:
return True
return False
@classmethod
def get_default_parameters_excel(cls):
"""get default parameters Excel file path"""
if cls.DEFAULT_PARAMETERS_EXCEL.exists():
return cls.DEFAULT_PARAMETERS_EXCEL
return None