|
|
|
|
|
import os |
|
|
from pathlib import Path |
|
|
|
|
|
# Spellings (compared case-insensitively, after stripping whitespace) that
# switch a boolean environment flag on.
_TRUTHY_FLAG_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_number(name, default, cast):
    """Return environment variable *name* converted with *cast*.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.
        cast: Converter applied to the raw string (``int`` or ``float``).

    Returns:
        ``cast(raw)`` for a non-blank value, otherwise *default*.

    Raises:
        ValueError: If the variable holds a non-blank value that *cast*
            rejects. The message names the offending variable.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        # A variable that is set but empty (e.g. ``PORT=``) is treated as
        # unset instead of failing on ``int("")``.
        return default
    try:
        return cast(raw)
    except ValueError as err:
        raise ValueError(
            "Environment variable {} must be a valid {}, got {!r}".format(
                name, cast.__name__, raw
            )
        ) from err


def _env_flag(name, default):
    """Return environment variable *name* interpreted as a boolean.

    An unset variable yields *default*. Any set value is true only when it
    matches one of ``_TRUTHY_FLAG_VALUES``; everything else, including an
    empty string, is false.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_FLAG_VALUES


class Config:
    """Centralized configuration for the PDF Analysis Orchestrator.

    Every setting is a class attribute whose value is taken from an
    environment variable, with a built-in default when the variable is not
    set. The values are computed once, when this class body is executed,
    so later changes to the environment are not picked up.
    """

    # OpenAI settings (OPENAI_MODEL, OPENAI_TEMPERATURE, OPENAI_MAX_TOKENS).
    OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4")
    OPENAI_TEMPERATURE = _env_number("OPENAI_TEMPERATURE", 0.2, float)
    OPENAI_MAX_TOKENS = _env_number("OPENAI_MAX_TOKENS", 1000, int)

    # Chunking and upload limits. CHUNK_SIZE is compared directly against
    # ``text_length`` in get_chunk_size_for_text, so both share one unit.
    CHUNK_SIZE = _env_number("CHUNK_SIZE", 15000, int)
    CHUNK_OVERLAP = _env_number("CHUNK_OVERLAP", 1000, int)
    # NOTE: the environment variable is ANALYSIS_MAX_UPLOAD_MB, not
    # MAX_FILE_SIZE_MB.
    MAX_FILE_SIZE_MB = _env_number("ANALYSIS_MAX_UPLOAD_MB", 50, int)

    # Caching. CACHE_ENABLED accepts 1/true/yes/on in any letter case.
    CACHE_ENABLED = _env_flag("CACHE_ENABLED", True)
    CACHE_TTL_HOURS = _env_number("CACHE_TTL_HOURS", 24, int)

    # Session storage directory (environment variable ANALYSIS_SESSION_DIR).
    SESSION_DIR = os.environ.get("ANALYSIS_SESSION_DIR", "/tmp/analysis_sessions")

    # Server bind settings. NOTE: SERVER_PORT is read from PORT.
    SERVER_NAME = os.environ.get("SERVER_NAME", "0.0.0.0")
    SERVER_PORT = _env_number("PORT", 7860, int)

    # Export settings.
    EXPORT_DIR = os.environ.get("EXPORT_DIR", "/tmp/analysis_exports")
    SUPPORTED_EXPORT_FORMATS = ["txt", "json", "pdf"]

    # Prompt storage directory.
    PROMPTS_DIR = os.environ.get("PROMPTS_DIR", "/tmp/analysis_prompts")

    @classmethod
    def ensure_directories(cls) -> None:
        """Create the session, export and prompt directories if missing.

        Missing parent directories are created as well, and directories
        that already exist are left untouched, so the call is safe to
        repeat.
        """
        for directory in (cls.SESSION_DIR, cls.EXPORT_DIR, cls.PROMPTS_DIR):
            Path(directory).mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_chunk_size_for_text(cls, text_length: int) -> int:
        """Determine the chunk size to use for a text of *text_length*.

        Args:
            text_length: Length of the text to be chunked.

        Returns:
            *text_length* itself when the text fits within ``CHUNK_SIZE``,
            otherwise ``CHUNK_SIZE``. The result is never negative: a
            negative *text_length* yields 0.
        """
        return max(0, min(text_length, cls.CHUNK_SIZE))
|
|
|