File size: 1,914 Bytes
2c5e855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# config.py - Configuration management for PDF Analysis & Orchestrator
import os
from pathlib import Path

# Spellings (compared case-insensitively) that switch a boolean setting on.
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "on"})


def _env_number(name, default, cast):
    """Read environment variable *name* and convert it with *cast*.

    A variable that is unset, empty, or whitespace-only yields *default*,
    so a blank entry such as ``PORT=`` no longer aborts the import.

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is unset or blank.
        cast: Conversion callable applied to the raw text (``int``/``float``).

    Returns:
        The converted value, or *default* when nothing usable is set.

    Raises:
        ValueError: If the variable holds text that *cast* rejects. The
            message names the offending variable and its value.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        return cast(raw)
    except ValueError as exc:
        raise ValueError(
            f"Environment variable {name}={raw!r} is not a valid "
            f"{cast.__name__}"
        ) from exc


def _env_flag(name, default):
    """Read environment variable *name* as an on/off switch.

    Returns *default* when the variable is unset. Otherwise returns True
    for any spelling in ``_TRUTHY_VALUES`` (surrounding whitespace and
    letter case ignored) and False for every other value, including an
    empty string.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUTHY_VALUES


class Config:
    """Centralized configuration for the PDF Analysis Orchestrator.

    Each attribute is read from the process environment once, while this
    class body executes; the literal passed alongside the variable name is
    the fallback used when the variable is not set.
    """

    # OpenAI Configuration
    OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4")
    OPENAI_TEMPERATURE = _env_number("OPENAI_TEMPERATURE", 0.2, float)
    OPENAI_MAX_TOKENS = _env_number("OPENAI_MAX_TOKENS", 1000, int)

    # Document Processing
    CHUNK_SIZE = _env_number("CHUNK_SIZE", 15000, int)
    CHUNK_OVERLAP = _env_number("CHUNK_OVERLAP", 1000, int)
    # NOTE: the attribute is MAX_FILE_SIZE_MB but the variable consulted
    # is ANALYSIS_MAX_UPLOAD_MB.
    MAX_FILE_SIZE_MB = _env_number("ANALYSIS_MAX_UPLOAD_MB", 50, int)

    # Caching
    CACHE_ENABLED = _env_flag("CACHE_ENABLED", True)
    CACHE_TTL_HOURS = _env_number("CACHE_TTL_HOURS", 24, int)

    # Session Management
    SESSION_DIR = os.environ.get("ANALYSIS_SESSION_DIR", "/tmp/analysis_sessions")

    # UI Configuration
    SERVER_NAME = os.environ.get("SERVER_NAME", "0.0.0.0")
    # NOTE: the port is read from PORT, not SERVER_PORT.
    SERVER_PORT = _env_number("PORT", 7860, int)

    # Export Settings
    EXPORT_DIR = os.environ.get("EXPORT_DIR", "/tmp/analysis_exports")
    SUPPORTED_EXPORT_FORMATS = ["txt", "json", "pdf"]

    # Custom Prompts
    PROMPTS_DIR = os.environ.get("PROMPTS_DIR", "/tmp/analysis_prompts")

    @classmethod
    def ensure_directories(cls) -> None:
        """Create the session, export and prompts directories.

        Missing parent directories are created as well, and a directory
        that already exists is left as it is.
        """
        for directory in (cls.SESSION_DIR, cls.EXPORT_DIR, cls.PROMPTS_DIR):
            Path(directory).mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_chunk_size_for_text(cls, text_length: int) -> int:
        """Return the chunk size to use for a text of ``text_length``.

        Args:
            text_length: Length of the text to be chunked.

        Returns:
            ``text_length`` itself when it does not exceed ``CHUNK_SIZE``,
            otherwise ``CHUNK_SIZE``.
        """
        return min(text_length, cls.CHUNK_SIZE)