# Application configuration: module-level constants only.
# Every setting lives on its own line; a comment must never share a physical
# line with a later assignment, otherwise that assignment is silently
# commented out.

# ---------------------------------------------------------------------------
# Local Hugging Face model settings
# ---------------------------------------------------------------------------
# Fast embedding model.
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Better for summarization and QA tasks.
CHAT_MODEL = "google/flan-t5-base"

# Alternative chat models you can use (just change CHAT_MODEL):
#   "google/flan-t5-small"    (faster, smaller - 250MB)
#   "google/flan-t5-base"     (good balance - 990MB) - RECOMMENDED
#   "google/flan-t5-large"    (better quality, slower - 3GB)
#   "facebook/bart-large-cnn" (excellent for summarization but larger)
#   "t5-small"                (good for summarization, 240MB)

# ---------------------------------------------------------------------------
# Model settings
# ---------------------------------------------------------------------------
# Maximum tokens for generation.
# NOTE(review): confirm this does not exceed the selected model's own limit.
MODEL_MAX_LENGTH = 1000
# Creativity (0.0 = deterministic, 1.0 = very creative).
TEMPERATURE = 0.7
# Set to False if you don't have a GPU.
# NOTE(review): USE_CUDA is True while DEVICE is "cpu"; confirm which of the
# two the model loader actually honors and align them.
USE_CUDA = True
# One of "auto", "cuda", "cpu".
DEVICE = "cpu"
# Local directory to cache downloaded models.
MODEL_CACHE_DIR = "./models"

# ---------------------------------------------------------------------------
# Document processing settings
# ---------------------------------------------------------------------------
# NOTE(review): units (characters vs. tokens) are not visible here; verify
# against the text splitter that reads these values.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

# ---------------------------------------------------------------------------
# Vector store settings
# ---------------------------------------------------------------------------
SIMILARITY_THRESHOLD = 0.1
MAX_SEARCH_RESULTS = 5

# ---------------------------------------------------------------------------
# Web search settings
# ---------------------------------------------------------------------------
WEB_SEARCH_RESULTS = 5
# NOTE(review): presumably seconds; confirm against the caller.
WEB_SEARCH_TIMEOUT = 10

# ---------------------------------------------------------------------------
# Query routing settings
# ---------------------------------------------------------------------------
WEB_SEARCH_CONFIDENCE_THRESHOLD = 0.6
DOCUMENT_SEARCH_CONFIDENCE_THRESHOLD = 0.7
HYBRID_THRESHOLD = 0.3

# ---------------------------------------------------------------------------
# Fallback settings (used if the primary local models are not available)
# NOTE(review): the original heading said "local OpenAI models"; the models
# configured above are Hugging Face ones, so confirm the intended wording.
# ---------------------------------------------------------------------------
USE_SENTENCE_TRANSFORMERS_FALLBACK = True
# Sentence Transformers model name.
FALLBACK_EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# ---------------------------------------------------------------------------
# UI settings
# ---------------------------------------------------------------------------
PAGE_TITLE = "Universal Document Intelligence Chatbot"
LAYOUT = "wide"

# ---------------------------------------------------------------------------
# File settings
# ---------------------------------------------------------------------------
# File extensions accepted for upload (without the leading dot).
SUPPORTED_FILE_TYPES = ["pdf"]
MAX_FILE_SIZE_MB = 50

# ---------------------------------------------------------------------------
# Response settings
# ---------------------------------------------------------------------------
MAX_RESPONSE_LENGTH = 2000
MAX_SOURCES_DISPLAYED = 3