Spaces:
Running
Running
File size: 7,902 Bytes
0a372e8 ff7c628 0a372e8 ff7c628 cdf68de ff7c628 268baab ff7c628 0a372e8 ff7c628 0a372e8 ff7c628 0a372e8 ff7c628 0a372e8 ff7c628 b482b16 ff7c628 593a090 ff7c628 b482b16 ff7c628 0a372e8 ff7c628 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | """
Configuration settings for the Executive Education RAG Chatbot.
PLEASE CONSIDER READING THE 'docs/configuration_system_documentation.md' TO PROPERLY USE THE NEW CONFIGURATION SYSTEM.
"""
# ========================================= General Configuration ===========================================
# A list of ISO 639 language codes. Defines the languages in which
# the application can operate. Defaults to ['en', 'de'].
AVAILABLE_LANGUAGES = ['en', 'de']
# A string representing a path (relative to the project root or absolute) to the directory
# where data output files, such as scraping or document processing outputs, will be stored.
DATA_PATH = 'data'
# A string representing a path (relative to the project root or absolute) to the directory
# where the log files will be stored.
LOGS_PATH = 'logs'
# =================================== Conversation State Configuration ======================================
# A boolean; either True or False. Enables the collection of user preferences
# during the conversation to avoid repetitive questions. Defaults to True.
TRACK_USER_PROFILE = True
# An integer. Defines the number of user messages after which the language
# of the conversation will be locked. If set to 0, the language will not be locked.
LOCK_LANGUAGE_AFTER_N_MESSAGES = 3
# An integer. Sets the maximum number of conversation turns, counted as the sum of user queries
# and agent responses. The conversation ends once the maximum number of turns is reached.
MAX_CONVERSATION_TURNS = 20
# ============================================ LLM Configuration ============================================
# A string, either 'openai', 'groq', 'open_router' or 'ollama' (local).
# Defines the main model provider for the application.
LLM_PROVIDER = 'openai'
# A string. Defines the model that will be used by the application agents.
OPENAI_MODEL = 'gpt-5.1'
# Unset placeholders for the models of the other providers.
# NOTE(review): presumably the entry matching LLM_PROVIDER must be set when the provider
# is changed - confirm against 'docs/configuration_system_documentation.md'.
# GROQ_MODEL =
# OLLAMA_MODEL =
# OPEN_ROUTER_MODEL =
# ==================================== Weaviate Database Configuration ======================================
# A boolean; either True or False.
# Defines whether the database is set up as a local instance (via Docker container)
# or as a cloud service. More information on https://docs.weaviate.io/weaviate.
WEAVIATE_IS_LOCAL = False
# A string. Defines the base name of the collections stored in the database.
# For each available language a new collection will be created
# with the name <WEAVIATE_COLLECTION_BASENAME>_<LANGUAGE>.
WEAVIATE_COLLECTION_BASENAME = 'hsg_rag_content'
# A string; either 'manual', 'filesystem' (local instance) or 's3' (AWS).
# Defines the service for storing the database backups.
# More information on https://docs.weaviate.io/deploy/configuration/backups.
WEAVIATE_BACKUP_METHOD = 'manual'
# A string representing a path in the system where backups will be stored.
# Used only if WEAVIATE_BACKUP_METHOD is set to 'manual'.
BACKUPS_PATH = 'data/database/backups'
# A string representing a system path where collection properties will be stored.
PROPERTIES_PATH = 'data/database'
# A string representing a system path where property strategies will be stored.
# More information on property strategies in the documentation.
STRATEGIES_PATH = 'data/database/strategies'
# An integer. Defines the connection timeout for the cloud Weaviate service (in seconds).
# Defaults to 90.
WEAVIATE_INIT_TIMEOUT = 90
# An integer. Defines the response time limit when querying the database (in seconds).
# Defaults to 60.
WEAVIATE_QUERY_TIMEOUT = 60
# An integer. Defines the time limit for inserting new chunks into the database (in seconds).
# Defaults to 600.
WEAVIATE_INSERT_TIMEOUT = 600
# ========================================== Cache Configuration ============================================
# A string; either 'local', 'cloud' (Redis) or 'dict'. Defaults to 'cloud'.
# Sets the default cache mode. More information on cache modes in the documentation.
CACHE_MODE = 'cloud'
# An integer. Sets the reset time (time to live) in seconds for the cache storage.
# The cache storage will be cleared once the reset time is exceeded.
# Defaults to 86400 seconds (24 hours).
CACHE_TTL = 86400
# An integer. Maximum number of cached messages that will be held in the cache storage.
# Defaults to 1000.
CACHE_MAX_SIZE = 1000
# A string. Defines the host name or IP address of the local cache storage. Defaults to 'localhost'.
CACHE_LOCAL_HOST = 'localhost'
# An integer. Defines the port for accessing the local cache storage. Defaults to 6379.
CACHE_LOCAL_PORT = 6379
# ===================================== Data Processing Configuration =======================================
# A string representing the name of the embedding model used for embedding generation.
# The parameter MAX_TOKENS must match this model's maximum token count.
EMBEDDING_MODEL = 'sentence-transformers/multi-qa-mpnet-base-dot-v1'
# A float in range from 0 to 1. Sets the certainty threshold of the language detector.
# If the language detection certainty is lower than the threshold, English will be returned.
LANG_AMBIGUITY_THRESHOLD = 0.6
# An integer. Defines the maximum number of tokens per chunk.
MAX_TOKENS = 512
# An integer. Defines the number of overlapping tokens between chunks to keep the context.
CHUNK_OVERLAP = 100
# An integer representing seconds. Defines the maximum waiting time for the target server
# responses during the scraping procedures.
SCRAPING_TIMEOUT = 30
# An integer. Defines the maximum number of additional tries that will be performed
# if the initial request to the server failed.
SCRAPING_MAX_RETRIES = 3
# An integer representing seconds. Defines the waiting interval between two server calls.
# This value might be overridden by the delay set by the server.
SCRAPING_CRAWL_DELAY = 1
# A number (integer or float). Defines the backoff base value for retries with exponential backoff.
# The higher the number, the longer the waiting interval between subsequent retries.
SCRAPING_BACKOFF_RATE = 1.25
# A list of URL strings. These are the starting points for the website scraping.
SCRAPING_TARGET_URLS = [
    # 'https://emba.unisg.ch/', # EMBA HSG root
    'https://embax.ch/',  # emba X root
]
# A dictionary mapping a scraping priority level to its scraping interval in days.
SCRAPING_PRIO_INTERVAL = {"high": 1, "medium": 7, "low": 30}
# ======================================== Agent Chain Configuration ========================================
# A boolean; either True or False. Activates the response quality evaluation procedure
# for agentic responses. Defaults to True.
ENABLE_EVALUATE_RESPONSE_QUALITY = True
# A float in range from 0 to 1. Sets the threshold value for the quality evaluation.
# The fallback mechanism will be activated if the quality of the agentic response
# is lower than the confidence threshold.
CONFIDENCE_THRESHOLD = 0.6
# An integer. Defines the number of chunks that should be retrieved from the database
# upon querying by subagents during conversation. Defaults to 4.
TOP_K_RETRIEVAL = 4
# An integer. Sets the number of model invocation retries after which the fallback model
# will be invoked. Defaults to 3.
MODEL_MAX_RETRIES = 3
# An integer. Sets the maximum number of words in the response from the lead agent.
MAX_RESPONSE_WORDS_LEAD = 100
# An integer. Sets the maximum number of words in the response from subagents.
MAX_RESPONSE_WORDS_SUBAGENT = 200
# A boolean; either True or False. If response chunking is enabled, long responses
# from the lead agent will be split and returned through multiple conversation turns.
ENABLE_RESPONSE_CHUNKING = True
# ========================================== Notification Configuration =====================================
# A boolean; either True or False.
# NOTE(review): presumably enables sending alert notifications by email - confirm against
# 'docs/configuration_system_documentation.md'.
NOTIFY_ENABLE_EMAIL_ALERTS = True
# A boolean; either True or False.
# NOTE(review): presumably enables sending alert notifications to Slack - confirm against
# 'docs/configuration_system_documentation.md'.
NOTIFY_ENABLE_SLACK_ALERTS = True
# ===========================================================================================================
|