File size: 7,902 Bytes
0a372e8
 
ff7c628
0a372e8
ff7c628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cdf68de
 
ff7c628
 
 
 
 
 
268baab
ff7c628
 
 
0a372e8
ff7c628
 
 
0a372e8
ff7c628
 
 
0a372e8
ff7c628
 
 
0a372e8
ff7c628
 
 
 
 
b482b16
ff7c628
 
 
 
 
 
 
 
 
 
 
593a090
 
ff7c628
 
 
 
b482b16
ff7c628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a372e8
ff7c628
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
"""
Configuration settings for the Executive Education RAG Chatbot.
PLEASE CONSIDER READING THE 'docs/configuration_system_documentation.md' TO PROPERLY USE THE NEW CONFIGURATION SYSTEM.
"""
# ========================================= General Configuration ===========================================

# Languages in which the application can operate, given as a list of
# ISO 639 language codes. Defaults to ['en', 'de'].
AVAILABLE_LANGUAGES = ['en', 'de']

# Directory (path string, relative to the project root or absolute) that
# receives data output files such as scraping or document processing outputs.
DATA_PATH = 'data'

# Directory (path string, relative to the project root or absolute) that
# receives the logging files.
LOGS_PATH = 'logs'

# =================================== Conversation State Configuration ======================================

# Boolean switch (True or False). Enables the collection of user preferences
# during the conversation to avoid repetitive questions. Defaults to True.
TRACK_USER_PROFILE = True

# Integer. Number of user messages after which the language of the
# conversation is locked. A value of 0 means the language is never locked.
LOCK_LANGUAGE_AFTER_N_MESSAGES = 3

# Integer. Maximum number of conversation turns, counted as the sum of user
# queries and agent responses. The conversation ends once this limit is reached.
MAX_CONVERSATION_TURNS = 20

# ============================================ LLM Configuration ============================================

# Main model provider for the application. A string; one of 'openai', 'groq',
# 'open_router' or 'ollama' (local).
LLM_PROVIDER = 'openai'

# Name of the model (string) that will be used by the application agents.
OPENAI_MODEL = 'gpt-5.1'
# Model names for the other providers are left unset (commented out) here:
# GROQ_MODEL =
# OLLAMA_MODEL =
# OPEN_ROUTER_MODEL =

# ==================================== Weaviate Database Configuration ======================================

# Boolean (True or False). Defines whether the database is set up as a local
# instance (via a Docker container) or as a cloud service.
# More information: https://docs.weaviate.io/weaviate.
WEAVIATE_IS_LOCAL = False

# String. Base name of the collections stored in the database.
# A new collection is created for each available language, named
# <WEAVIATE_COLLECTION_BASENAME>_<LANGUAGE>.
WEAVIATE_COLLECTION_BASENAME = 'hsg_rag_content'

# String; one of 'manual', 'filesystem' (local instance) or 's3' (AWS).
# Selects the service used for storing the database backups.
# More information: https://docs.weaviate.io/deploy/configuration/backups.
WEAVIATE_BACKUP_METHOD = 'manual'

# System path (string) where backups will be stored.
# Used only if WEAVIATE_BACKUP_METHOD is set to 'manual'.
BACKUPS_PATH = 'data/database/backups'

# System path (string) where collection properties will be stored.
PROPERTIES_PATH = 'data/database'

# System path (string) where property strategies will be stored.
# See the documentation for more information on property strategies.
STRATEGIES_PATH = 'data/database/strategies'

# Integer, in seconds. Connection timeout for the cloud Weaviate service.
# Defaults to 90.
WEAVIATE_INIT_TIMEOUT = 90

# Integer, in seconds. Time limit for a query response when querying the
# database. Defaults to 60.
WEAVIATE_QUERY_TIMEOUT = 60

# Integer, in seconds. Time limit for chunk insertion when importing new
# chunks into the database. Defaults to 600.
WEAVIATE_INSERT_TIMEOUT = 600

# ========================================== Cache Configuration ============================================

# Default cache mode. A string; one of 'local', 'cloud' (Redis) or 'dict'.
# Defaults to 'cloud'. See the documentation for more information on cache modes.
CACHE_MODE = 'cloud'

# Integer, in seconds. Reset time (time to live) of the cache storage; the
# storage is cleared once the reset time is exceeded.
# Defaults to 86400 seconds (24 hours).
CACHE_TTL = 86400

# Integer. Maximum number of cached messages held in the cache storage.
# Defaults to 1000.
CACHE_MAX_SIZE = 1000

# String. Address used to access the local cache storage.
# Defaults to 'localhost'.
CACHE_LOCAL_HOST = 'localhost'

# Integer. Port used to access the local cache storage. Defaults to 6379.
CACHE_LOCAL_PORT = 6379

# ===================================== Data Processing Configuration =======================================

# Name (string) of the embedding model used for embedding generation.
# The MAX_TOKENS parameter must match this model's maximum token count.
EMBEDDING_MODEL = 'sentence-transformers/multi-qa-mpnet-base-dot-v1'

# Float in the range 0 to 1. Threshold for the English language in the
# language detector: if the language detection certainty is lower than this
# threshold, the English language is returned.
LANG_AMBIGUITY_THRESHOLD = 0.6

# Integer. Maximum number of tokens per single chunk.
MAX_TOKENS = 512

# Integer. Number of overlapping tokens between chunks, kept to preserve context.
CHUNK_OVERLAP = 100

# Integer, in seconds. Maximum waiting time for the target server's responses
# during the scraping procedures.
SCRAPING_TIMEOUT = 30

# Integer. Maximum number of additional tries performed if the initial
# request to the server failed.
SCRAPING_MAX_RETRIES = 3

# Integer, in seconds. Waiting interval between two server calls.
# This value might be overwritten by the delay set by the server.
SCRAPING_CRAWL_DELAY = 1

# Float. Backoff base value for retries with exponential backoff.
# The higher the number, the longer the waiting interval between subsequent
# retries.
SCRAPING_BACKOFF_RATE = 1.25

# List of URL strings. Starting points for the website scraping.
SCRAPING_TARGET_URLS = [
    # 'https://emba.unisg.ch/',     # EMBA HSG root
    'https://embax.ch/',            # emba X root
]

# Scraping priority interval, in days, for each priority level.
SCRAPING_PRIO_INTERVAL = {'high': 1, 'medium': 7, 'low': 30}

# ======================================== Agent Chain Configuration ========================================

# Boolean (True or False). Activates the response quality evaluation
# procedure for agentic responses. Defaults to True.
ENABLE_EVALUATE_RESPONSE_QUALITY = True

# Float in the range 0 to 1. Threshold value for the quality evaluation:
# the fallback mechanism is activated if the quality of the agentic response
# is lower than this confidence threshold.
CONFIDENCE_THRESHOLD = 0.6

# Integer. Number of chunks retrieved from the database when subagents query
# it during a conversation. Defaults to 4.
TOP_K_RETRIEVAL = 4

# Integer. Number of model invocation retries after which the fallback model
# is invoked. Defaults to 3.
MODEL_MAX_RETRIES = 3

# Integer. Maximum number of words in a response from the lead agent.
MAX_RESPONSE_WORDS_LEAD = 100

# Integer. Maximum number of words in a response from subagents.
MAX_RESPONSE_WORDS_SUBAGENT = 200

# Boolean (True or False). If response chunking is enabled, long responses
# from the lead agent are split and returned over multiple conversation turns.
ENABLE_RESPONSE_CHUNKING = True

# ========================================== Notification Configuration =====================================

# Boolean switches (True or False) for alert notifications.
# NOTE(review): judging by the names, these enable email and Slack alerts
# respectively; confirm against the notification code.
NOTIFY_ENABLE_EMAIL_ALERTS = True
NOTIFY_ENABLE_SLACK_ALERTS = True

# ===========================================================================================================