WFRaain committed on
Commit
dafe4be
·
1 Parent(s): 09f99aa
Files changed (1) hide show
  1. flowsettings.py +160 -36
flowsettings.py CHANGED
@@ -4,39 +4,43 @@ from inspect import currentframe, getframeinfo
4
  from pathlib import Path
5
 
6
  from decouple import config
 
7
  from theflow.settings.default import * # noqa
8
 
9
  cur_frame = currentframe()
10
  if cur_frame is None:
11
  raise ValueError("Cannot get the current frame.")
12
  this_file = getframeinfo(cur_frame).filename
13
- this_dir = Path(this_file).parent
14
 
15
  # change this if your app uses a different name
16
  KH_PACKAGE_NAME = "kotaemon_app"
17
 
18
- KH_APP_VERSION = config("KH_APP_VERSION", "local")
19
  if not KH_APP_VERSION:
20
  try:
21
  # Caution: This might produce the wrong version
22
  # https://stackoverflow.com/a/59533071
23
  KH_APP_VERSION = version(KH_PACKAGE_NAME)
24
- except Exception as e:
25
- print(f"Failed to get app version: {e}")
 
 
 
 
 
26
 
27
  # App can be run from anywhere and it's not trivial to decide where to store app data.
28
  # So let's use the same directory as the flowsetting.py file.
29
- # KH_APP_DATA_DIR = this_dir / "ktem_app_data"
30
-
31
- # override app data dir to fit preview data
32
- KH_APP_DATA_DIR = Path("/home/ubuntu/lib-knowledgehub/kotaemon/ktem_app_data")
33
  KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)
34
 
35
  # User data directory
36
  KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"
37
  KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True)
38
 
39
- # markdowm output directory
40
  KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"
41
  KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
42
 
@@ -62,7 +66,14 @@ os.environ["HF_HUB_CACHE"] = str(KH_APP_DATA_DIR / "huggingface")
62
  KH_DOC_DIR = this_dir / "docs"
63
 
64
  KH_MODE = "dev"
65
- KH_FEATURE_USER_MANAGEMENT = False
 
 
 
 
 
 
 
66
  KH_USER_CAN_SEE_PUBLIC = None
67
  KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
68
  config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
@@ -71,8 +82,12 @@ KH_FEATURE_USER_MANAGEMENT_PASSWORD = str(
71
  config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
72
  )
73
  KH_ENABLE_ALEMBIC = False
74
- KH_DATABASE = f"sqlite:///file:{KH_USER_DATA_DIR / 'sql.db?mode=ro&uri=true'}"
75
  KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")
 
 
 
 
76
 
77
  KH_DOCSTORE = {
78
  # "__type__": "kotaemon.storages.ElasticsearchDocumentStore",
@@ -83,10 +98,13 @@ KH_DOCSTORE = {
83
  KH_VECTORSTORE = {
84
  # "__type__": "kotaemon.storages.LanceDBVectorStore",
85
  "__type__": "kotaemon.storages.ChromaVectorStore",
 
 
86
  "path": str(KH_USER_DATA_DIR / "vectorstore"),
87
  }
88
  KH_LLMS = {}
89
  KH_EMBEDDINGS = {}
 
90
 
91
  # populate options from config
92
  if config("AZURE_OPENAI_API_KEY", default="") and config(
@@ -122,52 +140,68 @@ if config("AZURE_OPENAI_API_KEY", default="") and config(
122
  "default": False,
123
  }
124
 
125
- if config("OPENAI_API_KEY", default=""):
 
 
 
 
 
126
  KH_LLMS["openai"] = {
127
  "spec": {
128
  "__type__": "kotaemon.llms.ChatOpenAI",
129
  "temperature": 0,
130
  "base_url": config("OPENAI_API_BASE", default="")
131
  or "https://api.openai.com/v1",
132
- "api_key": config("OPENAI_API_KEY", default=""),
133
- "model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"),
134
  "timeout": 20,
135
  },
136
- "default": True,
137
  }
138
  KH_EMBEDDINGS["openai"] = {
139
  "spec": {
140
  "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
141
  "base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"),
142
- "api_key": config("OPENAI_API_KEY", default=""),
143
  "model": config(
144
- "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
145
  ),
146
  "timeout": 10,
147
  "context_length": 8191,
148
  },
149
- "default": True,
150
  }
151
 
152
  if config("LOCAL_MODEL", default=""):
153
  KH_LLMS["ollama"] = {
154
  "spec": {
155
  "__type__": "kotaemon.llms.ChatOpenAI",
156
- "base_url": "http://localhost:11434/v1/",
157
- "model": config("LOCAL_MODEL", default="llama3.1:8b"),
 
 
 
 
 
 
 
 
 
 
158
  },
159
  "default": False,
160
  }
 
161
  KH_EMBEDDINGS["ollama"] = {
162
  "spec": {
163
  "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
164
- "base_url": "http://localhost:11434/v1/",
165
  "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
 
166
  },
167
  "default": False,
168
  }
169
-
170
- KH_EMBEDDINGS["local-bge-en"] = {
171
  "spec": {
172
  "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
173
  "model_name": "BAAI/bge-base-en-v1.5",
@@ -175,13 +209,84 @@ if config("LOCAL_MODEL", default=""):
175
  "default": False,
176
  }
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  KH_REASONINGS = [
179
  "ktem.reasoning.simple.FullQAPipeline",
180
  "ktem.reasoning.simple.FullDecomposeQAPipeline",
181
  "ktem.reasoning.react.ReactAgentPipeline",
182
  "ktem.reasoning.rewoo.RewooAgentPipeline",
183
  ]
184
- KH_REASONINGS_USE_MULTIMODAL = False
185
  KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
186
  config("AZURE_OPENAI_ENDPOINT", default=""),
187
  config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
@@ -202,7 +307,7 @@ SETTINGS_REASONING = {
202
  "lang": {
203
  "name": "Language",
204
  "value": "en",
205
- "choices": [("English", "en"), ("Japanese", "ja"), ("Vietnamese", "vi")],
206
  "component": "dropdown",
207
  },
208
  "max_context_length": {
@@ -212,32 +317,51 @@ SETTINGS_REASONING = {
212
  },
213
  }
214
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
  KH_INDEX_TYPES = [
217
  "ktem.index.file.FileIndex",
218
- "ktem.index.file.graph.GraphRAGIndex",
219
  ]
220
- KH_INDICES = [
 
221
  {
222
- "name": "File",
 
223
  "config": {
224
  "supported_file_types": (
225
  ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
226
- ".pptx, .csv, .html, .mhtml, .txt, .zip"
227
  ),
228
- "private": False,
229
  },
230
- "index_type": "ktem.index.file.FileIndex",
231
- },
 
 
 
 
232
  {
233
- "name": "GraphRAG",
234
  "config": {
235
  "supported_file_types": (
236
  ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
237
- ".pptx, .csv, .html, .mhtml, .txt, .zip"
238
  ),
239
- "private": False,
240
  },
241
- "index_type": "ktem.index.file.graph.GraphRAGIndex",
242
  },
 
243
  ]
 
4
  from pathlib import Path
5
 
6
  from decouple import config
7
+ from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
8
  from theflow.settings.default import * # noqa
9
 
10
  cur_frame = currentframe()
11
  if cur_frame is None:
12
  raise ValueError("Cannot get the current frame.")
13
  this_file = getframeinfo(cur_frame).filename
14
+ this_dir = Path("/home/ubuntu/lib-knowledgehub/kotaemon/ktem_app_data")
15
 
16
  # change this if your app uses a different name
17
  KH_PACKAGE_NAME = "kotaemon_app"
18
 
19
+ KH_APP_VERSION = config("KH_APP_VERSION", None)
20
  if not KH_APP_VERSION:
21
  try:
22
  # Caution: This might produce the wrong version
23
  # https://stackoverflow.com/a/59533071
24
  KH_APP_VERSION = version(KH_PACKAGE_NAME)
25
+ except Exception:
26
+ KH_APP_VERSION = "local"
27
+
28
+ KH_GRADIO_SHARE = config("KH_GRADIO_SHARE", default=False, cast=bool)
29
+ KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", default=True, cast=bool)
30
+ KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
31
+ KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")
32
 
33
  # App can be run from anywhere and it's not trivial to decide where to store app data.
34
  # So let's use the same directory as the flowsettings.py file.
35
+ KH_APP_DATA_DIR = this_dir / "ktem_app_data"
36
+ KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()
 
 
37
  KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)
38
 
39
  # User data directory
40
  KH_USER_DATA_DIR = KH_APP_DATA_DIR / "user_data"
41
  KH_USER_DATA_DIR.mkdir(parents=True, exist_ok=True)
42
 
43
+ # markdown output directory
44
  KH_MARKDOWN_OUTPUT_DIR = KH_APP_DATA_DIR / "markdown_cache_dir"
45
  KH_MARKDOWN_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
46
 
 
66
  KH_DOC_DIR = this_dir / "docs"
67
 
68
  KH_MODE = "dev"
69
+ KH_SSO_ENABLED = config("KH_SSO_ENABLED", default=False, cast=bool)
70
+
71
+ KH_FEATURE_CHAT_SUGGESTION = config(
72
+ "KH_FEATURE_CHAT_SUGGESTION", default=False, cast=bool
73
+ )
74
+ KH_FEATURE_USER_MANAGEMENT = config(
75
+ "KH_FEATURE_USER_MANAGEMENT", default=True, cast=bool
76
+ )
77
  KH_USER_CAN_SEE_PUBLIC = None
78
  KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
79
  config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
 
82
  config("KH_FEATURE_USER_MANAGEMENT_PASSWORD", default="admin")
83
  )
84
  KH_ENABLE_ALEMBIC = False
85
+ KH_DATABASE = f"sqlite:///{KH_USER_DATA_DIR / 'sql.db'}"
86
  KH_FILESTORAGE_PATH = str(KH_USER_DATA_DIR / "files")
87
+ KH_WEB_SEARCH_BACKEND = (
88
+ "kotaemon.indices.retrievers.tavily_web_search.WebSearch"
89
+ # "kotaemon.indices.retrievers.jina_web_search.WebSearch"
90
+ )
91
 
92
  KH_DOCSTORE = {
93
  # "__type__": "kotaemon.storages.ElasticsearchDocumentStore",
 
98
  KH_VECTORSTORE = {
99
  # "__type__": "kotaemon.storages.LanceDBVectorStore",
100
  "__type__": "kotaemon.storages.ChromaVectorStore",
101
+ # "__type__": "kotaemon.storages.MilvusVectorStore",
102
+ # "__type__": "kotaemon.storages.QdrantVectorStore",
103
  "path": str(KH_USER_DATA_DIR / "vectorstore"),
104
  }
105
  KH_LLMS = {}
106
  KH_EMBEDDINGS = {}
107
+ KH_RERANKINGS = {}
108
 
109
  # populate options from config
110
  if config("AZURE_OPENAI_API_KEY", default="") and config(
 
140
  "default": False,
141
  }
142
 
143
+ OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
144
+ OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)
145
+ GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key")
146
+ IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT
147
+
148
+ if OPENAI_API_KEY:
149
  KH_LLMS["openai"] = {
150
  "spec": {
151
  "__type__": "kotaemon.llms.ChatOpenAI",
152
  "temperature": 0,
153
  "base_url": config("OPENAI_API_BASE", default="")
154
  or "https://api.openai.com/v1",
155
+ "api_key": OPENAI_API_KEY,
156
+ "model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"),
157
  "timeout": 20,
158
  },
159
+ "default": IS_OPENAI_DEFAULT,
160
  }
161
  KH_EMBEDDINGS["openai"] = {
162
  "spec": {
163
  "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
164
  "base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"),
165
+ "api_key": OPENAI_API_KEY,
166
  "model": config(
167
+ "OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large"
168
  ),
169
  "timeout": 10,
170
  "context_length": 8191,
171
  },
172
+ "default": IS_OPENAI_DEFAULT,
173
  }
174
 
175
  if config("LOCAL_MODEL", default=""):
176
  KH_LLMS["ollama"] = {
177
  "spec": {
178
  "__type__": "kotaemon.llms.ChatOpenAI",
179
+ "base_url": KH_OLLAMA_URL,
180
+ "model": config("LOCAL_MODEL", default="qwen2.5:7b"),
181
+ "api_key": "ollama",
182
+ },
183
+ "default": False,
184
+ }
185
+ KH_LLMS["ollama-long-context"] = {
186
+ "spec": {
187
+ "__type__": "kotaemon.llms.LCOllamaChat",
188
+ "base_url": KH_OLLAMA_URL.replace("v1/", ""),
189
+ "model": config("LOCAL_MODEL", default="qwen2.5:7b"),
190
+ "num_ctx": 8192,
191
  },
192
  "default": False,
193
  }
194
+
195
  KH_EMBEDDINGS["ollama"] = {
196
  "spec": {
197
  "__type__": "kotaemon.embeddings.OpenAIEmbeddings",
198
+ "base_url": KH_OLLAMA_URL,
199
  "model": config("LOCAL_MODEL_EMBEDDINGS", default="nomic-embed-text"),
200
+ "api_key": "ollama",
201
  },
202
  "default": False,
203
  }
204
+ KH_EMBEDDINGS["fast_embed"] = {
 
205
  "spec": {
206
  "__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
207
  "model_name": "BAAI/bge-base-en-v1.5",
 
209
  "default": False,
210
  }
211
 
212
+ # additional LLM configurations
213
+ KH_LLMS["claude"] = {
214
+ "spec": {
215
+ "__type__": "kotaemon.llms.chats.LCAnthropicChat",
216
+ "model_name": "claude-3-5-sonnet-20240620",
217
+ "api_key": "your-key",
218
+ },
219
+ "default": False,
220
+ }
221
+ KH_LLMS["google"] = {
222
+ "spec": {
223
+ "__type__": "kotaemon.llms.chats.LCGeminiChat",
224
+ "model_name": "gemini-1.5-flash",
225
+ "api_key": GOOGLE_API_KEY,
226
+ },
227
+ "default": not IS_OPENAI_DEFAULT,
228
+ }
229
+ KH_LLMS["groq"] = {
230
+ "spec": {
231
+ "__type__": "kotaemon.llms.ChatOpenAI",
232
+ "base_url": "https://api.groq.com/openai/v1",
233
+ "model": "llama-3.1-8b-instant",
234
+ "api_key": "your-key",
235
+ },
236
+ "default": False,
237
+ }
238
+ KH_LLMS["cohere"] = {
239
+ "spec": {
240
+ "__type__": "kotaemon.llms.chats.LCCohereChat",
241
+ "model_name": "command-r-plus-08-2024",
242
+ "api_key": config("COHERE_API_KEY", default="your-key"),
243
+ },
244
+ "default": False,
245
+ }
246
+
247
+ # additional embeddings configurations
248
+ KH_EMBEDDINGS["cohere"] = {
249
+ "spec": {
250
+ "__type__": "kotaemon.embeddings.LCCohereEmbeddings",
251
+ "model": "embed-multilingual-v3.0",
252
+ "cohere_api_key": config("COHERE_API_KEY", default="your-key"),
253
+ "user_agent": "default",
254
+ },
255
+ "default": False,
256
+ }
257
+ KH_EMBEDDINGS["google"] = {
258
+ "spec": {
259
+ "__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
260
+ "model": "models/text-embedding-004",
261
+ "google_api_key": GOOGLE_API_KEY,
262
+ },
263
+ "default": not IS_OPENAI_DEFAULT,
264
+ }
265
+ # KH_EMBEDDINGS["huggingface"] = {
266
+ # "spec": {
267
+ # "__type__": "kotaemon.embeddings.LCHuggingFaceEmbeddings",
268
+ # "model_name": "sentence-transformers/all-mpnet-base-v2",
269
+ # },
270
+ # "default": False,
271
+ # }
272
+
273
+ # default reranking models
274
+ KH_RERANKINGS["cohere"] = {
275
+ "spec": {
276
+ "__type__": "kotaemon.rerankings.CohereReranking",
277
+ "model_name": "rerank-multilingual-v2.0",
278
+ "cohere_api_key": config("COHERE_API_KEY", default=""),
279
+ },
280
+ "default": True,
281
+ }
282
+
283
  KH_REASONINGS = [
284
  "ktem.reasoning.simple.FullQAPipeline",
285
  "ktem.reasoning.simple.FullDecomposeQAPipeline",
286
  "ktem.reasoning.react.ReactAgentPipeline",
287
  "ktem.reasoning.rewoo.RewooAgentPipeline",
288
  ]
289
+ KH_REASONINGS_USE_MULTIMODAL = config("USE_MULTIMODAL", default=False, cast=bool)
290
  KH_VLM_ENDPOINT = "{0}/openai/deployments/{1}/chat/completions?api-version={2}".format(
291
  config("AZURE_OPENAI_ENDPOINT", default=""),
292
  config("OPENAI_VISION_DEPLOYMENT_NAME", default="gpt-4o"),
 
307
  "lang": {
308
  "name": "Language",
309
  "value": "en",
310
+ "choices": [(lang, code) for code, lang in SUPPORTED_LANGUAGE_MAP.items()],
311
  "component": "dropdown",
312
  },
313
  "max_context_length": {
 
317
  },
318
  }
319
 
320
+ USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
321
+ USE_LIGHTRAG = config("USE_LIGHTRAG", default=True, cast=bool)
322
+ USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", default=True, cast=bool)
323
+
324
+ GRAPHRAG_INDEX_TYPES = []
325
+
326
+ if USE_MS_GRAPHRAG:
327
+ GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.GraphRAGIndex")
328
+ if USE_NANO_GRAPHRAG:
329
+ GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex")
330
+ if USE_LIGHTRAG:
331
+ GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.LightRAGIndex")
332
 
333
  KH_INDEX_TYPES = [
334
  "ktem.index.file.FileIndex",
335
+ *GRAPHRAG_INDEX_TYPES,
336
  ]
337
+
338
+ GRAPHRAG_INDICES = [
339
  {
340
+ "name": graph_type.split(".")[-1].replace("Index", "")
341
+ + " Collection", # get last name
342
  "config": {
343
  "supported_file_types": (
344
  ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
345
+ ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
346
  ),
347
+ "private": True,
348
  },
349
+ "index_type": graph_type,
350
+ }
351
+ for graph_type in GRAPHRAG_INDEX_TYPES
352
+ ]
353
+
354
+ KH_INDICES = [
355
  {
356
+ "name": "File Collection",
357
  "config": {
358
  "supported_file_types": (
359
  ".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
360
+ ".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
361
  ),
362
+ "private": True,
363
  },
364
+ "index_type": "ktem.index.file.FileIndex",
365
  },
366
+ *GRAPHRAG_INDICES,
367
  ]