Rulga committed on
Commit
c562673
·
1 Parent(s): 4da6c12

Refactor settings.py: Update model configurations to replace OpenAssistant SFT-1 and SOLAR 10.7B with Yi 6B Chat and Mixtral 8x7B, enhancing descriptions and capabilities

Browse files
Files changed (1) hide show
  1. config/settings.py +37 -80
config/settings.py CHANGED
@@ -214,10 +214,10 @@ MODELS = {
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
- "oasst-sft-1": {
218
- "id": "OpenAssistant/oasst-sft-1-pythia-12b",
219
- "name": "OpenAssistant SFT-1",
220
- "description": "Open Assistant model trained for dialogue and instruction following",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
@@ -226,8 +226,8 @@ MODELS = {
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
- "base_model_path": "OpenAssistant/oasst-sft-1-pythia-12b",
230
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "oasst-sft-1-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
@@ -236,31 +236,31 @@ MODELS = {
236
  }
237
  },
238
  "details": {
239
- "full_name": "Open Assistant SFT-1",
240
  "capabilities": [
241
- "Strong dialogue capabilities",
242
- "Good multilingual performance",
243
- "Instruction following",
244
- "Community-trained on diverse datasets"
245
  ],
246
  "limitations": [
247
- "May require more specific prompting for legal domain",
248
- "Performance varies by language",
249
- "Less specialized in legal terminology"
250
  ],
251
  "use_cases": [
252
- "Multilingual client support",
253
- "General legal consultation",
254
  "Document analysis",
255
- "Cross-lingual communication"
 
256
  ],
257
- "documentation": "https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b"
258
  }
259
  },
260
- "solar-10.7b": {
261
- "id": "upstage/SOLAR-10.7B-Instruct-v1.0",
262
- "name": "SOLAR 10.7B Instruct",
263
- "description": "Lightweight multilingual model with strong Asian language support",
264
  "type": "base",
265
  "parameters": {
266
  "max_length": 2048,
@@ -269,8 +269,8 @@ MODELS = {
269
  "repetition_penalty": 1.1,
270
  },
271
  "training": {
272
- "base_model_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
273
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "solar-10.7b-tuned"),
274
  "lora_config": {
275
  "r": 16,
276
  "lora_alpha": 32,
@@ -279,68 +279,25 @@ MODELS = {
279
  }
280
  },
281
  "details": {
282
- "full_name": "SOLAR 10.7B Instruct v1.0",
283
  "capabilities": [
284
- "Strong Asian language support",
285
- "Excellent instruction following",
286
- "Fast inference speed",
287
- "Good multilingual performance"
288
  ],
289
  "limitations": [
290
- "Slightly larger size (10.7B)",
291
- "May need more specific prompting for legal domain",
292
- "Performance varies by language"
293
  ],
294
  "use_cases": [
295
- "Multilingual legal assistance",
296
- "Asian language support",
297
- "Cross-cultural legal consultation",
298
- "International document analysis"
299
- ],
300
- "documentation": "https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0"
301
- }
302
- },
303
- "saiga-7b": {
304
- "id": "IlyaGusev/saiga_7b_lora",
305
- "name": "Saiga 7B",
306
- "description": "Multilingual model optimized for Russian and English dialogue",
307
- "type": "base",
308
- "parameters": {
309
- "max_length": 2048,
310
- "temperature": 0.7,
311
- "top_p": 0.9,
312
- "repetition_penalty": 1.1,
313
- },
314
- "training": {
315
- "base_model_path": "IlyaGusev/saiga_7b_lora",
316
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "saiga-7b-tuned"),
317
- "lora_config": {
318
- "r": 16,
319
- "lora_alpha": 32,
320
- "lora_dropout": 0.05,
321
- "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
322
- }
323
- },
324
- "details": {
325
- "full_name": "Saiga 7B",
326
- "capabilities": [
327
- "Excellent Russian language support",
328
- "Strong dialogue capabilities",
329
- "Optimized for instruction following",
330
- "Good performance on legal and technical texts"
331
- ],
332
- "limitations": [
333
- "Primarily focused on Russian and English",
334
- "May require specific prompting for legal domain",
335
- "Limited support for other languages"
336
- ],
337
- "use_cases": [
338
- "Russian-English legal consultation",
339
- "Document analysis in Russian",
340
- "Legal QA for Russian speakers",
341
- "Technical documentation processing"
342
  ],
343
- "documentation": "https://huggingface.co/IlyaGusev/saiga_7b_lora"
344
  }
345
  }
346
  }
 
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
+ "yi-6b": {
218
+ "id": "01-ai/Yi-6B-Chat",
219
+ "name": "Yi 6B Chat",
220
+ "description": "Lightweight multilingual model with strong performance",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
 
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
+ "base_model_path": "01-ai/Yi-6B-Chat",
230
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "yi-6b-chat-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
 
236
  }
237
  },
238
  "details": {
239
+ "full_name": "Yi 6B Chat",
240
  "capabilities": [
241
+ "Strong multilingual support",
242
+ "Efficient performance",
243
+ "Good instruction following",
244
+ "Smaller size (6B parameters)"
245
  ],
246
  "limitations": [
247
+ "Less specialized in legal domain",
248
+ "May need specific prompting",
249
+ "Newer model with less community testing"
250
  ],
251
  "use_cases": [
252
+ "General consultation",
 
253
  "Document analysis",
254
+ "Multilingual support",
255
+ "Basic legal QA"
256
  ],
257
+ "documentation": "https://huggingface.co/01-ai/Yi-6B-Chat"
258
  }
259
  },
260
+ "mixtral-8x7b": {
261
+ "id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
262
+ "name": "Mixtral 8x7B Instruct",
263
+ "description": "Powerful mixture-of-experts model with strong multilingual capabilities",
264
  "type": "base",
265
  "parameters": {
266
  "max_length": 2048,
 
269
  "repetition_penalty": 1.1,
270
  },
271
  "training": {
272
+ "base_model_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
273
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "mixtral-8x7b-instruct-tuned"),
274
  "lora_config": {
275
  "r": 16,
276
  "lora_alpha": 32,
 
279
  }
280
  },
281
  "details": {
282
+ "full_name": "Mixtral 8x7B Instruct v0.1",
283
  "capabilities": [
284
+ "Excellent multilingual support",
285
+ "Strong reasoning capabilities",
286
+ "Superior instruction following",
287
+ "High-quality text generation"
288
  ],
289
  "limitations": [
290
+ "Larger model size",
291
+ "May need domain-specific prompting",
292
+ "Higher resource requirements"
293
  ],
294
  "use_cases": [
295
+ "Complex legal analysis",
296
+ "Multilingual consultation",
297
+ "Advanced document processing",
298
+ "Cross-lingual communication"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  ],
300
+ "documentation": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1"
301
  }
302
  }
303
  }