Rulga committed on
Commit
4da6c12
·
1 Parent(s): e79eedb

Refactor settings.py: Update model configurations to replace BLOOMZ 7B MT, mGPT, and XGLM 4.5B with OpenAssistant SFT-1, SOLAR 10.7B Instruct, and Saiga 7B, enhancing descriptions and capabilities

Browse files
Files changed (1) hide show
  1. config/settings.py +56 -56
config/settings.py CHANGED
@@ -214,10 +214,10 @@ MODELS = {
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
- "bloomz-7b1-mt": {
218
- "id": "bigscience/bloomz-7b1-mt",
219
- "name": "BLOOMZ 7B MT",
220
- "description": "Multilingual model trained on 46 languages with strong legal domain capabilities",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
@@ -226,8 +226,8 @@ MODELS = {
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
- "base_model_path": "bigscience/bloomz-7b1-mt",
230
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "bloomz-7b1-mt-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
@@ -236,31 +236,31 @@ MODELS = {
236
  }
237
  },
238
  "details": {
239
- "full_name": "BLOOMZ 7B1 Multilingual",
240
  "capabilities": [
241
- "Excellent performance across 46 languages",
242
- "Strong understanding of legal terminology",
243
- "Trained on multilingual instructions",
244
- "Good cross-lingual transfer"
245
  ],
246
  "limitations": [
247
- "Slightly slower inference than some alternatives",
248
- "May require more specific prompting in some languages",
249
- "Limited to formal language styles"
250
  ],
251
  "use_cases": [
252
- "International legal documentation",
253
- "Multi-jurisdiction legal advice",
254
- "Cross-border legal research",
255
- "Legal document translation verification"
256
  ],
257
- "documentation": "https://huggingface.co/bigscience/bloomz-7b1-mt"
258
  }
259
  },
260
- "mgpt": {
261
- "id": "sberbank-ai/mGPT",
262
- "name": "mGPT",
263
- "description": "Multilingual GPT model optimized for 60+ languages including Cyrillic scripts",
264
  "type": "base",
265
  "parameters": {
266
  "max_length": 2048,
@@ -269,8 +269,8 @@ MODELS = {
269
  "repetition_penalty": 1.1,
270
  },
271
  "training": {
272
- "base_model_path": "sberbank-ai/mGPT",
273
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "mgpt-tuned"),
274
  "lora_config": {
275
  "r": 16,
276
  "lora_alpha": 32,
@@ -279,31 +279,31 @@ MODELS = {
279
  }
280
  },
281
  "details": {
282
- "full_name": "Multilingual GPT",
283
  "capabilities": [
284
- "Support for 60+ languages",
285
- "Excellent performance with Cyrillic scripts",
286
- "Optimized for dialogue generation",
287
- "Efficient resource usage"
288
  ],
289
  "limitations": [
290
- "Less specialized in legal domain",
291
- "May require additional context for complex legal terms",
292
  "Performance varies by language"
293
  ],
294
  "use_cases": [
295
- "Multilingual client communication",
296
- "Basic legal document translation",
297
- "General legal consultation in multiple languages",
298
- "Cross-cultural legal explanation"
299
  ],
300
- "documentation": "https://huggingface.co/sberbank-ai/mGPT"
301
  }
302
  },
303
- "xglm-4.5b": {
304
- "id": "facebook/xglm-4.5B",
305
- "name": "XGLM 4.5B",
306
- "description": "Lightweight multilingual model with strong Asian language support",
307
  "type": "base",
308
  "parameters": {
309
  "max_length": 2048,
@@ -312,8 +312,8 @@ MODELS = {
312
  "repetition_penalty": 1.1,
313
  },
314
  "training": {
315
- "base_model_path": "facebook/xglm-4.5B",
316
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "xglm-4.5b-tuned"),
317
  "lora_config": {
318
  "r": 16,
319
  "lora_alpha": 32,
@@ -322,25 +322,25 @@ MODELS = {
322
  }
323
  },
324
  "details": {
325
- "full_name": "Meta XGLM 4.5B",
326
  "capabilities": [
327
- "Efficient multilingual generation",
328
- "Strong Asian language support",
329
- "Fast inference speed",
330
- "Lower resource requirements"
331
  ],
332
  "limitations": [
333
- "Smaller model size may affect complex reasoning",
334
- "Less specialized for legal domain",
335
- "May require more context for accurate responses"
336
  ],
337
  "use_cases": [
338
- "Quick multilingual responses",
339
- "Asian language legal support",
340
- "Basic legal document analysis",
341
- "Rapid cross-lingual summarization"
342
  ],
343
- "documentation": "https://huggingface.co/facebook/xglm-4.5B"
344
  }
345
  }
346
  }
 
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
+ "oasst-sft-1": {
218
+ "id": "OpenAssistant/oasst-sft-1-pythia-12b",
219
+ "name": "OpenAssistant SFT-1",
220
+ "description": "Open Assistant model trained for dialogue and instruction following",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
 
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
+ "base_model_path": "OpenAssistant/oasst-sft-1-pythia-12b",
230
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "oasst-sft-1-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
 
236
  }
237
  },
238
  "details": {
239
+ "full_name": "Open Assistant SFT-1",
240
  "capabilities": [
241
+ "Strong dialogue capabilities",
242
+ "Good multilingual performance",
243
+ "Instruction following",
244
+ "Community-trained on diverse datasets"
245
  ],
246
  "limitations": [
247
+ "May require more specific prompting for legal domain",
248
+ "Performance varies by language",
249
+ "Less specialized in legal terminology"
250
  ],
251
  "use_cases": [
252
+ "Multilingual client support",
253
+ "General legal consultation",
254
+ "Document analysis",
255
+ "Cross-lingual communication"
256
  ],
257
+ "documentation": "https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b"
258
  }
259
  },
260
+ "solar-10.7b": {
261
+ "id": "upstage/SOLAR-10.7B-Instruct-v1.0",
262
+ "name": "SOLAR 10.7B Instruct",
263
+ "description": "Lightweight multilingual model with strong Asian language support",
264
  "type": "base",
265
  "parameters": {
266
  "max_length": 2048,
 
269
  "repetition_penalty": 1.1,
270
  },
271
  "training": {
272
+ "base_model_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
273
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "solar-10.7b-tuned"),
274
  "lora_config": {
275
  "r": 16,
276
  "lora_alpha": 32,
 
279
  }
280
  },
281
  "details": {
282
+ "full_name": "SOLAR 10.7B Instruct v1.0",
283
  "capabilities": [
284
+ "Strong Asian language support",
285
+ "Excellent instruction following",
286
+ "Fast inference speed",
287
+ "Good multilingual performance"
288
  ],
289
  "limitations": [
290
+ "Slightly larger size (10.7B)",
291
+ "May need more specific prompting for legal domain",
292
  "Performance varies by language"
293
  ],
294
  "use_cases": [
295
+ "Multilingual legal assistance",
296
+ "Asian language support",
297
+ "Cross-cultural legal consultation",
298
+ "International document analysis"
299
  ],
300
+ "documentation": "https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0"
301
  }
302
  },
303
+ "saiga-7b": {
304
+ "id": "IlyaGusev/saiga_7b_lora",
305
+ "name": "Saiga 7B",
306
+ "description": "Multilingual model optimized for Russian and English dialogue",
307
  "type": "base",
308
  "parameters": {
309
  "max_length": 2048,
 
312
  "repetition_penalty": 1.1,
313
  },
314
  "training": {
315
+ "base_model_path": "IlyaGusev/saiga_7b_lora",
316
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "saiga-7b-tuned"),
317
  "lora_config": {
318
  "r": 16,
319
  "lora_alpha": 32,
 
322
  }
323
  },
324
  "details": {
325
+ "full_name": "Saiga 7B",
326
  "capabilities": [
327
+ "Excellent Russian language support",
328
+ "Strong dialogue capabilities",
329
+ "Optimized for instruction following",
330
+ "Good performance on legal and technical texts"
331
  ],
332
  "limitations": [
333
+ "Primarily focused on Russian and English",
334
+ "May require specific prompting for legal domain",
335
+ "Limited support for other languages"
336
  ],
337
  "use_cases": [
338
+ "Russian-English legal consultation",
339
+ "Document analysis in Russian",
340
+ "Legal QA for Russian speakers",
341
+ "Technical documentation processing"
342
  ],
343
+ "documentation": "https://huggingface.co/IlyaGusev/saiga_7b_lora"
344
  }
345
  }
346
  }