Rulga committed on
Commit
e79eedb
·
1 Parent(s): a42e4cc

Refactor settings.py: Update model configurations to include BLOOMZ 7B MT and mGPT with enhanced descriptions and capabilities

Browse files
Files changed (1) hide show
  1. config/settings.py +105 -20
config/settings.py CHANGED
@@ -214,10 +214,10 @@ MODELS = {
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
- "xglm-7.5b": {
218
- "id": "facebook/xglm-7.5B",
219
- "name": "XGLM 7.5B",
220
- "description": "Meta's multilingual model designed for cross-lingual generation",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
@@ -226,8 +226,8 @@ MODELS = {
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
- "base_model_path": "facebook/xglm-7.5B",
230
- "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "xglm-7.5b-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
@@ -236,26 +236,111 @@ MODELS = {
236
  }
237
  },
238
  "details": {
239
- "full_name": "Meta XGLM 7.5B",
240
  "capabilities": [
241
- "Specialized for multilingual generation",
242
- "Support for 30+ languages",
243
- "Strong cross-lingual transfer abilities",
244
- "Consistent performance across diverse languages"
245
  ],
246
  "limitations": [
247
- "Less instruction-tuned than dedicated chat models",
248
- "May require more specific prompting",
249
- "Not specifically optimized for legal domain",
250
- "Slightly larger model requiring more GPU memory"
251
  ],
252
  "use_cases": [
253
- "International legal assistance in native languages",
254
- "Complex multilingual documentation",
255
- "Serving clients from diverse linguistic backgrounds",
256
- "Translation and summarization of legal concepts across languages"
257
  ],
258
- "documentation": "https://huggingface.co/facebook/xglm-7.5B"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  }
260
  }
261
  }
 
214
  "documentation": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2"
215
  }
216
  },
217
+ "bloomz-7b1-mt": {
218
+ "id": "bigscience/bloomz-7b1-mt",
219
+ "name": "BLOOMZ 7B MT",
220
+ "description": "Multilingual model trained on 46 languages with strong legal domain capabilities",
221
  "type": "base",
222
  "parameters": {
223
  "max_length": 2048,
 
226
  "repetition_penalty": 1.1,
227
  },
228
  "training": {
229
+ "base_model_path": "bigscience/bloomz-7b1-mt",
230
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "bloomz-7b1-mt-tuned"),
231
  "lora_config": {
232
  "r": 16,
233
  "lora_alpha": 32,
 
236
  }
237
  },
238
  "details": {
239
+ "full_name": "BLOOMZ 7B1 Multilingual",
240
  "capabilities": [
241
+ "Excellent performance across 46 languages",
242
+ "Strong understanding of legal terminology",
243
+ "Trained on multilingual instructions",
244
+ "Good cross-lingual transfer"
245
  ],
246
  "limitations": [
247
+ "Slightly slower inference than some alternatives",
248
+ "May require more specific prompting in some languages",
249
+ "Limited to formal language styles"
 
250
  ],
251
  "use_cases": [
252
+ "International legal documentation",
253
+ "Multi-jurisdiction legal advice",
254
+ "Cross-border legal research",
255
+ "Legal document translation verification"
256
  ],
257
+ "documentation": "https://huggingface.co/bigscience/bloomz-7b1-mt"
258
+ }
259
+ },
260
+ "mgpt": {
261
+ "id": "sberbank-ai/mGPT",
262
+ "name": "mGPT",
263
+ "description": "Multilingual GPT model optimized for 60+ languages including Cyrillic scripts",
264
+ "type": "base",
265
+ "parameters": {
266
+ "max_length": 2048,
267
+ "temperature": 0.7,
268
+ "top_p": 0.9,
269
+ "repetition_penalty": 1.1,
270
+ },
271
+ "training": {
272
+ "base_model_path": "sberbank-ai/mGPT",
273
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "mgpt-tuned"),
274
+ "lora_config": {
275
+ "r": 16,
276
+ "lora_alpha": 32,
277
+ "lora_dropout": 0.05,
278
+ "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
279
+ }
280
+ },
281
+ "details": {
282
+ "full_name": "Multilingual GPT",
283
+ "capabilities": [
284
+ "Support for 60+ languages",
285
+ "Excellent performance with Cyrillic scripts",
286
+ "Optimized for dialogue generation",
287
+ "Efficient resource usage"
288
+ ],
289
+ "limitations": [
290
+ "Less specialized in legal domain",
291
+ "May require additional context for complex legal terms",
292
+ "Performance varies by language"
293
+ ],
294
+ "use_cases": [
295
+ "Multilingual client communication",
296
+ "Basic legal document translation",
297
+ "General legal consultation in multiple languages",
298
+ "Cross-cultural legal explanation"
299
+ ],
300
+ "documentation": "https://huggingface.co/sberbank-ai/mGPT"
301
+ }
302
+ },
303
+ "xglm-4.5b": {
304
+ "id": "facebook/xglm-4.5B",
305
+ "name": "XGLM 4.5B",
306
+ "description": "Lightweight multilingual model with strong Asian language support",
307
+ "type": "base",
308
+ "parameters": {
309
+ "max_length": 2048,
310
+ "temperature": 0.7,
311
+ "top_p": 0.9,
312
+ "repetition_penalty": 1.1,
313
+ },
314
+ "training": {
315
+ "base_model_path": "facebook/xglm-4.5B",
316
+ "fine_tuned_path": os.path.join(TRAINING_OUTPUT_DIR, "xglm-4.5b-tuned"),
317
+ "lora_config": {
318
+ "r": 16,
319
+ "lora_alpha": 32,
320
+ "lora_dropout": 0.05,
321
+ "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"]
322
+ }
323
+ },
324
+ "details": {
325
+ "full_name": "Meta XGLM 4.5B",
326
+ "capabilities": [
327
+ "Efficient multilingual generation",
328
+ "Strong Asian language support",
329
+ "Fast inference speed",
330
+ "Lower resource requirements"
331
+ ],
332
+ "limitations": [
333
+ "Smaller model size may affect complex reasoning",
334
+ "Less specialized for legal domain",
335
+ "May require more context for accurate responses"
336
+ ],
337
+ "use_cases": [
338
+ "Quick multilingual responses",
339
+ "Asian language legal support",
340
+ "Basic legal document analysis",
341
+ "Rapid cross-lingual summarization"
342
+ ],
343
+ "documentation": "https://huggingface.co/facebook/xglm-4.5B"
344
  }
345
  }
346
  }