HeshamHaroon committed on
Commit
9a35f45
·
verified ·
1 Parent(s): a5f8ac7

Update: Auto-evaluation on Space startup

Browse files
Files changed (1) hide show
  1. afcl/app.py +33 -3
afcl/app.py CHANGED
@@ -29,19 +29,49 @@ The **Arabic Function Calling Leaderboard (AFCL)** evaluates Large Language Mode
29
  **لوحة تقييم استدعاء الدوال بالعربية** تقيّم نماذج اللغة الكبيرة على قدرتها على فهم الاستعلامات العربية وإنشاء استدعاءات الدوال المناسبة.
30
  """
31
 
32
- # Models to evaluate
33
  MODELS_TO_EVALUATE = [
 
34
  {"model": "Jais-30B-Chat", "model_id": "inceptionai/jais-30b-chat-v3", "organization": "Inception AI"},
35
  {"model": "ALLaM-7B-Instruct", "model_id": "sdaia/allam-1-7b-instruct", "organization": "SDAIA"},
36
  {"model": "SILMA-9B-Instruct", "model_id": "silma-ai/SILMA-9B-Instruct-v1.0", "organization": "Silma AI"},
 
37
  {"model": "AceGPT-13B-Chat", "model_id": "FreedomIntelligence/AceGPT-13B-chat", "organization": "FreedomIntelligence"},
38
- {"model": "BLOOMZ-7B1", "model_id": "bigscience/bloomz-7b1", "organization": "BigScience"},
39
- {"model": "Aya-Expanse-8B", "model_id": "CohereForAI/aya-expanse-8b", "organization": "Cohere For AI"},
 
 
 
40
  {"model": "Qwen2.5-7B-Instruct", "model_id": "Qwen/Qwen2.5-7B-Instruct", "organization": "Alibaba Qwen"},
 
41
  {"model": "Llama-3.1-8B-Instruct", "model_id": "meta-llama/Llama-3.1-8B-Instruct", "organization": "Meta"},
 
42
  {"model": "Gemma-2-9B-IT", "model_id": "google/gemma-2-9b-it", "organization": "Google"},
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  {"model": "Mistral-7B-Instruct", "model_id": "mistralai/Mistral-7B-Instruct-v0.3", "organization": "Mistral AI"},
 
 
 
 
44
  {"model": "Phi-3-Mini-Instruct", "model_id": "microsoft/Phi-3-mini-4k-instruct", "organization": "Microsoft"},
 
 
 
 
 
 
45
  ]
46
 
47
  # Global state
 
29
  **لوحة تقييم استدعاء الدوال بالعربية** تقيّم نماذج اللغة الكبيرة على قدرتها على فهم الاستعلامات العربية وإنشاء استدعاءات الدوال المناسبة.
30
  """
31
 
32
+ # All 28 Models to evaluate
33
  MODELS_TO_EVALUATE = [
34
+ # Arabic-Native LLMs
35
  {"model": "Jais-30B-Chat", "model_id": "inceptionai/jais-30b-chat-v3", "organization": "Inception AI"},
36
  {"model": "ALLaM-7B-Instruct", "model_id": "sdaia/allam-1-7b-instruct", "organization": "SDAIA"},
37
  {"model": "SILMA-9B-Instruct", "model_id": "silma-ai/SILMA-9B-Instruct-v1.0", "organization": "Silma AI"},
38
+ {"model": "Fanar-Star-1.2B", "model_id": "QatarComputing/fanar-star-1.2b", "organization": "QCRI"},
39
  {"model": "AceGPT-13B-Chat", "model_id": "FreedomIntelligence/AceGPT-13B-chat", "organization": "FreedomIntelligence"},
40
+ {"model": "AraGPT2-Mega", "model_id": "aubmindlab/aragpt2-mega", "organization": "AUB MIND Lab"},
41
+
42
+ # Multilingual with strong Arabic
43
+ {"model": "Qwen2.5-72B-Instruct", "model_id": "Qwen/Qwen2.5-72B-Instruct", "organization": "Alibaba Qwen"},
44
+ {"model": "Qwen2.5-32B-Instruct", "model_id": "Qwen/Qwen2.5-32B-Instruct", "organization": "Alibaba Qwen"},
45
  {"model": "Qwen2.5-7B-Instruct", "model_id": "Qwen/Qwen2.5-7B-Instruct", "organization": "Alibaba Qwen"},
46
+ {"model": "Llama-3.1-70B-Instruct", "model_id": "meta-llama/Llama-3.1-70B-Instruct", "organization": "Meta"},
47
  {"model": "Llama-3.1-8B-Instruct", "model_id": "meta-llama/Llama-3.1-8B-Instruct", "organization": "Meta"},
48
+ {"model": "Gemma-2-27B-IT", "model_id": "google/gemma-2-27b-it", "organization": "Google"},
49
  {"model": "Gemma-2-9B-IT", "model_id": "google/gemma-2-9b-it", "organization": "Google"},
50
+
51
+ # Cohere Arabic Models
52
+ {"model": "Aya-Expanse-32B", "model_id": "CohereForAI/aya-expanse-32b", "organization": "Cohere For AI"},
53
+ {"model": "Aya-Expanse-8B", "model_id": "CohereForAI/aya-expanse-8b", "organization": "Cohere For AI"},
54
+ {"model": "c4ai-command-r7b-arabic", "model_id": "CohereForAI/c4ai-command-r7b-arabic-02-2025", "organization": "Cohere For AI"},
55
+
56
+ # Falcon (UAE)
57
+ {"model": "Falcon-180B-Chat", "model_id": "tiiuae/falcon-180B-chat", "organization": "TII UAE"},
58
+ {"model": "Falcon-40B-Instruct", "model_id": "tiiuae/falcon-40b-instruct", "organization": "TII UAE"},
59
+
60
+ # Mistral
61
+ {"model": "Mistral-Large-Instruct", "model_id": "mistralai/Mistral-Large-Instruct-2411", "organization": "Mistral AI"},
62
+ {"model": "Mixtral-8x22B-Instruct", "model_id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "organization": "Mistral AI"},
63
  {"model": "Mistral-7B-Instruct", "model_id": "mistralai/Mistral-7B-Instruct-v0.3", "organization": "Mistral AI"},
64
+
65
+ # Others
66
+ {"model": "DeepSeek-V3", "model_id": "deepseek-ai/DeepSeek-V3", "organization": "DeepSeek"},
67
+ {"model": "Phi-4", "model_id": "microsoft/phi-4", "organization": "Microsoft"},
68
  {"model": "Phi-3-Mini-Instruct", "model_id": "microsoft/Phi-3-mini-4k-instruct", "organization": "Microsoft"},
69
+ {"model": "BLOOM-176B", "model_id": "bigscience/bloom", "organization": "BigScience"},
70
+ {"model": "BLOOMZ-7B1", "model_id": "bigscience/bloomz-7b1", "organization": "BigScience"},
71
+
72
+ # Arabic Fine-tuned
73
+ {"model": "Arabic-Llama-3.1-8B", "model_id": "Ammar-Arabi/Arabic-Llama-3.1-8B-Instruct", "organization": "Ammar Arabi"},
74
+ {"model": "Llama3-8B-Arabic-Instruct", "model_id": "MahmoudAshraf/Llama3-8B-Arabic-instruct", "organization": "Mahmoud Ashraf"},
75
  ]
76
 
77
  # Global state