Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- afridialeval/import_data.py +0 -2
- afridialeval/models.py +0 -3
- afridialeval/templates/annotate.html +1 -1
- pyproject.toml +2 -0
- run_pipeline.sh +0 -9
- src/generator.py +19 -6
- src/model_registry.py +5 -67
- src/orchestrator.py +0 -1
afridialeval/import_data.py
CHANGED
|
@@ -37,8 +37,6 @@ def parse_filename(filename: str) -> Dict[str, str]:
|
|
| 37 |
|
| 38 |
known_models = {
|
| 39 |
"gpt_5_1": "gpt-5.1",
|
| 40 |
-
"qwen_3_5_27b": "qwen-3.5-27b",
|
| 41 |
-
"qwen_3_5_122b": "qwen-3.5-122b",
|
| 42 |
"gemma_3_27b_it": "gemma-3-27b-it",
|
| 43 |
"gemini-3-flash-preview": "gemini-3-flash-preview",
|
| 44 |
}
|
|
|
|
| 37 |
|
| 38 |
known_models = {
|
| 39 |
"gpt_5_1": "gpt-5.1",
|
|
|
|
|
|
|
| 40 |
"gemma_3_27b_it": "gemma-3-27b-it",
|
| 41 |
"gemini-3-flash-preview": "gemini-3-flash-preview",
|
| 42 |
}
|
afridialeval/models.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
| 3 |
Codename mapping (blind, consistent):
|
| 4 |
Kifaru (🦏) → gpt-5.1
|
| 5 |
Nyati (🦬) → gemini-3-flash-preview
|
| 6 |
-
Tembo (🐘) → qwen-3.5-27b
|
| 7 |
Chui (🐆) → gemma-3-27b-it
|
| 8 |
"""
|
| 9 |
|
|
@@ -21,14 +20,12 @@ CODENAMES = {
|
|
| 21 |
"chui": "gemma-3-27b-it",
|
| 22 |
"kifaru": "gpt-5.1",
|
| 23 |
"nyati": "gemini-3-flash-preview",
|
| 24 |
-
"tembo": "qwen-3.5-27b",
|
| 25 |
}
|
| 26 |
MODEL_TO_CODENAME = {v: k for k, v in CODENAMES.items()}
|
| 27 |
CODENAME_DISPLAY = {
|
| 28 |
"chui": "🐆 Chui",
|
| 29 |
"kifaru": "🦏 Kifaru",
|
| 30 |
"nyati": "🦬 Nyati",
|
| 31 |
-
"tembo": "🐘 Tembo",
|
| 32 |
}
|
| 33 |
|
| 34 |
|
|
|
|
| 3 |
Codename mapping (blind, consistent):
|
| 4 |
Kifaru (🦏) → gpt-5.1
|
| 5 |
Nyati (🦬) → gemini-3-flash-preview
|
|
|
|
| 6 |
Chui (🐆) → gemma-3-27b-it
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 20 |
"chui": "gemma-3-27b-it",
|
| 21 |
"kifaru": "gpt-5.1",
|
| 22 |
"nyati": "gemini-3-flash-preview",
|
|
|
|
| 23 |
}
|
| 24 |
MODEL_TO_CODENAME = {v: k for k, v in CODENAMES.items()}
|
| 25 |
CODENAME_DISPLAY = {
|
| 26 |
"chui": "🐆 Chui",
|
| 27 |
"kifaru": "🦏 Kifaru",
|
| 28 |
"nyati": "🦬 Nyati",
|
|
|
|
| 29 |
}
|
| 30 |
|
| 31 |
|
afridialeval/templates/annotate.html
CHANGED
|
@@ -64,7 +64,7 @@
|
|
| 64 |
<!-- Step 1: Read all variants -->
|
| 65 |
<div class="step" id="step-1">
|
| 66 |
<h2>Step 1: Read all versions</h2>
|
| 67 |
-
<p class="instruction">Each version (🐆 Chui, 🦏 Kifaru, 🦬 Nyati, 🐘 Tembo) was generated by a different system. Read them carefully.</p>
|
| 68 |
|
| 69 |
<div class="dialogue-trio">
|
| 70 |
{% for v in variants %}
|
|
|
|
| 64 |
<!-- Step 1: Read all variants -->
|
| 65 |
<div class="step" id="step-1">
|
| 66 |
<h2>Step 1: Read all versions</h2>
|
| 67 |
+
<p class="instruction">Each version (🐆 Chui, 🦏 Kifaru, 🦬 Nyati) was generated by a different system. Read them carefully.</p>
|
| 68 |
|
| 69 |
<div class="dialogue-trio">
|
| 70 |
{% for v in variants %}
|
pyproject.toml
CHANGED
|
@@ -6,6 +6,8 @@ readme = "README.md"
|
|
| 6 |
requires-python = ">=3.11"
|
| 7 |
dependencies = [
|
| 8 |
"azure-identity>=1.25.3",
|
|
|
|
| 9 |
"openai>=2.31.0",
|
|
|
|
| 10 |
"tqdm>=4.67.3",
|
| 11 |
]
|
|
|
|
| 6 |
requires-python = ">=3.11"
|
| 7 |
dependencies = [
|
| 8 |
"azure-identity>=1.25.3",
|
| 9 |
+
"huggingface-hub>=0.25.0",
|
| 10 |
"openai>=2.31.0",
|
| 11 |
+
"python-dotenv>=1.0.0",
|
| 12 |
"tqdm>=4.67.3",
|
| 13 |
]
|
run_pipeline.sh
CHANGED
|
@@ -48,15 +48,6 @@ case "$MODE" in
|
|
| 48 |
# Yoruba
|
| 49 |
run_one "Yoruba" "Nigeria - Lagos" "$@"
|
| 50 |
run_one "Yoruba" "Benin - Porto-Novo" "$@"
|
| 51 |
-
# Somali
|
| 52 |
-
run_one "Somali" "Somalia - Mogadishu" "$@"
|
| 53 |
-
run_one "Somali" "Kenya - Nairobi" "$@"
|
| 54 |
-
# Tsonga
|
| 55 |
-
run_one "Tsonga" "South Africa - Johannesburg" "$@"
|
| 56 |
-
run_one "Tsonga" "Mozambique - Maputo" "$@"
|
| 57 |
-
# Amharic
|
| 58 |
-
run_one "Amharic" "Ethiopia - Addis Ababa" "$@"
|
| 59 |
-
run_one "Amharic" "Eritrea - Asmara" "$@"
|
| 60 |
;;
|
| 61 |
one)
|
| 62 |
LANGUAGE="$1"
|
|
|
|
| 48 |
# Yoruba
|
| 49 |
run_one "Yoruba" "Nigeria - Lagos" "$@"
|
| 50 |
run_one "Yoruba" "Benin - Porto-Novo" "$@"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
;;
|
| 52 |
one)
|
| 53 |
LANGUAGE="$1"
|
src/generator.py
CHANGED
|
@@ -1,18 +1,23 @@
|
|
| 1 |
import hashlib
|
| 2 |
import json
|
|
|
|
| 3 |
import re
|
| 4 |
import time
|
| 5 |
from datetime import datetime, timezone
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
from azure.identity import (
|
| 10 |
AzureCliCredential,
|
| 11 |
ChainedTokenCredential,
|
| 12 |
ManagedIdentityCredential,
|
| 13 |
get_bearer_token_provider,
|
| 14 |
)
|
| 15 |
-
from openai import AzureOpenAI
|
| 16 |
from tqdm import tqdm
|
| 17 |
|
| 18 |
from src.config import (
|
|
@@ -72,7 +77,19 @@ class Generator:
|
|
| 72 |
self.client = self._build_client()
|
| 73 |
self.prompt_cache: Dict[str, str] = self._load_prompt_cache()
|
| 74 |
|
| 75 |
-
def _build_client(self) -> AzureOpenAI:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
token_provider = get_bearer_token_provider(
|
| 77 |
ChainedTokenCredential(
|
| 78 |
AzureCliCredential(),
|
|
@@ -150,10 +167,6 @@ class Generator:
|
|
| 150 |
if response_format is not None and self.model_config.is_openai_compatible:
|
| 151 |
kwargs["response_format"] = response_format
|
| 152 |
|
| 153 |
-
# Disable thinking for Qwen 3.5 models to avoid slow reasoning tokens
|
| 154 |
-
if "qwen" in self.model_alias.lower() and "3.5" in self.model_alias:
|
| 155 |
-
kwargs["extra_body"] = {"chat_template_kwargs": {"enable_thinking": False}}
|
| 156 |
-
|
| 157 |
response = self.client.chat.completions.create(**kwargs)
|
| 158 |
content = response.choices[0].message.content or ""
|
| 159 |
|
|
|
|
| 1 |
import hashlib
|
| 2 |
import json
|
| 3 |
+
import os
|
| 4 |
import re
|
| 5 |
import time
|
| 6 |
from datetime import datetime, timezone
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
| 9 |
|
| 10 |
+
from dotenv import load_dotenv
|
| 11 |
+
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
from azure.identity import (
|
| 15 |
AzureCliCredential,
|
| 16 |
ChainedTokenCredential,
|
| 17 |
ManagedIdentityCredential,
|
| 18 |
get_bearer_token_provider,
|
| 19 |
)
|
| 20 |
+
from openai import AzureOpenAI, OpenAI
|
| 21 |
from tqdm import tqdm
|
| 22 |
|
| 23 |
from src.config import (
|
|
|
|
| 77 |
self.client = self._build_client()
|
| 78 |
self.prompt_cache: Dict[str, str] = self._load_prompt_cache()
|
| 79 |
|
| 80 |
+
def _build_client(self) -> Union[AzureOpenAI, OpenAI]:
|
| 81 |
+
if self.model_config.backend == "huggingface":
|
| 82 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 83 |
+
if not hf_token:
|
| 84 |
+
raise RuntimeError(
|
| 85 |
+
"HF_TOKEN environment variable is required for Hugging Face models. "
|
| 86 |
+
"Get a token at https://huggingface.co/settings/tokens"
|
| 87 |
+
)
|
| 88 |
+
return OpenAI(
|
| 89 |
+
base_url="https://router.huggingface.co/hf-inference/v1",
|
| 90 |
+
api_key=hf_token,
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
token_provider = get_bearer_token_provider(
|
| 94 |
ChainedTokenCredential(
|
| 95 |
AzureCliCredential(),
|
|
|
|
| 167 |
if response_format is not None and self.model_config.is_openai_compatible:
|
| 168 |
kwargs["response_format"] = response_format
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
response = self.client.chat.completions.create(**kwargs)
|
| 171 |
content = response.choices[0].message.content or ""
|
| 172 |
|
src/model_registry.py
CHANGED
|
@@ -9,6 +9,7 @@ class ModelConfig:
|
|
| 9 |
api_version: str
|
| 10 |
is_openai_compatible: bool = True
|
| 11 |
endpoint_override: str = "" # If set, use this TRAPI endpoint instead of default
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
MODELS: Dict[str, ModelConfig] = {
|
|
@@ -19,81 +20,18 @@ MODELS: Dict[str, ModelConfig] = {
|
|
| 19 |
is_openai_compatible=True,
|
| 20 |
endpoint_override="https://trapi.research.microsoft.com/msrc/shared",
|
| 21 |
),
|
| 22 |
-
"gpt-5.4": ModelConfig(
|
| 23 |
-
alias="gpt-5.4",
|
| 24 |
-
deployment_name="gpt-5.4_2026-03-05",
|
| 25 |
-
api_version="2025-04-01-preview",
|
| 26 |
-
is_openai_compatible=True,
|
| 27 |
-
),
|
| 28 |
"gpt-5.4-mini": ModelConfig(
|
| 29 |
alias="gpt-5.4-mini",
|
| 30 |
deployment_name="gpt-5.4-mini_2026-03-17",
|
| 31 |
api_version="2025-04-01-preview",
|
| 32 |
is_openai_compatible=True,
|
| 33 |
),
|
| 34 |
-
"gpt-4o": ModelConfig(
|
| 35 |
-
alias="gpt-4o",
|
| 36 |
-
deployment_name="gpt-4o_2024-11-20",
|
| 37 |
-
api_version="2025-04-01-preview",
|
| 38 |
-
is_openai_compatible=True,
|
| 39 |
-
),
|
| 40 |
-
"llama-3.3-70B": ModelConfig(
|
| 41 |
-
alias="llama-3.3-70B",
|
| 42 |
-
deployment_name="Llama-3.3-70B-Instruct_5",
|
| 43 |
-
api_version="2025-04-01-preview",
|
| 44 |
-
is_openai_compatible=False,
|
| 45 |
-
),
|
| 46 |
"gemma-3-27b-it": ModelConfig(
|
| 47 |
alias="gemma-3-27b-it",
|
| 48 |
-
deployment_name="
|
| 49 |
-
api_version="
|
| 50 |
-
is_openai_compatible=False,
|
| 51 |
-
),
|
| 52 |
-
"gemma-3-4b-it": ModelConfig(
|
| 53 |
-
alias="gemma-3-4b-it",
|
| 54 |
-
deployment_name="unsloth/gemma-3-4b-it",
|
| 55 |
-
api_version="2025-04-01-preview",
|
| 56 |
-
is_openai_compatible=False,
|
| 57 |
-
),
|
| 58 |
-
"qwen-3.5-122b": ModelConfig(
|
| 59 |
-
alias="qwen-3.5-122b",
|
| 60 |
-
deployment_name="Qwen/Qwen3.5-122B-A10B",
|
| 61 |
-
api_version="2025-04-01-preview",
|
| 62 |
-
is_openai_compatible=False,
|
| 63 |
-
endpoint_override="https://trapi.research.microsoft.com/msrc/shared",
|
| 64 |
-
),
|
| 65 |
-
"qwen-3.5-27b": ModelConfig(
|
| 66 |
-
alias="qwen-3.5-27b",
|
| 67 |
-
deployment_name="Qwen/Qwen3.5-27B",
|
| 68 |
-
api_version="2025-04-01-preview",
|
| 69 |
-
is_openai_compatible=False,
|
| 70 |
-
endpoint_override="https://trapi.research.microsoft.com/msrc/shared",
|
| 71 |
-
),
|
| 72 |
-
"qwen-3.5-397b": ModelConfig(
|
| 73 |
-
alias="qwen-3.5-397b",
|
| 74 |
-
deployment_name="Qwen/Qwen3.5-397B-A17B-GPTQ-Int4",
|
| 75 |
-
api_version="2025-04-01-preview",
|
| 76 |
-
is_openai_compatible=False,
|
| 77 |
-
endpoint_override="https://trapi.research.microsoft.com/msrc/shared",
|
| 78 |
-
),
|
| 79 |
-
"qwen-3.5-9b": ModelConfig(
|
| 80 |
-
alias="qwen-3.5-9b",
|
| 81 |
-
deployment_name="Qwen/Qwen3.5-9B",
|
| 82 |
-
api_version="2025-04-01-preview",
|
| 83 |
is_openai_compatible=False,
|
| 84 |
-
|
| 85 |
-
),
|
| 86 |
-
"Phi-4-reasoning": ModelConfig(
|
| 87 |
-
alias="Phi-4-reasoning",
|
| 88 |
-
deployment_name="gcr-phi-4-reasoning",
|
| 89 |
-
api_version="2025-04-01-preview",
|
| 90 |
-
is_openai_compatible=True,
|
| 91 |
-
),
|
| 92 |
-
"Phi-4-reasoning-mini": ModelConfig(
|
| 93 |
-
alias="Phi-4-reasoning-mini",
|
| 94 |
-
deployment_name="gcr-phi-4-mini-reasoning",
|
| 95 |
-
api_version="2025-04-01-preview",
|
| 96 |
-
is_openai_compatible=True,
|
| 97 |
),
|
| 98 |
"gemini-3-flash-preview": ModelConfig(
|
| 99 |
alias="gemini-3-flash-preview",
|
|
@@ -104,7 +42,7 @@ MODELS: Dict[str, ModelConfig] = {
|
|
| 104 |
}
|
| 105 |
|
| 106 |
|
| 107 |
-
DEFAULT_MODEL_ALIAS = "gpt-
|
| 108 |
|
| 109 |
|
| 110 |
def get_model_config(alias: str) -> ModelConfig:
|
|
|
|
| 9 |
api_version: str
|
| 10 |
is_openai_compatible: bool = True
|
| 11 |
endpoint_override: str = "" # If set, use this TRAPI endpoint instead of default
|
| 12 |
+
backend: str = "trapi" # "trapi" or "huggingface"
|
| 13 |
|
| 14 |
|
| 15 |
MODELS: Dict[str, ModelConfig] = {
|
|
|
|
| 20 |
is_openai_compatible=True,
|
| 21 |
endpoint_override="https://trapi.research.microsoft.com/msrc/shared",
|
| 22 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
"gpt-5.4-mini": ModelConfig(
|
| 24 |
alias="gpt-5.4-mini",
|
| 25 |
deployment_name="gpt-5.4-mini_2026-03-17",
|
| 26 |
api_version="2025-04-01-preview",
|
| 27 |
is_openai_compatible=True,
|
| 28 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"gemma-3-27b-it": ModelConfig(
|
| 30 |
alias="gemma-3-27b-it",
|
| 31 |
+
deployment_name="google/gemma-3-27b-it",
|
| 32 |
+
api_version="",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
is_openai_compatible=False,
|
| 34 |
+
backend="huggingface",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
),
|
| 36 |
"gemini-3-flash-preview": ModelConfig(
|
| 37 |
alias="gemini-3-flash-preview",
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
|
| 45 |
+
DEFAULT_MODEL_ALIAS = "gpt-5.4-mini"
|
| 46 |
|
| 47 |
|
| 48 |
def get_model_config(alias: str) -> ModelConfig:
|
src/orchestrator.py
CHANGED
|
@@ -20,7 +20,6 @@ DEFAULT_COMPARE_MODELS = [
|
|
| 20 |
"gpt-5.1",
|
| 21 |
"gemma-3-27b-it",
|
| 22 |
"gemini-3-flash-preview",
|
| 23 |
-
"qwen-3.5-27b",
|
| 24 |
]
|
| 25 |
|
| 26 |
|
|
|
|
| 20 |
"gpt-5.1",
|
| 21 |
"gemma-3-27b-it",
|
| 22 |
"gemini-3-flash-preview",
|
|
|
|
| 23 |
]
|
| 24 |
|
| 25 |
|