Login to the Hub first
Browse files
- background_inference.py +7 -7
- leaderboard_info.md +2 -2
- requirements.txt +1 -0
background_inference.py
CHANGED
|
@@ -5,6 +5,10 @@ import datasets
|
|
| 5 |
import eval_utils
|
| 6 |
from constants import DIALECTS_WITH_LABELS
|
| 7 |
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
model_name = sys.argv[1]
|
| 10 |
commit_id = sys.argv[2]
|
|
@@ -19,16 +23,12 @@ utils.update_model_queue(
|
|
| 19 |
)
|
| 20 |
|
| 21 |
try:
|
| 22 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 23 |
-
model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
|
| 24 |
-
)
|
| 25 |
if inference_function == "prompt_chat_LLM":
|
| 26 |
-
model = AutoModel.from_pretrained(
|
| 27 |
-
model_name, revision=commit_id, access_token=os.environ["HF_TOKEN"]
|
| 28 |
-
)
|
| 29 |
else:
|
| 30 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 31 |
-
model_name, revision=commit_id
|
| 32 |
)
|
| 33 |
|
| 34 |
# Load the dataset
|
|
|
|
| 5 |
import eval_utils
|
| 6 |
from constants import DIALECTS_WITH_LABELS
|
| 7 |
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
|
| 8 |
+
from huggingface_hub import login
|
| 9 |
+
|
| 10 |
+
access_token = os.environ["HF_TOKEN"]
|
| 11 |
+
login(token=access_token)
|
| 12 |
|
| 13 |
model_name = sys.argv[1]
|
| 14 |
commit_id = sys.argv[2]
|
|
|
|
| 23 |
)
|
| 24 |
|
| 25 |
try:
|
| 26 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, revision=commit_id)
|
|
|
|
|
|
|
| 27 |
if inference_function == "prompt_chat_LLM":
|
| 28 |
+
model = AutoModel.from_pretrained(model_name, revision=commit_id)
|
|
|
|
|
|
|
| 29 |
else:
|
| 30 |
model = AutoModelForSequenceClassification.from_pretrained(
|
| 31 |
+
model_name, revision=commit_id
|
| 32 |
)
|
| 33 |
|
| 34 |
# Load the dataset
|
leaderboard_info.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
## Test Set Details
|
| 2 |
-
The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia from which data was scarce). Each sample is annotated by native speakers recruited from
|
| 3 |
|
| 4 |
## Evaluation Metrics
|
| 5 |
-
We compute the precision, recall, and F1 scores for each of the
|
| 6 |
|
| 7 |
## Data Access
|
| 8 |
If you need to access the single-label training sets, and the multi-label development set, please fill the following form: https://forms.gle/t3QTC6ZqyDJBzAau8
|
|
|
|
| 1 |
## Test Set Details
|
| 2 |
+
The test set used for evaluation is composed of 1000 sentences geolocated to the 14 most-populated Arab countries (excluding Somalia, from which data was scarce). Each sample is annotated by native speakers recruited from 11 different Arab countries, namely: Algeria, Egypt, Iraq, Jordan, Morocco, Palestine, Saudi Arabia, Sudan, Syria, Tunisia, Yemen.
|
| 3 |
|
| 4 |
## Evaluation Metrics
|
| 5 |
+
We compute the precision, recall, and F1 scores for each of the 11 countries (treating each label as a binary classification problem).
|
| 6 |
|
| 7 |
## Data Access
|
| 8 |
If you need to access the single-label training sets, and the multi-label development set, please fill in the following form: https://forms.gle/t3QTC6ZqyDJBzAau8
|
requirements.txt
CHANGED
|
@@ -5,3 +5,4 @@ pandas
|
|
| 5 |
numpy
|
| 6 |
scikit-learn
|
| 7 |
tabulate
|
|
|
|
|
|
| 5 |
numpy
|
| 6 |
scikit-learn
|
| 7 |
tabulate
|
| 8 |
+
huggingface_hub
|