Spaces:
Build error
Build error
Commit ·
bffa030
1
Parent(s): 7709561
Update language summary and add dataset count for
Browse files
app.py
CHANGED
|
@@ -41,10 +41,12 @@ for lang in tqdm(freqs.keys()):
|
|
| 41 |
list_models(filter=ModelFilter(language=lang, task="token-classification"))
|
| 42 |
)
|
| 43 |
models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
|
|
|
|
| 44 |
if not models_for_lang_with_task_token_classification:
|
| 45 |
data = {
|
| 46 |
"language": lang,
|
| 47 |
-
"
|
|
|
|
| 48 |
"token_classification_models": len(
|
| 49 |
models_for_lang_with_task_token_classification
|
| 50 |
),
|
|
@@ -58,8 +60,13 @@ len(no_model)
|
|
| 58 |
df = pd.DataFrame(no_model)
|
| 59 |
|
| 60 |
df = df.sort_values(
|
| 61 |
-
by=[
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
|
| 65 |
|
|
@@ -68,7 +75,7 @@ def report_summary():
|
|
| 68 |
for row in df.head(20).itertuples():
|
| 69 |
language = row[1]
|
| 70 |
summary += f"# Summary for language: {language}\n"
|
| 71 |
-
summary += f"This language has {(row[2])} token classification datasets,
|
| 72 |
summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
|
| 73 |
summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
|
| 74 |
summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"
|
|
|
|
| 41 |
list_models(filter=ModelFilter(language=lang, task="token-classification"))
|
| 42 |
)
|
| 43 |
models_for_lang_any_task = list(list_models(filter=ModelFilter(language=lang)))
|
| 44 |
+
datasets_for_lang_any_task = list(list_datasets(filter=f"language:{lang}"))
|
| 45 |
if not models_for_lang_with_task_token_classification:
|
| 46 |
data = {
|
| 47 |
"language": lang,
|
| 48 |
+
"datasets_for_token_classification": freqs[lang],
|
| 49 |
+
"datasets": len(datasets_for_lang_any_task),
|
| 50 |
"token_classification_models": len(
|
| 51 |
models_for_lang_with_task_token_classification
|
| 52 |
),
|
|
|
|
| 60 |
df = pd.DataFrame(no_model)
|
| 61 |
|
| 62 |
df = df.sort_values(
|
| 63 |
+
by=[
|
| 64 |
+
"datasets_for_token_classification",
|
| 65 |
+
"datasets",
|
| 66 |
+
"token_classification_models",
|
| 67 |
+
"all_models",
|
| 68 |
+
],
|
| 69 |
+
ascending=[False, False, True, True],
|
| 70 |
)
|
| 71 |
|
| 72 |
|
|
|
|
| 75 |
for row in df.head(20).itertuples():
|
| 76 |
language = row[1]
|
| 77 |
summary += f"# Summary for language: {language}\n"
|
| 78 |
+
summary += f"This language has {(row[2])} token classification datasets, {row[3]} datasets overall, {row[4]} token classification models, and {row[5]} models overall.\n"
|
| 79 |
summary += f"- [Datasets for token classification task for {language}](https://huggingface.co/datasets?task_categories=task_categories:token-classification&language=language:{language})\n"
|
| 80 |
summary += f"- [Token classification models for {language}](https://huggingface.co/models?task_categories=task_categories:token-classification&language=language:{language})\n"
|
| 81 |
summary += f"- [All models for {language}](https://huggingface.co/models?language={language}&sort=trending)\n"
|