Spaces:
Runtime error
Runtime error
first_test (#1)
Browse files- feat(leaderboard): first test with results (e9a9cf04d4702e7669dc6528ca7a17e767914fd1)
- src/about.py +17 -4
- src/envs.py +4 -4
- src/leaderboard/read_evals.py +2 -2
src/about.py
CHANGED
|
@@ -12,8 +12,12 @@ class Task:
|
|
| 12 |
# ---------------------------------------------------
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
-
task0 = Task("
|
| 16 |
-
task1 = Task("
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
NUM_FEWSHOT = 0 # Change with your few shot
|
| 19 |
# ---------------------------------------------------
|
|
@@ -21,11 +25,20 @@ NUM_FEWSHOT = 0 # Change with your few shot
|
|
| 21 |
|
| 22 |
|
| 23 |
# Your leaderboard name
|
| 24 |
-
TITLE = """<h1 align="center" id="space-title">
|
| 25 |
|
| 26 |
# What does your leaderboard evaluate?
|
| 27 |
INTRODUCTION_TEXT = """
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"""
|
| 30 |
|
| 31 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
|
|
| 12 |
# ---------------------------------------------------
|
| 13 |
class Tasks(Enum):
|
| 14 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
| 15 |
+
task0 = Task("forms", "acc", "Forms")
|
| 16 |
+
task1 = Task("graphics", "acc", "Charts")
|
| 17 |
+
task2 = Task("handwritten", "acc", "Handwritten Texts")
|
| 18 |
+
task3 = Task("long_table", "acc", "Tables")
|
| 19 |
+
task4 = Task("tiny_texts", "acc", "Tiny Texts")
|
| 20 |
+
task5 = Task("multi_column", "acc", "Multiple Column Layout Texts")
|
| 21 |
|
| 22 |
NUM_FEWSHOT = 0 # Change with your few shot
|
| 23 |
# ---------------------------------------------------
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
# Your leaderboard name
|
| 28 |
+
TITLE = """<h1 align="center" id="space-title">FR Benchmark for PDF to MD parsing</h1>"""
|
| 29 |
|
| 30 |
# What does your leaderboard evaluate?
|
| 31 |
INTRODUCTION_TEXT = """
|
| 32 |
+
This leaderboard is about evaluating different VLMs on a specifically crafted document
|
| 33 |
+
parsing benchmark in French language.
|
| 34 |
+
Results are evaluated on 6 different tasks to provide an extensive view on all the tasks
|
| 35 |
+
expected in a business context. The tasks are the following:
|
| 36 |
+
* Forms
|
| 37 |
+
* Charts
|
| 38 |
+
* Handwritten Texts
|
| 39 |
+
* Tables
|
| 40 |
+
* Tiny Texts
|
| 41 |
+
* Multiple Columns Layout Texts
|
| 42 |
"""
|
| 43 |
|
| 44 |
# Which evaluations are you running? how can people reproduce what you have?
|
src/envs.py
CHANGED
|
@@ -6,12 +6,12 @@ from huggingface_hub import HfApi
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
|
| 8 |
|
| 9 |
-
OWNER = "
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
-
REPO_ID = f"{OWNER}/
|
| 13 |
-
QUEUE_REPO = f"{OWNER}/requests"
|
| 14 |
-
RESULTS_REPO = f"{OWNER}/results"
|
| 15 |
|
| 16 |
# If you setup a cache later, just change HF_HOME
|
| 17 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
|
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
|
| 8 |
|
| 9 |
+
OWNER = "pulsia" # Change to your org - don't forget to create a results and request dataset, with the correct format!
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
+
REPO_ID = f"{OWNER}/fr-bench-pdf2md"
|
| 13 |
+
# QUEUE_REPO = f"{OWNER}/requests"
|
| 14 |
+
RESULTS_REPO = f"{OWNER}/fr-bench-pdf2md-results"
|
| 15 |
|
| 16 |
# If you setup a cache later, just change HF_HOME
|
| 17 |
CACHE_PATH=os.getenv("HF_HOME", ".")
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -57,7 +57,7 @@ class EvalResult:
|
|
| 57 |
result_key = f"{org}_{model}_{precision.value.name}"
|
| 58 |
full_model = "/".join(org_and_model)
|
| 59 |
|
| 60 |
-
still_on_hub,
|
| 61 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
| 62 |
)
|
| 63 |
architecture = "?"
|
|
@@ -176,7 +176,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
| 176 |
for model_result_filepath in model_result_filepaths:
|
| 177 |
# Creation of result
|
| 178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
| 179 |
-
eval_result.update_with_request_file(requests_path)
|
| 180 |
|
| 181 |
# Store results of same eval together
|
| 182 |
eval_name = eval_result.eval_name
|
|
|
|
| 57 |
result_key = f"{org}_{model}_{precision.value.name}"
|
| 58 |
full_model = "/".join(org_and_model)
|
| 59 |
|
| 60 |
+
still_on_hub, model_config = is_model_on_hub(
|
| 61 |
full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
| 62 |
)
|
| 63 |
architecture = "?"
|
|
|
|
| 176 |
for model_result_filepath in model_result_filepaths:
|
| 177 |
# Creation of result
|
| 178 |
eval_result = EvalResult.init_from_json_file(model_result_filepath)
|
| 179 |
+
# eval_result.update_with_request_file(requests_path)
|
| 180 |
|
| 181 |
# Store results of same eval together
|
| 182 |
eval_name = eval_result.eval_name
|