pulsia committed
Commit 542e9df · verified · 1 Parent(s): b16de72

first_test (#1)


- feat(leaderboard): first test with results (e9a9cf04d4702e7669dc6528ca7a17e767914fd1)

Files changed (3)
  1. src/about.py +17 -4
  2. src/envs.py +4 -4
  3. src/leaderboard/read_evals.py +2 -2
src/about.py CHANGED
@@ -12,8 +12,12 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
-    task1 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("forms", "acc", "Forms")
+    task1 = Task("graphics", "acc", "Charts")
+    task2 = Task("handwritten", "acc", "Handwritten Texts")
+    task3 = Task("long_table", "acc", "Tables")
+    task4 = Task("tiny_texts", "acc", "Tiny Texts")
+    task5 = Task("multi_column", "acc", "Multiple Column Layout Texts")

 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
@@ -21,11 +25,20 @@ NUM_FEWSHOT = 0 # Change with your few shot


 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">FR Benchmark for PDF to MD parsing</h1>"""

 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Intro text
+This leaderboard evaluates different VLMs on a document parsing benchmark
+specifically crafted for the French language.
+Results are reported on 6 different tasks to provide an extensive view of the tasks
+expected in a business context. The tasks are the following:
+* Forms
+* Charts
+* Handwritten Texts
+* Tables
+* Tiny Texts
+* Multiple Column Layout Texts
 """

 # Which evaluations are you running? how can people reproduce what you have?
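
Note on the Tasks hunk above: each entry maps a task key and a metric key in the per-model results json to a display column. A minimal sketch of the surrounding definitions this hunk relies on, using the field names from the stock demo-leaderboard template (benchmark / metric / col_name are assumptions, not shown in this diff):

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str   # task_key in the results json (e.g. "forms")
    metric: str      # metric_key in the results json (e.g. "acc")
    col_name: str    # column name displayed in the leaderboard (e.g. "Forms")

class Tasks(Enum):
    task0 = Task("forms", "acc", "Forms")
    task1 = Task("graphics", "acc", "Charts")
    task2 = Task("handwritten", "acc", "Handwritten Texts")
    task3 = Task("long_table", "acc", "Tables")
    task4 = Task("tiny_texts", "acc", "Tiny Texts")
    task5 = Task("multi_column", "acc", "Multiple Column Layout Texts")

With this shape, each results file is expected to expose an accuracy per task key, e.g. results["forms"]["acc"].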
src/envs.py CHANGED
@@ -6,12 +6,12 @@ from huggingface_hub import HfApi
 # ----------------------------------
 TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org

-OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "pulsia" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------

-REPO_ID = f"{OWNER}/leaderboard"
-QUEUE_REPO = f"{OWNER}/requests"
-RESULTS_REPO = f"{OWNER}/results"
+REPO_ID = f"{OWNER}/fr-bench-pdf2md"
+# QUEUE_REPO = f"{OWNER}/requests"
+RESULTS_REPO = f"{OWNER}/fr-bench-pdf2md-results"

 # If you setup a cache later, just change HF_HOME
 CACHE_PATH=os.getenv("HF_HOME", ".")
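
Note on the repo constants above: QUEUE_REPO is now commented out, so any module that still imports it from src.envs needs the same treatment. The usual consumption pattern in this template is to snapshot the results dataset locally before reading it; a hedged sketch under that assumption (EVAL_RESULTS_PATH is illustrative, the real path constant lives elsewhere in the repo):

import os
from huggingface_hub import snapshot_download

from src.envs import CACHE_PATH, RESULTS_REPO, TOKEN

# Illustrative local directory; the template normally derives it from CACHE_PATH.
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")

# Pull pulsia/fr-bench-pdf2md-results so the leaderboard can read the
# per-model json files from disk.
snapshot_download(
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
)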
src/leaderboard/read_evals.py CHANGED
@@ -57,7 +57,7 @@ class EvalResult:
         result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)

-        still_on_hub, _, model_config = is_model_on_hub(
+        still_on_hub, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
         architecture = "?"
@@ -176,7 +176,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     for model_result_filepath in model_result_filepaths:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
+        # eval_result.update_with_request_file(requests_path)

         # Store results of same eval together
         eval_name = eval_result.eval_name
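
Note on the read_evals hunks above: the first change unpacks two values from is_model_on_hub instead of three, which only holds if that helper (defined elsewhere in the repo, typically src/submission/check_validity.py in the template) returns (still_on_hub, config) rather than (still_on_hub, error_message, config). A hypothetical sketch of the two-value shape this call site now assumes, not the repo's actual implementation:

from transformers import AutoConfig

def is_model_on_hub(model_name: str, revision: str,
                    trust_remote_code: bool = False,
                    test_tokenizer: bool = False):
    # Hypothetical two-value variant matching the new call site:
    # returns (still_on_hub, config) instead of (still_on_hub, error, config).
    # test_tokenizer is kept for signature compatibility; unused in this sketch.
    try:
        config = AutoConfig.from_pretrained(
            model_name, revision=revision, trust_remote_code=trust_remote_code
        )
        return True, config
    except Exception:
        # Model is missing, gated, or otherwise unreachable on the Hub.
        return False, None

The second change comments out update_with_request_file, so metadata from the request files is no longer merged into each EvalResult.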