Fix conflicts
Browse files- .github/workflows/tests.yml +5 -1
- marker/output.py +1 -1
- scripts/verify_benchmark_scores.py +15 -2
- signatures/version1/cla.json +52 -0
.github/workflows/tests.yml
CHANGED
|
@@ -28,7 +28,11 @@ jobs:
|
|
| 28 |
- name: Run benchmark test
|
| 29 |
run: |
|
| 30 |
poetry run python benchmarks/overall.py benchmark_data/pdfs benchmark_data/references report.json
|
| 31 |
-
poetry run python scripts/verify_benchmark_scores.py report.json
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
|
|
|
|
| 28 |
- name: Run benchmark test
|
| 29 |
run: |
|
| 30 |
poetry run python benchmarks/overall.py benchmark_data/pdfs benchmark_data/references report.json
|
| 31 |
+
poetry run python scripts/verify_benchmark_scores.py report.json --type marker
|
| 32 |
+
- name: Run table benchmark
|
| 33 |
+
run: |
|
| 34 |
+
poetry run python benchmarks/table.py tables.json
|
| 35 |
+
poetry run python scripts/verify_benchmark_scores.py report.json --type table
|
| 36 |
|
| 37 |
|
| 38 |
|
marker/output.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
|
| 4 |
|
| 5 |
def get_subfolder_path(out_folder, fname):
|
| 6 |
-
subfolder_name = fname.
|
| 7 |
subfolder_path = os.path.join(out_folder, subfolder_name)
|
| 8 |
return subfolder_path
|
| 9 |
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def get_subfolder_path(out_folder, fname):
|
| 6 |
+
subfolder_name = fname.rsplit('.', 1)[0]
|
| 7 |
subfolder_path = os.path.join(out_folder, subfolder_name)
|
| 8 |
return subfolder_path
|
| 9 |
|
scripts/verify_benchmark_scores.py
CHANGED
|
@@ -9,12 +9,25 @@ def verify_scores(file_path):
|
|
| 9 |
multicolcnn_score = data["marker"]["files"]["multicolcnn.pdf"]["score"]
|
| 10 |
switch_trans_score = data["marker"]["files"]["switch_trans.pdf"]["score"]
|
| 11 |
|
| 12 |
-
if multicolcnn_score <= 0.
|
| 13 |
raise ValueError("One or more scores are below the required threshold of 0.4")
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
if __name__ == "__main__":
|
| 17 |
parser = argparse.ArgumentParser(description="Verify benchmark scores")
|
| 18 |
parser.add_argument("file_path", type=str, help="Path to the json file")
|
|
|
|
| 19 |
args = parser.parse_args()
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
multicolcnn_score = data["marker"]["files"]["multicolcnn.pdf"]["score"]
|
| 10 |
switch_trans_score = data["marker"]["files"]["switch_trans.pdf"]["score"]
|
| 11 |
|
| 12 |
+
if multicolcnn_score <= 0.37 or switch_trans_score <= 0.4:
|
| 13 |
raise ValueError("One or more scores are below the required threshold of 0.4")
|
| 14 |
|
| 15 |
|
| 16 |
+
def verify_table_scores(file_path):
|
| 17 |
+
with open(file_path, 'r') as file:
|
| 18 |
+
data = json.load(file)
|
| 19 |
+
|
| 20 |
+
avg = sum([r["score"] for r in data]) / len(data)
|
| 21 |
+
if avg < 0.7:
|
| 22 |
+
raise ValueError("Average score is below the required threshold of 0.7")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
if __name__ == "__main__":
|
| 26 |
parser = argparse.ArgumentParser(description="Verify benchmark scores")
|
| 27 |
parser.add_argument("file_path", type=str, help="Path to the json file")
|
| 28 |
+
parser.add_argument("--type", type=str, help="Type of file to verify", default="marker")
|
| 29 |
args = parser.parse_args()
|
| 30 |
+
if args.type == "marker":
|
| 31 |
+
verify_scores(args.file_path)
|
| 32 |
+
elif args.type == "table":
|
| 33 |
+
verify_table_scores(args.file_path)
|
signatures/version1/cla.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"signedContributors": [
|
| 3 |
+
{
|
| 4 |
+
"name": "korakot",
|
| 5 |
+
"id": 3155646,
|
| 6 |
+
"comment_id": 2143359366,
|
| 7 |
+
"created_at": "2024-06-01T08:25:52Z",
|
| 8 |
+
"repoId": 712111618,
|
| 9 |
+
"pullRequestNo": 161
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"name": "tosaddler",
|
| 13 |
+
"id": 13705399,
|
| 14 |
+
"comment_id": 2144014410,
|
| 15 |
+
"created_at": "2024-06-02T20:40:52Z",
|
| 16 |
+
"repoId": 712111618,
|
| 17 |
+
"pullRequestNo": 165
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"name": "q2333gh",
|
| 21 |
+
"id": 32679742,
|
| 22 |
+
"comment_id": 2156122900,
|
| 23 |
+
"created_at": "2024-06-08T18:01:39Z",
|
| 24 |
+
"repoId": 712111618,
|
| 25 |
+
"pullRequestNo": 176
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"name": "q2333gh",
|
| 29 |
+
"id": 32679742,
|
| 30 |
+
"comment_id": 2156614334,
|
| 31 |
+
"created_at": "2024-06-09T13:48:49Z",
|
| 32 |
+
"repoId": 712111618,
|
| 33 |
+
"pullRequestNo": 176
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"name": "aniketinamdar",
|
| 37 |
+
"id": 79044809,
|
| 38 |
+
"comment_id": 2157453610,
|
| 39 |
+
"created_at": "2024-06-10T06:43:39Z",
|
| 40 |
+
"repoId": 712111618,
|
| 41 |
+
"pullRequestNo": 179
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "Zxilly",
|
| 45 |
+
"id": 31370133,
|
| 46 |
+
"comment_id": 2295163597,
|
| 47 |
+
"created_at": "2024-08-18T07:44:04Z",
|
| 48 |
+
"repoId": 712111618,
|
| 49 |
+
"pullRequestNo": 257
|
| 50 |
+
}
|
| 51 |
+
]
|
| 52 |
+
}
|