Vik Paruchuri committed on
Commit
d6bdda8
·
2 Parent(s): fcfc3c0 7bff06d

Fix conflicts

Browse files
.github/workflows/tests.yml CHANGED
@@ -28,7 +28,11 @@ jobs:
28
  - name: Run benchmark test
29
  run: |
30
  poetry run python benchmarks/overall.py benchmark_data/pdfs benchmark_data/references report.json
31
- poetry run python scripts/verify_benchmark_scores.py report.json
 
 
 
 
32
 
33
 
34
 
 
28
  - name: Run benchmark test
29
  run: |
30
  poetry run python benchmarks/overall.py benchmark_data/pdfs benchmark_data/references report.json
31
+ poetry run python scripts/verify_benchmark_scores.py report.json --type marker
32
+ - name: Run table benchmark
33
+ run: |
34
+ poetry run python benchmarks/table.py tables.json
35
+ poetry run python scripts/verify_benchmark_scores.py report.json --type table
36
 
37
 
38
 
marker/output.py CHANGED
@@ -3,7 +3,7 @@ import json
3
 
4
 
5
  def get_subfolder_path(out_folder, fname):
6
- subfolder_name = fname.split(".")[0]
7
  subfolder_path = os.path.join(out_folder, subfolder_name)
8
  return subfolder_path
9
 
 
3
 
4
 
5
  def get_subfolder_path(out_folder, fname):
6
+ subfolder_name = fname.rsplit('.', 1)[0]
7
  subfolder_path = os.path.join(out_folder, subfolder_name)
8
  return subfolder_path
9
 
scripts/verify_benchmark_scores.py CHANGED
@@ -9,12 +9,25 @@ def verify_scores(file_path):
9
  multicolcnn_score = data["marker"]["files"]["multicolcnn.pdf"]["score"]
10
  switch_trans_score = data["marker"]["files"]["switch_trans.pdf"]["score"]
11
 
12
- if multicolcnn_score <= 0.39 or switch_trans_score <= 0.4:
13
  raise ValueError("One or more scores are below the required threshold of 0.4")
14
 
15
 
 
 
 
 
 
 
 
 
 
16
  if __name__ == "__main__":
17
  parser = argparse.ArgumentParser(description="Verify benchmark scores")
18
  parser.add_argument("file_path", type=str, help="Path to the json file")
 
19
  args = parser.parse_args()
20
- verify_scores(args.file_path)
 
 
 
 
9
  multicolcnn_score = data["marker"]["files"]["multicolcnn.pdf"]["score"]
10
  switch_trans_score = data["marker"]["files"]["switch_trans.pdf"]["score"]
11
 
12
+ if multicolcnn_score <= 0.37 or switch_trans_score <= 0.4:
13
  raise ValueError("One or more scores are below the required threshold of 0.4")
14
 
15
 
16
+ def verify_table_scores(file_path):
17
+ with open(file_path, 'r') as file:
18
+ data = json.load(file)
19
+
20
+ avg = sum([r["score"] for r in data]) / len(data)
21
+ if avg < 0.7:
22
+ raise ValueError("Average score is below the required threshold of 0.7")
23
+
24
+
25
  if __name__ == "__main__":
26
  parser = argparse.ArgumentParser(description="Verify benchmark scores")
27
  parser.add_argument("file_path", type=str, help="Path to the json file")
28
+ parser.add_argument("--type", type=str, help="Type of file to verify", default="marker")
29
  args = parser.parse_args()
30
+ if args.type == "marker":
31
+ verify_scores(args.file_path)
32
+ elif args.type == "table":
33
+ verify_table_scores(args.file_path)
signatures/version1/cla.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "signedContributors": [
3
+ {
4
+ "name": "korakot",
5
+ "id": 3155646,
6
+ "comment_id": 2143359366,
7
+ "created_at": "2024-06-01T08:25:52Z",
8
+ "repoId": 712111618,
9
+ "pullRequestNo": 161
10
+ },
11
+ {
12
+ "name": "tosaddler",
13
+ "id": 13705399,
14
+ "comment_id": 2144014410,
15
+ "created_at": "2024-06-02T20:40:52Z",
16
+ "repoId": 712111618,
17
+ "pullRequestNo": 165
18
+ },
19
+ {
20
+ "name": "q2333gh",
21
+ "id": 32679742,
22
+ "comment_id": 2156122900,
23
+ "created_at": "2024-06-08T18:01:39Z",
24
+ "repoId": 712111618,
25
+ "pullRequestNo": 176
26
+ },
27
+ {
28
+ "name": "q2333gh",
29
+ "id": 32679742,
30
+ "comment_id": 2156614334,
31
+ "created_at": "2024-06-09T13:48:49Z",
32
+ "repoId": 712111618,
33
+ "pullRequestNo": 176
34
+ },
35
+ {
36
+ "name": "aniketinamdar",
37
+ "id": 79044809,
38
+ "comment_id": 2157453610,
39
+ "created_at": "2024-06-10T06:43:39Z",
40
+ "repoId": 712111618,
41
+ "pullRequestNo": 179
42
+ },
43
+ {
44
+ "name": "Zxilly",
45
+ "id": 31370133,
46
+ "comment_id": 2295163597,
47
+ "created_at": "2024-08-18T07:44:04Z",
48
+ "repoId": 712111618,
49
+ "pullRequestNo": 257
50
+ }
51
+ ]
52
+ }