Fahad-S committed on
Commit
792a6ee
·
verified ·
1 Parent(s): 628ee35

Upload eval_ss.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. eval_ss.py +47 -0
eval_ss.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Report per-category and overall accuracy from ScreenSpot prediction files.

For every task in ``tasks`` the script loads the annotation JSON, maps each
ground-truth bounding box to its ``data_type``, then walks the matching
prediction JSONL and counts, per ``{task}_{data_type}`` category, how many
lines there are and how many of them are marked correct.
"""

import os
import json
import re

result_model = '/proj/cvl/users/x_fahkh2/UI-R1/ckpt/Qwen2.5-VL-3B-Instruct-Trained'

# Narrow this list (e.g. to ['desktop']) to evaluate a single platform.
tasks = ['mobile', 'desktop', 'web']
types = ['icon', 'text']

# Compiled once; captures the text between the brackets of the "gt_bbox" list.
_GT_BBOX_RE = re.compile(r'"gt_bbox": \[(.*?)\]')

# Correct-prediction counts and line counts, keyed by '{task}_{data_type}'.
result = {}
total = {}


def load_bbox_types(ref_json):
    """Return a dict mapping each annotation's bbox text to its data type.

    The key is ``str(entry['bbox'])`` with the surrounding brackets stripped,
    which is the form the prediction lines are matched against.  If two
    annotations share the same bbox text, the later one wins.

    Args:
        ref_json: Path to an annotation file holding a JSON list of entries
            with ``'bbox'`` and ``'data_type'`` keys.
    """
    # A context manager closes the handle; the original leaked it.
    with open(ref_json, 'r') as f:
        data = json.load(f)
    return {str(d['bbox']).strip('[]'): d['data_type'] for d in data}


def score_predictions(lines, bbox_type, task, result, total):
    """Accumulate per-category counts for one task's prediction lines.

    Args:
        lines: Iterable of raw JSONL lines (an open file works).
        bbox_type: Mapping produced by ``load_bbox_types``.
        task: Task name used as the category prefix.
        result: Dict of correct-prediction counts, updated in place.
        total: Dict of line counts, updated in place.

    Raises:
        ValueError: A non-blank line has no ``"gt_bbox": [...]`` field.
        KeyError: A line's ground-truth bbox is absent from ``bbox_type``.
    """
    for lineno, line in enumerate(lines, 1):
        # Blank lines (e.g. a trailing newline at end of file) carry no
        # prediction; skip them instead of failing on the regex miss.
        if not line.strip():
            continue

        match = _GT_BBOX_RE.search(line)
        if match is None:
            raise ValueError(
                f'{task}: line {lineno} has no "gt_bbox" field: {line.strip()!r}'
            )
        gt_bbox = match.group(1)

        try:
            data_type = bbox_type[gt_bbox]
        except KeyError:
            raise KeyError(
                f'{task}: line {lineno} gt_bbox [{gt_bbox}] not found in annotations'
            ) from None

        key = f'{task}_{data_type}'
        total[key] = total.get(key, 0) + 1

        # NOTE(review): this is a substring test on the whole line, so any
        # field containing "true"/"True" counts as correct. Presumably the
        # line carries a boolean correctness flag -- confirm the schema and
        # test that field directly.
        if "true" in line or "True" in line:
            result[key] = result.get(key, 0) + 1


def report(result, total):
    """Print accuracy for every category seen, then the overall accuracy."""
    # Iterate ``total`` so categories with zero correct predictions are
    # reported as 0.0 rather than silently omitted.
    for k in total:
        print(k, result.get(k, 0) / total[k])

    n_total = sum(total.values())
    if n_total:
        print(sum(result.values()) / n_total)
    else:
        # Avoid ZeroDivisionError when no prediction lines were read.
        print('no predictions found')


def main():
    """Score every task in ``tasks`` and print the accuracy report."""
    for task in tasks:
        ref_json = f'/proj/cvl/users/x_fahkh2/UI-R1/data/ScreenSpot/annotations/screenspot_{task}.json'
        bbox_type = load_bbox_types(ref_json)

        result_jsonl = os.path.join(result_model, f'infer/prediction_results_screenspot_{task}.jsonl')
        with open(result_jsonl, 'r') as f:
            score_predictions(f, bbox_type, task, result, total)

    report(result, total)


if __name__ == '__main__':
    main()