Fahad-S
/

UI-R1-ShowUI

Fahad-S committed on Aug 9, 2025

Commit

792a6ee

verified ·

1 Parent(s): 628ee35

Upload eval_ss.py with huggingface_hub

Files changed (1) hide show

eval_ss.py ADDED Viewed

+result_model = '/proj/cvl/users/x_fahkh2/UI-R1/ckpt/Qwen2.5-VL-3B-Instruct-Trained'
+tasks = ['mobile', 'desktop', 'web']
+#tasks = ['desktop']
+types = ['icon', 'text']
+import os
+import json
+import re
+result = {}
+total = {}
+for task in tasks:
+    ref_json = f'/proj/cvl/users/x_fahkh2/UI-R1/data/ScreenSpot/annotations/screenspot_{task}.json'
+    data = json.load(open(ref_json, 'r'))
+    bbox_type = {}
+    for d in data:
+        bbox = str(d['bbox']).strip('[]')
+        bbox_type[bbox] = d['data_type']
+    result_jsonl = os.path.join(result_model, f'infer/prediction_results_screenspot_{task}.jsonl')
+    with open(result_jsonl, 'r') as f:
+        lines = f.readlines()
+        for line in lines:
+            gt_bbox_match = re.search(r'"gt_bbox": \[(.*?)\]', line)
+            gt_bbox = gt_bbox_match.group(1)
+#             print(gt_bbox)
+            type = bbox_type[gt_bbox]
+            if "true" in line or "True" in line:
+                if f'{task}_{type}' not in result:
+                    result[f'{task}_{type}'] = 1
+                else:
+                    result[f'{task}_{type}'] = result[f'{task}_{type}'] + 1
+            if f'{task}_{type}' not in total:
+                total[f'{task}_{type}'] = 1
+            else:
+                total[f'{task}_{type}'] = total[f'{task}_{type}'] + 1
+#print(result)
+#print(total)
+for k in result.keys():
+    print(k,result[k]/total[k])
+print(sum(result.values()) / sum(total.values()))