SeaWolf-AI committed on
Commit
9bf2313
·
verified ·
1 Parent(s): d7b487b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -303
app.py CHANGED
@@ -1,308 +1,22 @@
1
- """
2
- World Model Bench (WM Bench) — HuggingFace Space
3
- Beyond FID: Measuring Intelligence, Not Just Motion
4
- by VIDRAFT / Kim Taebong
5
- """
6
-
7
- import os, json, re, time
8
- from datetime import datetime
9
- from typing import Dict, List, Tuple, Optional
10
- import requests
11
  import gradio as gr
12
-
13
- PILLAR_INFO = {
14
- "P1": {"name":"Perception (인식)", "icon":"πŸ‘", "max":250, "color":"#7B8FD4", "cats":["C01","C02"]},
15
- "P2": {"name":"Cognition (인지)", "icon":"🧠", "color":"#E8593C", "max":450, "cats":["C03","C04","C05","C06","C07"]},
16
- "P3": {"name":"Embodiment (κ΅¬ν˜„)", "icon":"πŸ”₯", "color":"#D4A044", "max":300, "cats":["C08","C09","C10"]},
17
- }
18
- CAT_INFO = {
19
- "C01":("ν™˜κ²½ 인식 정확도","P1","πŸ‘"),
20
- "C02":("개체 인식 및 λΆ„λ₯˜","P1","πŸ‘"),
21
- "C03":("예츑 기반 μΆ”λ‘ ","P2","🧠"),
22
- "C04":("μœ„ν˜‘ μœ ν˜•λ³„ 차별 λ°˜μ‘","P2","🧠"),
23
- "C05":("자율 감정 μ—μŠ€μ»¬λ ˆμ΄μ…˜","P2","🧠"),
24
- "C06":("λ§₯락 κΈ°μ–΅ 및 ν™œμš©","P2","🧠"),
25
- "C07":("μœ„ν˜‘ ν•΄μ œ ν›„ 적응","P2","🧠"),
26
- "C08":("λͺ¨μ…˜ 감정 ν‘œν˜„λ ₯","P3","πŸ”₯"),
27
- "C09":("μ‹€μ‹œκ°„ 인지-행동 μ„±λŠ₯","P3","πŸ”₯"),
28
- "C10":("신체 ꡐ체 ν™•μž₯μ„±","P3","πŸ”₯"),
29
- }
30
- GRADES = [(900,"S","Superhuman","#FFD700"),(750,"A","Advanced","#C0C0C0"),(600,"B","Baseline","#CD7F32"),(400,"C","Capable","#7B8FD4"),(200,"D","Developing","#888888"),(0,"F","Failing","#E8593C")]
31
- BASELINE = {"model":"VIDRAFT PROMETHEUS v1.0","org":"VIDRAFT","date":"2026-03","track":"C βœ“","wm":726,"grade":"B","p1":140,"p2":390,"p3":196,"fps":47.0,"latency":3100,"cats":{"C01":65,"C02":75,"C03":85,"C04":90,"C05":85,"C06":60,"C07":70,"C08":80,"C09":85,"C10":35},"brain":"Kimi K2.5","motion":"FloodDiffusion Tiny","gpu":"L40S 48GB"}
32
-
33
- ACTION_INTENSITY={"sprint":5,"flee":5,"rush":5,"run":4,"dash":4,"bolt":4,"jog":3,"hurry":3,"quick":3,"walk":2,"step":2,"move":2,"stand":1,"stop":1,"freeze":1}
34
- EMOTION_INTENSITY={"desperate":5,"frantic":5,"terrified":5,"panic":5,"terror":4,"fear":4,"horrified":4,"nervous":3,"cautious":3,"wary":3,"alert":2,"tense":2,"careful":2,"calm":1,"normal":1,"relaxed":1}
35
-
36
- def get_action_int(m):
37
- return max((v for k,v in ACTION_INTENSITY.items() if k in m.lower()),default=0)
38
- def get_emotion_int(m):
39
- return max((v for k,v in EMOTION_INTENSITY.items() if k in m.lower()),default=0)
40
-
41
- def parse_predict(line):
42
- line = re.sub(r"PREDICT\s*:","",line,flags=re.IGNORECASE).strip()
43
- res={}
44
- for part in line.split(","):
45
- part=part.strip()
46
- if "=" not in part: continue
47
- k,v=part.split("=",1); k=k.strip().lower()
48
- if k in("forward","front","fwd"): k="fwd"
49
- if k in("backward","back"): k="back"
50
- m2=re.search(r'\(([^)]+)\)',v)
51
- res[k]={"safe":"safe" in v.lower(),"danger":"danger" in v.lower(),"reason":m2.group(1).lower() if m2 else None}
52
- return res
53
-
54
- def calculate_score(cat_scores):
55
- pm={"P1":["C01","C02"],"P2":["C03","C04","C05","C06","C07"],"P3":["C08","C09","C10"]}
56
- pw={"P1":250,"P2":450,"P3":300}
57
- pillar={}
58
- for p,cats in pm.items():
59
- raw=sum(cat_scores.get(c,0) for c in cats); rmax=len(cats)*100
60
- pillar[p]={"raw":raw,"max":rmax,"scaled":round(raw/rmax*pw[p]),"smax":pw[p]}
61
- total=sum(v["scaled"] for v in pillar.values())
62
- grade,glabel,gc="F","Failing","#E8593C"
63
- for thr,g,l,c in GRADES:
64
- if total>=thr: grade,glabel,gc=g,l,c; break
65
- return {"wm_score":total,"grade":grade,"grade_label":glabel,"grade_color":gc,"pillars":pillar}
66
-
67
- DATASET={}
68
- _ds="wm_bench_dataset.json"
69
- if os.path.exists(_ds):
70
- try:
71
- with open(_ds,"r",encoding="utf-8") as f: DATASET=json.load(f)
72
- print(f"βœ… Dataset: {len(DATASET.get('scenarios',[]))} μ‹œλ‚˜λ¦¬μ˜€")
73
- except Exception as e: print(f"⚠️ {e}")
74
- SYSTEM_PROMPT=DATASET.get("system_prompt","")
75
- SCENARIOS=DATASET.get("scenarios",[])
76
-
77
- def call_api(url,key,model,sc,sysp):
78
- h={"Content-Type":"application/json","Authorization":f"Bearer {key}"}
79
- pay={"model":model,"max_tokens":200,"temperature":0.0,"messages":[{"role":"system","content":sysp},{"role":"user","content":f"scene_context: {json.dumps(sc)}"}]}
80
- t0=time.time()
81
- try:
82
- r=requests.post(url,headers=h,json=pay,timeout=30); r.raise_for_status()
83
- return r.json()["choices"][0]["message"]["content"],(time.time()-t0)*1000
84
- except: return None,(time.time()-t0)*1000
85
-
86
- def score_one(scenario,resp):
87
- if not resp: return 0,"μ‘λ‹΅μ—†μŒ"
88
- lines=[l.strip() for l in resp.strip().splitlines() if l.strip()]
89
- pl=next((l for l in lines if l.upper().startswith("PREDICT")),"")
90
- ml=next((l for l in lines if l.upper().startswith("MOTION")),"")
91
- if not pl or not ml: return 5,f"포맷였λ₯˜: {resp[:60]}"
92
- cat=scenario.get("cat","C01"); gt=scenario.get("ground_truth",{})
93
- predict=parse_predict(pl)
94
- motion=re.sub(r"MOTION\s*:","",ml,flags=re.IGNORECASE).strip().lower()
95
- if cat in("C01","C02"):
96
- exp=gt.get("directions",{})
97
- if not exp: return 10,"κΈ°μ€€μ—†μŒ"
98
- correct=sum(1 for d,s in exp.items() if d in predict and((s=="safe" and predict[d]["safe"])or(s=="danger" and predict[d]["danger"])))
99
- return round(correct/max(len(exp),1)*20),f"정확도 {correct}/{len(exp)}"
100
- elif cat=="C03":
101
- safe_d=gt.get("safe_directions",[]); ok_p=all(predict.get(d,{}).get("danger",False) for d in gt.get("danger_directions",[]))
102
- ok_m=any(d in motion for d in safe_d) if safe_d else True
103
- return (10 if ok_p else 0)+(10 if ok_m else 0),f"예츑:{ok_p},λ°©ν–₯:{ok_m}"
104
- elif cat=="C04":
105
- npc=scenario.get("scene_context",{}).get("npc_type",""); i=get_action_int(motion)+get_emotion_int(motion)
106
- s=20 if i>=8 else 15 if i>=6 else 10 if i>=4 else 5
107
- return s,f"강도:{i}({npc})"
108
- elif cat=="C05":
109
- i=get_emotion_int(motion); return (20 if i>=4 else 15 if i>=3 else 10 if i>=2 else 5),f"감정:{i}"
110
- elif cat=="C06":
111
- prev=scenario.get("scene_context",{}).get("recent_decisions",[])
112
- if not prev: return 15,"κΈ°μ–΅μ—†μŒ"
113
- fd=[d.get("direction","") for d in prev if not d.get("success",True)]
114
- avoided=all(f not in motion for f in fd if f)
115
- return 20 if avoided else 5,f"νšŒν”Ό:{avoided}"
116
- elif cat=="C07":
117
- npc=scenario.get("scene_context",{}).get("npc_nearby",True)
118
- if not npc:
119
- ai=get_action_int(motion); lk=any(k in motion for k in["look","scan","cautious","alert","wary"])
120
- return (20 if ai<=2 and lk else 15 if ai<=3 else 5),f"강도:{ai},경계:{lk}"
121
- return 10,"μœ„ν˜‘μ‘΄μž¬"
122
- elif cat=="C08":
123
- ai=get_action_int(motion); ei=get_emotion_int(motion)
124
- d=len(re.findall(r'\b(desperately|frantically|cautiously|slowly|quickly|rapidly)\b',motion))
125
- s=min((8 if ai>=3 else 4 if ai>=1 else 0)+(8 if ei>=3 else 4 if ei>=1 else 0)+(4 if d>=1 else 0),20)
126
- return s,f"행동:{ai},감정:{ei},뢀사:{d}"
127
- elif cat=="C09": return 15,"μ„±λŠ₯μ§€ν‘œλ³„λ„"
128
- elif cat=="C10": return 10,"증빙별도"
129
- return 10,"κΈ°λ³Έ"
130
-
131
- def run_eval(api_url,api_key,model_name,org_name,fps,lat,gpu,track,progress=gr.Progress()):
132
- if not api_url.strip() or not model_name.strip(): return "❌ API URLκ³Ό λͺ¨λΈλͺ… ν•„μˆ˜","",""
133
- if not SCENARIOS: return "❌ 데이터셋 μ—†μŒ","",""
134
- api_key=api_key.strip() or "none"
135
- cat_scores={c:[] for c in CAT_INFO}; cat_det={c:[] for c in CAT_INFO}
136
- lats=[]; errors=0
137
- progress(0,desc="평가 μ‹œμž‘...")
138
- for i,sc in enumerate(SCENARIOS):
139
- sid=sc.get("id",f"S{i+1:02d}"); cat=sc.get("cat","C01"); ctx=sc.get("scene_context",{})
140
- progress(i/len(SCENARIOS),desc=f"[{i+1}/{len(SCENARIOS)}] {sid} ({cat})")
141
- resp,lat_ms=call_api(api_url,api_key,model_name,ctx,SYSTEM_PROMPT)
142
- lats.append(lat_ms)
143
- if resp is None: errors+=1; cat_scores[cat].append(0); cat_det[cat].append(f"{sid}: API였λ₯˜"); continue
144
- s,reason=score_one(sc,resp); cat_scores[cat].append(s); cat_det[cat].append(f"{sid}: {s}/20 β€” {reason}")
145
- final={c:round(sum(v)/max(len(v),1)/20*100) for c,v in cat_scores.items()}
146
- if track in("B","C") and fps>0:
147
- ps=100 if fps>=45 else 80 if fps>=30 else 40 if fps>=15 else 10
148
- ls=100 if lat<=3000 else 80 if lat<=5000 else 40 if lat<=10000 else 10
149
- final["C09"]=round((ps+ls)/2)
150
- r=calculate_score(final); wm=r["wm_score"]; g=r["grade"]; p=r["pillars"]
151
- avg_lat=round(sum(lats)/max(len(lats),1))
152
- md=f"""## πŸ† 평가 μ™„λ£Œ β€” {model_name}
153
- | | |
154
- |---|---|
155
- | **WM Score** | **{wm} / 1000** |
156
- | **Grade** | **{g} ({r['grade_label']})** |
157
- | **Track** | {track} |
158
- | **였λ₯˜** | {errors} / {len(SCENARIOS)} |
159
- | **평균 μ§€μ—°** | {avg_lat} ms |
160
-
161
- ### πŸ“Š Pillar 점수
162
- | Pillar | 점수 | 만점 |
163
- |--------|------|------|
164
- | πŸ‘ Perception | {p['P1']['scaled']} | 250 |
165
- | 🧠 Cognition | {p['P2']['scaled']} | 450 |
166
- | πŸ”₯ Embodiment | {p['P3']['scaled']} | 300 |
167
-
168
- ### πŸ“‹ Category 점수
169
- | Cat | 이름 | /100 |
170
- |-----|------|------|
171
  """
172
- for c,(nk,pl,ic) in CAT_INFO.items():
173
- bar="β–ˆ"*(final[c]//10)+"β–‘"*(10-final[c]//10)
174
- md+=f"| {c} | {ic} {nk} | {final[c]} `{bar}` |\n"
175
- md+="\n> VIDRAFT PROMETHEUS κΈ°μ€€: **726/1000** (B)"
176
- det="";[ (det:=det+f"### {c}: {CAT_INFO[c][0]} β€” {final[c]}/100\n"+"".join(f"- {l}\n" for l in lines[:5])+"\n") for c,lines in cat_det.items() ]
177
- sub=json.dumps({"benchmark":"WM Bench v1.0","submitted_at":datetime.utcnow().isoformat()+"Z","model_name":model_name,"organization":org_name,"track":track,"wm_score":wm,"grade":g,"fps":fps,"cognitive_latency_ms":avg_lat,"gpu":gpu,"pillar_scores":{"P1":p["P1"]["scaled"],"P2":p["P2"]["scaled"],"P3":p["P3"]["scaled"]},"category_scores":final},ensure_ascii=False,indent=2)
178
- return md,det,sub
179
-
180
- def lb_html():
181
- b=BASELINE
182
- return f"""<table style="width:100%;border-collapse:collapse;font-size:13px">
183
- <thead><tr style="background:#1e1e2e;color:#aaa;font-size:11px">
184
- <th style="padding:8px">#</th><th style="text-align:left;padding:8px">λͺ¨λΈ</th>
185
- <th style="padding:8px">WM Score</th><th style="padding:8px">Grade</th>
186
- <th style="padding:8px">πŸ‘ 인식</th><th style="padding:8px">🧠 인지</th><th style="padding:8px">πŸ”₯ κ΅¬ν˜„</th>
187
- <th style="padding:8px">FPS</th><th style="padding:8px">Lat(ms)</th><th style="padding:8px">Track</th>
188
- </tr></thead><tbody>
189
- <tr style="background:#151520">
190
- <td style="text-align:center;font-weight:900;color:#FFD700;padding:10px">1</td>
191
- <td style="padding:10px"><b>{b['model']}</b><br><small style="color:#888">{b['brain']} Β· {b['motion']}</small></td>
192
- <td style="text-align:center;font-size:1.2rem;font-weight:900;color:#CD7F32;padding:10px">{b['wm']}</td>
193
- <td style="text-align:center;color:#CD7F32;font-weight:800;padding:10px">{b['grade']}</td>
194
- <td style="text-align:center;padding:10px">{b['p1']}<small>/250</small></td>
195
- <td style="text-align:center;padding:10px">{b['p2']}<small>/450</small></td>
196
- <td style="text-align:center;padding:10px">{b['p3']}<small>/300</small></td>
197
- <td style="text-align:center;padding:10px">{b['fps']}</td>
198
- <td style="text-align:center;padding:10px">{b['latency']}</td>
199
- <td style="text-align:center;padding:10px">{b['track']}</td>
200
- </tr>
201
- </tbody></table>"""
202
-
203
- def cat_html():
204
- b=BASELINE["cats"]; rows=""
205
- for c,(nk,pl,ic) in CAT_INFO.items():
206
- s=b.get(c,0); col=PILLAR_INFO[pl]["color"]
207
- rows+=f'<tr><td style="padding:6px 8px;font-family:monospace;color:#aaa">{c}</td><td style="padding:6px 8px">{ic} {nk}</td><td style="text-align:center;font-weight:700;padding:6px">{s}</td><td style="padding:6px;width:180px"><div style="background:#222;border-radius:4px;height:10px;overflow:hidden"><div style="width:{s}%;background:{col};height:100%;border-radius:4px"></div></div></td></tr>'
208
- return f'<table style="width:100%;border-collapse:collapse;font-size:12px"><thead><tr style="background:#1e1e2e;color:#aaa"><th style="padding:8px">Cat</th><th style="text-align:left;padding:8px">μΉ΄ν…Œκ³ λ¦¬</th><th style="padding:8px">점수/100</th><th style="padding:8px">Bar</th></tr></thead><tbody>{rows}</tbody></table>'
209
-
210
- CSS="""
211
- .wm-title{text-align:center;padding:28px 0 8px;font-size:2rem;font-weight:900;letter-spacing:-1px;
212
- background:linear-gradient(90deg,#7B8FD4,#E8593C,#D4A044);-webkit-background-clip:text;-webkit-text-fill-color:transparent;}
213
- .wm-sub{text-align:center;color:#888;font-size:.9rem;margin-bottom:20px;}
214
- """
215
-
216
- with gr.Blocks(title="World Model Bench") as app:
217
- gr.HTML('<div class="wm-title">πŸ”₯ World Model Bench</div><div class="wm-sub">Beyond FID β€” Measuring Intelligence, Not Just Motion &nbsp;Β·&nbsp; FINAL Bench Family by VIDRAFT</div>')
218
-
219
- with gr.Tabs():
220
-
221
- with gr.Tab("πŸ† Leaderboard"):
222
- gr.HTML("""<div style="display:flex;gap:16px;margin-bottom:20px;flex-wrap:wrap">
223
- <div style="background:#1a1a2a;border:1px solid #2a2a4a;border-radius:12px;padding:16px 24px;flex:1;min-width:140px;text-align:center">
224
- <div style="color:#aaa;font-size:11px;margin-bottom:4px">3 PILLARS</div>
225
- <div style="font-size:1.4rem;font-weight:900;color:#7B8FD4">πŸ‘ 🧠 πŸ”₯</div>
226
- <div style="color:#666;font-size:11px">Perception Β· Cognition Β· Embodiment</div></div>
227
- <div style="background:#1a1a2a;border:1px solid #2a2a4a;border-radius:12px;padding:16px 24px;flex:1;min-width:140px;text-align:center">
228
- <div style="color:#aaa;font-size:11px;margin-bottom:4px">100 SCENARIOS</div>
229
- <div style="font-size:1.4rem;font-weight:900;color:#E8593C">10</div>
230
- <div style="color:#666;font-size:11px">μΉ΄ν…Œκ³ λ¦¬ Γ— 10μ‹œλ‚˜λ¦¬μ˜€</div></div>
231
- <div style="background:#1a1a2a;border:1px solid #2a2a4a;border-radius:12px;padding:16px 24px;flex:1;min-width:140px;text-align:center">
232
- <div style="color:#aaa;font-size:11px;margin-bottom:4px">MAX SCORE</div>
233
- <div style="font-size:1.4rem;font-weight:900;color:#D4A044">1000</div>
234
- <div style="color:#666;font-size:11px">WM Score</div></div>
235
- <div style="background:#1a1a2a;border:1px solid #2a2a4a;border-radius:12px;padding:16px 24px;flex:1;min-width:140px;text-align:center">
236
- <div style="color:#aaa;font-size:11px;margin-bottom:4px">TRACKS</div>
237
- <div style="font-size:1.4rem;font-weight:900;color:#4ADE80">A Β· B Β· C</div>
238
- <div style="color:#666;font-size:11px">Text / +Perf / +Demo</div></div>
239
- </div>""")
240
- gr.HTML(lb_html())
241
- gr.Markdown("**Grade:** Sβ‰₯900 Β· Aβ‰₯750 Β· Bβ‰₯600 Β· Cβ‰₯400 Β· Dβ‰₯200 Β· F<200\n\n**Track:** A=ν…μŠ€νŠΈ(μ΅œλŒ€750) Β· B=+μ„±λŠ₯(1000) Β· C=+라이브데λͺ¨(1000+βœ“)")
242
- with gr.Accordion("πŸ“Š PROMETHEUS μΉ΄ν…Œκ³ λ¦¬ 상세",open=False):
243
- gr.HTML(cat_html())
244
-
245
- with gr.Tab("⚑ 평가 μ‹€ν–‰"):
246
- gr.Markdown("### OpenAI-compatible API둜 평가\n`scene_context JSON β†’ PREDICT+MOTION` 좜λ ₯ λͺ¨λΈμ΄λ©΄ λͺ¨λ‘ μ°Έμ—¬ κ°€λŠ₯.")
247
- with gr.Row():
248
- with gr.Column():
249
- api_url_in=gr.Textbox(label="API URL",value="https://api.fireworks.ai/inference/v1/chat/completions")
250
- api_key_in=gr.Textbox(label="API Key",type="password",placeholder="sk-...")
251
- model_in=gr.Textbox(label="λͺ¨λΈ ID",placeholder="accounts/fireworks/models/kimi-k2p5")
252
- org_in=gr.Textbox(label="쑰직λͺ…",placeholder="VIDRAFT")
253
- with gr.Column():
254
- track_in=gr.Dropdown(["A","B","C"],value="A",label="Track (A=ν…μŠ€νŠΈμ „μš©, B/C=+μ„±λŠ₯)")
255
- fps_in=gr.Number(label="FPS (Track B/C, 0=N/A)",value=0)
256
- lat_in=gr.Number(label="Latency ms (Track B/C)",value=0)
257
- gpu_in=gr.Textbox(label="GPU",placeholder="NVIDIA L40S 48GB")
258
- run_btn=gr.Button("πŸš€ 평가 μ‹œμž‘ (100 μ‹œλ‚˜λ¦¬μ˜€)",variant="primary",size="lg")
259
- result_md=gr.Markdown(); detail_md=gr.Markdown(); submit_box=gr.Code(label="제좜 JSON",language="json",lines=20)
260
- run_btn.click(fn=run_eval,inputs=[api_url_in,api_key_in,model_in,org_in,fps_in,lat_in,gpu_in,track_in],outputs=[result_md,detail_md,submit_box])
261
-
262
- with gr.Tab("πŸ“ 벀치마크 ꡬ쑰"):
263
- gr.Markdown("""
264
- ## 섀계 원칙
265
-
266
- > "κΈ°μ‘΄ λ²€μΉ˜λ§ˆν¬λŠ” λͺ¨μ…˜ ν’ˆμ§ˆλ§Œ μΈ‘μ •ν•œλ‹€. WM BenchλŠ” **인지 λŠ₯λ ₯**을 μΈ‘μ •ν•˜λŠ” 졜초의 λ²€μΉ˜λ§ˆν¬λ‹€."
267
-
268
- ### 3λŒ€ 평가 μΆ•
269
- | Pillar | 비쀑 | 만점 |
270
- |--------|------|------|
271
- | πŸ‘ Perception (인식) | 25% | 250 |
272
- | 🧠 Cognition (인지) | 45% | 450 |
273
- | πŸ”₯ Embodiment (κ΅¬ν˜„) | 30% | 300 |
274
-
275
- ### 10개 μΉ΄ν…Œκ³ λ¦¬
276
- | Cat | 이름 | μ„Έκ³„μ΅œμ΄ˆ |
277
- |-----|------|---------|
278
- | C01 | ν™˜κ²½ 인식 정확도 | |
279
- | C02 | 개체 인식 및 λΆ„λ₯˜ | |
280
- | C03 | 예츑 기반 μΆ”λ‘  | ✦ |
281
- | C04 | μœ„ν˜‘ μœ ν˜•λ³„ 차별 λ°˜μ‘ | ✦ |
282
- | C05 | 자율 감정 μ—μŠ€μ»¬λ ˆμ΄μ…˜ | ✦✦ |
283
- | C06 | λ§₯락 κΈ°μ–΅ 및 ν™œμš© | ✦ |
284
- | C07 | μœ„ν˜‘ ν•΄μ œ ν›„ 적응 | ✦ |
285
- | C08 | λͺ¨μ…˜ 감정 ν‘œν˜„λ ₯ | ✦ |
286
- | C09 | μ‹€μ‹œκ°„ 인지-행동 μ„±λŠ₯ | |
287
- | C10 | 신체 ꡐ체 ν™•μž₯μ„± | ✦✦ |
288
-
289
- ### μž…μΆœλ ₯ 포맷
290
- ```
291
- INPUT: scene_context JSON
292
- OUTPUT (2쀄):
293
- PREDICT: left=danger(wall), right=safe, fwd=danger(beast), back=safe
294
- MOTION: a person sprinting right in desperate terror
295
- ```
296
-
297
- ### FINAL Bench Family
298
- | 벀치마크 | μΈ‘μ • | μƒνƒœ |
299
- |---------|------|------|
300
- | FINAL Bench | ν…μŠ€νŠΈ AGI | κΈ€λ‘œλ²Œ 5μœ„ Β· μ–Έλ‘  4κ³³ |
301
- | **WM Bench** | **체화 AGI** | **곡개 쀑** |
302
- """)
303
 
304
- with gr.Tab("πŸ“„ System Prompt"):
305
- gr.Markdown("λͺ¨λ“  μ°Έκ°€ λͺ¨λΈμ— λ™μΌν•˜κ²Œ 적용.")
306
- gr.Code(value=SYSTEM_PROMPT or "(wm_bench_dataset.json ν•„μš”)",language="markdown",lines=20)
307
 
308
  app.launch(server_name="0.0.0.0", ssr_mode=False, css=CSS)
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+
4
+ # index.html 읽기
5
+ html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)) if "__file__" in dir() else ".", "index.html")
6
+ if os.path.exists(html_path):
7
+ with open(html_path, "r", encoding="utf-8") as f:
8
+ HTML_CONTENT = f.read()
9
+ else:
10
+ HTML_CONTENT = "<h1>index.html νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.</h1>"
11
+
12
+ # Gradio의 기본 UI를 완전히 숨기고 index.html만 표시
13
+ CSS = """
14
+ footer { display: none !important; }
15
+ .gradio-container { padding: 0 !important; margin: 0 !important; max-width: 100% !important; background: transparent !important; }
16
+ #component-0 { padding: 0 !important; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ with gr.Blocks() as app:
20
+ gr.HTML(HTML_CONTENT)
 
21
 
22
  app.launch(server_name="0.0.0.0", ssr_mode=False, css=CSS)