Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1338,17 +1338,24 @@ def _run_problem(axl_def:AXLProblemDef):
|
|
| 1338 |
t0=time.time()
|
| 1339 |
try:
|
| 1340 |
base_prob=build_base_problem(axl_def)
|
| 1341 |
-
binding,
|
| 1342 |
elapsed=round(time.time()-t0,2)
|
|
|
|
| 1343 |
survived=[t for t in DEBATE_ENGINE.all_traces if t.survived]
|
| 1344 |
failed=[t for t in DEBATE_ENGINE.all_traces if not t.survived]
|
| 1345 |
best_trace=min((t for t in survived),key=lambda t:t.ce_after,default=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1346 |
record={
|
| 1347 |
"problem":axl_def.name,
|
| 1348 |
"description":axl_def.description,
|
| 1349 |
"axioms":sorted(axl_def.axioms),
|
| 1350 |
-
"ce":round(
|
| 1351 |
-
"
|
|
|
|
| 1352 |
"elapsed":elapsed,
|
| 1353 |
"binding":{k:round(v,5) for k,v in binding.items()},
|
| 1354 |
"traces":[{
|
|
@@ -1412,11 +1419,17 @@ def _render_collapse(record:Dict) -> str:
|
|
| 1412 |
b=record["binding"]
|
| 1413 |
traces=record["traces"]
|
| 1414 |
survived=[t for t in traces if t["survived"]]
|
|
|
|
|
|
|
| 1415 |
best=min(survived,key=lambda t:t["ce"],default=None)
|
|
|
|
|
|
|
|
|
|
| 1416 |
obs_vars=set()
|
| 1417 |
for axl in AXL_PROBLEMS:
|
| 1418 |
if axl.name==record["problem"]: obs_vars=set(axl.observations.keys()); break
|
| 1419 |
|
|
|
|
| 1420 |
ce_color="#4CAF50" if record["solved"] else ("#FF9800" if record["ce"]<1.0 else "#EF5350")
|
| 1421 |
status="✓ SOLVED + VERIFIED" if record["solved"] else ("~ PARTIAL" if record["ce"]<1.0 else "✗ UNSOLVED")
|
| 1422 |
|
|
@@ -1440,12 +1453,14 @@ def _render_collapse(record:Dict) -> str:
|
|
| 1440 |
for t in traces
|
| 1441 |
)
|
| 1442 |
|
|
|
|
|
|
|
| 1443 |
return f"""
|
| 1444 |
<div style='border:1px solid #2a2a2a;border-radius:8px;margin-bottom:20px;overflow:hidden'>
|
| 1445 |
<div style='background:#111;padding:10px 14px;border-bottom:1px solid #222'>
|
| 1446 |
<span style='color:{ce_color};font-weight:700;font-size:1.1em'>{status}</span>
|
| 1447 |
<span style='color:#555;margin-left:16px;font-size:0.85em'>{record['problem']}</span>
|
| 1448 |
-
<span style='color:#333;margin-left:12px;font-size:0.8em'>
|
| 1449 |
<span style='color:#555;margin-left:12px;font-size:0.8em'>{record['elapsed']}s</span>
|
| 1450 |
</div>
|
| 1451 |
<div style='display:flex;gap:0'>
|
|
@@ -1525,7 +1540,7 @@ async def dashboard():
|
|
| 1525 |
<div style='margin-bottom:18px'>
|
| 1526 |
<span class='badge' style='color:#aaa'>Runs: {runs}</span>
|
| 1527 |
<span class='badge' style='color:#4CAF50'>Solved: {solved}</span>
|
| 1528 |
-
<span class='badge' style='color:#26C6DA'>Avg
|
| 1529 |
<span class='badge' style='color:#FF9800'>Problems: {len(AXL_PROBLEMS)}</span>
|
| 1530 |
<span class='badge' style='color:#5C6BC0'>Library: {len(DEBATE_ENGINE.library)}</span>
|
| 1531 |
</div>
|
|
|
|
| 1338 |
t0=time.time()
|
| 1339 |
try:
|
| 1340 |
base_prob=build_base_problem(axl_def)
|
| 1341 |
+
binding, domain_ce, rounds=DEBATE_ENGINE.debate(axl_def,base_prob)
|
| 1342 |
elapsed=round(time.time()-t0,2)
|
| 1343 |
+
|
| 1344 |
survived=[t for t in DEBATE_ENGINE.all_traces if t.survived]
|
| 1345 |
failed=[t for t in DEBATE_ENGINE.all_traces if not t.survived]
|
| 1346 |
best_trace=min((t for t in survived),key=lambda t:t.ce_after,default=None)
|
| 1347 |
+
|
| 1348 |
+
# FIX: Calculate actual CE against the original base problem
|
| 1349 |
+
actual_base_ce = base_prob.constraint_energy(binding)
|
| 1350 |
+
is_truly_solved = len(survived) > 0
|
| 1351 |
+
|
| 1352 |
record={
|
| 1353 |
"problem":axl_def.name,
|
| 1354 |
"description":axl_def.description,
|
| 1355 |
"axioms":sorted(axl_def.axioms),
|
| 1356 |
+
"ce":round(actual_base_ce, 6), # Use REAL energy
|
| 1357 |
+
"domain_ce":round(domain_ce, 6), # Keep domain CE for reference
|
| 1358 |
+
"solved":is_truly_solved, # ONLY true if verified
|
| 1359 |
"elapsed":elapsed,
|
| 1360 |
"binding":{k:round(v,5) for k,v in binding.items()},
|
| 1361 |
"traces":[{
|
|
|
|
| 1419 |
b=record["binding"]
|
| 1420 |
traces=record["traces"]
|
| 1421 |
survived=[t for t in traces if t["survived"]]
|
| 1422 |
+
|
| 1423 |
+
# FIX: If nothing survived, pick the best attempt to show WHY it failed
|
| 1424 |
best=min(survived,key=lambda t:t["ce"],default=None)
|
| 1425 |
+
if not best and traces:
|
| 1426 |
+
best=min(traces,key=lambda t:t.get("g1_ce",999.0))
|
| 1427 |
+
|
| 1428 |
obs_vars=set()
|
| 1429 |
for axl in AXL_PROBLEMS:
|
| 1430 |
if axl.name==record["problem"]: obs_vars=set(axl.observations.keys()); break
|
| 1431 |
|
| 1432 |
+
# FIX: Ensure it correctly marks it as UNSOLVED when CE is high or unverified
|
| 1433 |
ce_color="#4CAF50" if record["solved"] else ("#FF9800" if record["ce"]<1.0 else "#EF5350")
|
| 1434 |
status="✓ SOLVED + VERIFIED" if record["solved"] else ("~ PARTIAL" if record["ce"]<1.0 else "✗ UNSOLVED")
|
| 1435 |
|
|
|
|
| 1453 |
for t in traces
|
| 1454 |
)
|
| 1455 |
|
| 1456 |
+
dom_ce_str = f" | DomCE={record.get('domain_ce', 0.0):.6f}" if 'domain_ce' in record else ""
|
| 1457 |
+
|
| 1458 |
return f"""
|
| 1459 |
<div style='border:1px solid #2a2a2a;border-radius:8px;margin-bottom:20px;overflow:hidden'>
|
| 1460 |
<div style='background:#111;padding:10px 14px;border-bottom:1px solid #222'>
|
| 1461 |
<span style='color:{ce_color};font-weight:700;font-size:1.1em'>{status}</span>
|
| 1462 |
<span style='color:#555;margin-left:16px;font-size:0.85em'>{record['problem']}</span>
|
| 1463 |
+
<span style='color:#333;margin-left:12px;font-size:0.8em'>BaseCE={record['ce']:.6f}{dom_ce_str}</span>
|
| 1464 |
<span style='color:#555;margin-left:12px;font-size:0.8em'>{record['elapsed']}s</span>
|
| 1465 |
</div>
|
| 1466 |
<div style='display:flex;gap:0'>
|
|
|
|
| 1540 |
<div style='margin-bottom:18px'>
|
| 1541 |
<span class='badge' style='color:#aaa'>Runs: {runs}</span>
|
| 1542 |
<span class='badge' style='color:#4CAF50'>Solved: {solved}</span>
|
| 1543 |
+
<span class='badge' style='color:#26C6DA'>Avg BaseCE: {avg_ce}</span>
|
| 1544 |
<span class='badge' style='color:#FF9800'>Problems: {len(AXL_PROBLEMS)}</span>
|
| 1545 |
<span class='badge' style='color:#5C6BC0'>Library: {len(DEBATE_ENGINE.library)}</span>
|
| 1546 |
</div>
|