everydaytok committed on
Commit
9f4ec78
·
verified ·
1 Parent(s): 4a165f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -1338,17 +1338,24 @@ def _run_problem(axl_def:AXLProblemDef):
1338
  t0=time.time()
1339
  try:
1340
  base_prob=build_base_problem(axl_def)
1341
- binding,ce,rounds=DEBATE_ENGINE.debate(axl_def,base_prob)
1342
  elapsed=round(time.time()-t0,2)
 
1343
  survived=[t for t in DEBATE_ENGINE.all_traces if t.survived]
1344
  failed=[t for t in DEBATE_ENGINE.all_traces if not t.survived]
1345
  best_trace=min((t for t in survived),key=lambda t:t.ce_after,default=None)
 
 
 
 
 
1346
  record={
1347
  "problem":axl_def.name,
1348
  "description":axl_def.description,
1349
  "axioms":sorted(axl_def.axioms),
1350
- "ce":round(ce,6),
1351
- "solved":ce<SOLVE_THRESHOLD,
 
1352
  "elapsed":elapsed,
1353
  "binding":{k:round(v,5) for k,v in binding.items()},
1354
  "traces":[{
@@ -1412,11 +1419,17 @@ def _render_collapse(record:Dict) -> str:
1412
  b=record["binding"]
1413
  traces=record["traces"]
1414
  survived=[t for t in traces if t["survived"]]
 
 
1415
  best=min(survived,key=lambda t:t["ce"],default=None)
 
 
 
1416
  obs_vars=set()
1417
  for axl in AXL_PROBLEMS:
1418
  if axl.name==record["problem"]: obs_vars=set(axl.observations.keys()); break
1419
 
 
1420
  ce_color="#4CAF50" if record["solved"] else ("#FF9800" if record["ce"]<1.0 else "#EF5350")
1421
  status="✓ SOLVED + VERIFIED" if record["solved"] else ("~ PARTIAL" if record["ce"]<1.0 else "✗ UNSOLVED")
1422
 
@@ -1440,12 +1453,14 @@ def _render_collapse(record:Dict) -> str:
1440
  for t in traces
1441
  )
1442
 
 
 
1443
  return f"""
1444
  <div style='border:1px solid #2a2a2a;border-radius:8px;margin-bottom:20px;overflow:hidden'>
1445
  <div style='background:#111;padding:10px 14px;border-bottom:1px solid #222'>
1446
  <span style='color:{ce_color};font-weight:700;font-size:1.1em'>{status}</span>
1447
  <span style='color:#555;margin-left:16px;font-size:0.85em'>{record['problem']}</span>
1448
- <span style='color:#333;margin-left:12px;font-size:0.8em'>CE={record['ce']:.6f}</span>
1449
  <span style='color:#555;margin-left:12px;font-size:0.8em'>{record['elapsed']}s</span>
1450
  </div>
1451
  <div style='display:flex;gap:0'>
@@ -1525,7 +1540,7 @@ async def dashboard():
1525
  <div style='margin-bottom:18px'>
1526
  <span class='badge' style='color:#aaa'>Runs: {runs}</span>
1527
  <span class='badge' style='color:#4CAF50'>Solved: {solved}</span>
1528
- <span class='badge' style='color:#26C6DA'>Avg CE: {avg_ce}</span>
1529
  <span class='badge' style='color:#FF9800'>Problems: {len(AXL_PROBLEMS)}</span>
1530
  <span class='badge' style='color:#5C6BC0'>Library: {len(DEBATE_ENGINE.library)}</span>
1531
  </div>
 
1338
  t0=time.time()
1339
  try:
1340
  base_prob=build_base_problem(axl_def)
1341
+ binding, domain_ce, rounds=DEBATE_ENGINE.debate(axl_def,base_prob)
1342
  elapsed=round(time.time()-t0,2)
1343
+
1344
  survived=[t for t in DEBATE_ENGINE.all_traces if t.survived]
1345
  failed=[t for t in DEBATE_ENGINE.all_traces if not t.survived]
1346
  best_trace=min((t for t in survived),key=lambda t:t.ce_after,default=None)
1347
+
1348
+ # FIX: Calculate actual CE against the original base problem
1349
+ actual_base_ce = base_prob.constraint_energy(binding)
1350
+ is_truly_solved = len(survived) > 0
1351
+
1352
  record={
1353
  "problem":axl_def.name,
1354
  "description":axl_def.description,
1355
  "axioms":sorted(axl_def.axioms),
1356
+ "ce":round(actual_base_ce, 6), # Use REAL energy
1357
+ "domain_ce":round(domain_ce, 6), # Keep domain CE for reference
1358
+ "solved":is_truly_solved, # ONLY true if verified
1359
  "elapsed":elapsed,
1360
  "binding":{k:round(v,5) for k,v in binding.items()},
1361
  "traces":[{
 
1419
  b=record["binding"]
1420
  traces=record["traces"]
1421
  survived=[t for t in traces if t["survived"]]
1422
+
1423
+ # FIX: If nothing survived, pick the best attempt to show WHY it failed
1424
  best=min(survived,key=lambda t:t["ce"],default=None)
1425
+ if not best and traces:
1426
+ best=min(traces,key=lambda t:t.get("g1_ce",999.0))
1427
+
1428
  obs_vars=set()
1429
  for axl in AXL_PROBLEMS:
1430
  if axl.name==record["problem"]: obs_vars=set(axl.observations.keys()); break
1431
 
1432
+ # NOTE: status follows record["solved"]; an unsolved record shows PARTIAL when ce<1.0, otherwise UNSOLVED
1433
  ce_color="#4CAF50" if record["solved"] else ("#FF9800" if record["ce"]<1.0 else "#EF5350")
1434
  status="✓ SOLVED + VERIFIED" if record["solved"] else ("~ PARTIAL" if record["ce"]<1.0 else "✗ UNSOLVED")
1435
 
 
1453
  for t in traces
1454
  )
1455
 
1456
+ dom_ce_str = f" | DomCE={record.get('domain_ce', 0.0):.6f}" if 'domain_ce' in record else ""
1457
+
1458
  return f"""
1459
  <div style='border:1px solid #2a2a2a;border-radius:8px;margin-bottom:20px;overflow:hidden'>
1460
  <div style='background:#111;padding:10px 14px;border-bottom:1px solid #222'>
1461
  <span style='color:{ce_color};font-weight:700;font-size:1.1em'>{status}</span>
1462
  <span style='color:#555;margin-left:16px;font-size:0.85em'>{record['problem']}</span>
1463
+ <span style='color:#333;margin-left:12px;font-size:0.8em'>BaseCE={record['ce']:.6f}{dom_ce_str}</span>
1464
  <span style='color:#555;margin-left:12px;font-size:0.8em'>{record['elapsed']}s</span>
1465
  </div>
1466
  <div style='display:flex;gap:0'>
 
1540
  <div style='margin-bottom:18px'>
1541
  <span class='badge' style='color:#aaa'>Runs: {runs}</span>
1542
  <span class='badge' style='color:#4CAF50'>Solved: {solved}</span>
1543
+ <span class='badge' style='color:#26C6DA'>Avg BaseCE: {avg_ce}</span>
1544
  <span class='badge' style='color:#FF9800'>Problems: {len(AXL_PROBLEMS)}</span>
1545
  <span class='badge' style='color:#5C6BC0'>Library: {len(DEBATE_ENGINE.library)}</span>
1546
  </div>