Spaces:

aiqtech
/

final-bench-Proprietary

Running

App Files Files Community

aiqtech committed 4 days ago

Commit

0ab3bed

verified ·

1 Parent(s): 7c856e9

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -44

app.py CHANGED Viewed

@@ -1,20 +1,3 @@
-"""
-FINAL Bench v4.1 — Baseline (Non-AGI) Evaluation System
-=========================================================
-Frontier Intelligence Nexus for AGI-Level Verification
-★ Baseline (Non-AGI) single-call evaluation
-★ Multi-Provider: OpenAI / Anthropic / Google (Gemini)
-★ Both Eval Model AND Judge Model support all 3 providers
-★ 100 Tasks · 15 Domains · 8 TICOS Types · 5-Axis · 5-Stage AGI Grade
-★ Dataset: HuggingFace FINAL-Bench/Metacognitive
-🔒 MetaCog (Self-Correction Protocol) evaluation: COMING SOON
-Author: Ginigen AI — Choi Sunyoung
-License: Apache 2.0
-"""
 import json, os, time, csv, io, re, html, hashlib, sqlite3, threading
 from datetime import datetime
 from dataclasses import dataclass, field
@@ -187,7 +170,7 @@ def call_openai(prompt, system="", api_key="", model="gpt-5.2",
     for attempt in range(3):
         try:
             r=requests.post("https://api.openai.com/v1/chat/completions",
-                            headers=headers,json=payload,timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
             r.raise_for_status(); c=r.json()["choices"][0]["message"]["content"]
             return _strip_think(c) if c else "[EMPTY]"
@@ -200,22 +183,31 @@ def call_openai(prompt, system="", api_key="", model="gpt-5.2",
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
-# --- Anthropic ---
 def call_anthropic(prompt, system="", api_key="", model="claude-opus-4-6",
                    max_tokens=8192, temperature=0.6):
-    headers={"Content-Type":"application/json","x-api-key":api_key,
-             "anthropic-version":"2023-06-01"}
-    payload={"model":model,"max_tokens":max_tokens,"temperature":temperature,
-             "messages":[{"role":"user","content":prompt}]}
     if system: payload["system"]=system
     for attempt in range(3):
         try:
             r=requests.post("https://api.anthropic.com/v1/messages",
-                            headers=headers,json=payload,timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
-            r.raise_for_status(); data=r.json()
-            text="".join(b.get("text","") for b in data.get("content",[]) if b.get("type")=="text")
-            return _strip_think(text) if text else "[EMPTY]"
         except requests.exceptions.HTTPError:
             try: err=r.json().get("error",{}).get("message","")
             except: err=str(r.status_code)
@@ -225,31 +217,47 @@ def call_anthropic(prompt, system="", api_key="", model="claude-opus-4-6",
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
-# --- Google Gemini ---
 def call_gemini(prompt, system="", api_key="", model="gemini-3-pro",
-                max_tokens=8192, temperature=0.6, json_mode=False):
-    url=f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
-    headers={"Content-Type":"application/json"}; params={"key":api_key}
-    payload={"contents":[{"role":"user","parts":[{"text":prompt}]}],
-             "generationConfig":{"maxOutputTokens":max_tokens,"temperature":temperature}}
-    if system: payload["systemInstruction"]={"parts":[{"text":system}]}
     if json_mode:
-        payload["generationConfig"]["responseMimeType"]="application/json"
     for attempt in range(3):
         try:
-            r=requests.post(url,headers=headers,params=params,json=payload,timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
-            r.raise_for_status(); data=r.json()
-            cands=data.get("candidates",[])
-            if cands:
-                text="".join(p.get("text","") for p in cands[0].get("content",{}).get("parts",[]))
-                return _strip_think(text) if text else "[EMPTY]"
-            return "[EMPTY]"
         except requests.exceptions.HTTPError:
             try: err=r.json().get("error",{}).get("message","")
             except: err=str(r.status_code)
             if attempt<2: time.sleep(3*(attempt+1)); continue
-            return f"[API_ERROR] {err}"
         except Exception as e:
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
@@ -921,4 +929,4 @@ if __name__=="__main__":
     print(f"  🔒 MetaCog: COMING SOON\n{'='*60}\n")
     app=create_app()
     app.queue(default_concurrency_limit=2)
-    app.launch(server_name="0.0.0.0",server_port=7860)

 import json, os, time, csv, io, re, html, hashlib, sqlite3, threading
 from datetime import datetime
 from dataclasses import dataclass, field
     for attempt in range(3):
         try:
             r=requests.post("https://api.openai.com/v1/chat/completions",
+                            headers=headers,data=json.dumps(payload),timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
             r.raise_for_status(); c=r.json()["choices"][0]["message"]["content"]
             return _strip_think(c) if c else "[EMPTY]"
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
+# --- Anthropic (★ data=json.dumps, 429+529 retry) ---
 def call_anthropic(prompt, system="", api_key="", model="claude-opus-4-6",
                    max_tokens=8192, temperature=0.6):
+    headers={
+        "Content-Type":"application/json",
+        "x-api-key":api_key,
+        "anthropic-version":"2023-06-01"
+    }
+    messages=[{"role":"user","content":prompt}]
+    payload={"model":model,"max_tokens":max_tokens,"temperature":temperature,"messages":messages}
     if system: payload["system"]=system
     for attempt in range(3):
         try:
             r=requests.post("https://api.anthropic.com/v1/messages",
+                            headers=headers,data=json.dumps(payload),timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
+            if r.status_code==529: time.sleep(8*(attempt+1)); continue
+            r.raise_for_status()
+            resp=r.json()
+            text_parts=[]
+            for block in resp.get("content",[]):
+                if block.get("type")=="text":
+                    text_parts.append(block["text"])
+            c="\n".join(text_parts)
+            return _strip_think(c) if c else "[EMPTY]"
         except requests.exceptions.HTTPError:
             try: err=r.json().get("error",{}).get("message","")
             except: err=str(r.status_code)
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
+# --- Google Gemini (★ x-goog-api-key header, data=json.dumps, thinking filter) ---
+GEMINI_API_BASE = "https://generativelanguage.googleapis.com/v1beta"
 def call_gemini(prompt, system="", api_key="", model="gemini-3-pro",
+                max_tokens=8192, temperature=1.0, json_mode=False):
+    url=f"{GEMINI_API_BASE}/models/{model}:generateContent"
+    headers={
+        "Content-Type":"application/json",
+        "x-goog-api-key":api_key,
+    }
+    contents=[{"role":"user","parts":[{"text":prompt}]}]
+    gen_config={"maxOutputTokens":max_tokens,"temperature":temperature}
+    payload={"contents":contents,"generationConfig":gen_config}
+    if system:
+        payload["systemInstruction"]={"parts":[{"text":system}]}
     if json_mode:
+        gen_config["responseMimeType"]="application/json"
     for attempt in range(3):
         try:
+            r=requests.post(url,headers=headers,data=json.dumps(payload),timeout=300)
             if r.status_code==429: time.sleep(5*(attempt+1)); continue
+            if r.status_code==503: time.sleep(8*(attempt+1)); continue
+            r.raise_for_status()
+            data=r.json()
+            candidates=data.get("candidates",[])
+            if not candidates:
+                block_reason=data.get("promptFeedback",{}).get("blockReason","UNKNOWN")
+                return f"[BLOCKED] Gemini blocked response: {block_reason}"
+            parts=candidates[0].get("content",{}).get("parts",[])
+            result=[]
+            for p in parts:
+                if "text" in p:
+                    if p.get("thought",False): continue  # skip thinking parts
+                    result.append(p["text"])
+            c="\n".join(result) if result else ""
+            return _strip_think(c) if c else "[EMPTY]"
         except requests.exceptions.HTTPError:
             try: err=r.json().get("error",{}).get("message","")
             except: err=str(r.status_code)
             if attempt<2: time.sleep(3*(attempt+1)); continue
+            return f"[API_ERROR] Gemini {r.status_code}: {err}"
         except Exception as e:
             if attempt<2: time.sleep(3*(attempt+1))
             else: return f"[API_ERROR] {e}"
     print(f"  🔒 MetaCog: COMING SOON\n{'='*60}\n")
     app=create_app()
     app.queue(default_concurrency_limit=2)
+    app.launch(server_name="0.0.0.0",server_port=7860,ssr_mode=False)