# app.py — roc-hci TuringBench evaluation service (commit 7f8909f, verified)
import json, os, threading
import pandas as pd
from fastapi import FastAPI, Request, Response, Header
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
import uvicorn
app = FastAPI()
API = HfApi()
# Hub access token; optional (None) so the app can still start without it.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Hidden ground-truth labels are injected via env; a missing PRIVATE_LABELS
# raises KeyError at startup, which is intentional — the app is useless without them.
SECRET = os.environ["PRIVATE_LABELS"]
LABELS = json.loads(SECRET)["who_is_human"]
# Shared secret for webhook authentication; if unset, the webhook skips the check.
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
SUBMISSIONS_REPO = "roc-hci/turing-bench-submissions"
RESULTS_REPO = "roc-hci/turing-bench-results"
def get_pending_submissions():
    """Return the set of submission IDs that have no result file yet.

    A submission is identified by the stem of its metadata file
    (``metadata/<id>.json`` in the submissions repo); a completed evaluation
    is recorded as ``results/<id>.json`` in the results repo.
    """
    # List all submission metadata files
    submission_files = [
        f for f in list_repo_files(SUBMISSIONS_REPO, repo_type="dataset", token=HF_TOKEN)
        if f.startswith("metadata/") and f.endswith(".json")
    ]
    # Only consider files under results/ — the old filter matched ANY .json
    # anywhere in the results repo, so an unrelated JSON file (e.g. a
    # top-level config.json) would be mistaken for a completed evaluation
    # and silently hide a pending submission.
    result_files = [
        f for f in list_repo_files(RESULTS_REPO, repo_type="dataset", token=HF_TOKEN)
        if f.startswith("results/") and f.endswith(".json")
    ]
    # removeprefix/removesuffix only strip at the edges; str.replace would
    # also mangle IDs that happen to contain ".json" or "metadata/" inside.
    submitted_ids = {f.removeprefix("metadata/").removesuffix(".json") for f in submission_files}
    evaluated_ids = {f.removeprefix("results/").removesuffix(".json") for f in result_files}
    return submitted_ids - evaluated_ids
def evaluate_submission(submission_id: str):
    """Score one submission and publish its result record to the results repo.

    Downloads the submission's metadata and predictions from the submissions
    repo, runs the scorer, and uploads ``results/<submission_id>.json``.
    """
    # Fetch the metadata record describing this submission.
    meta_file = hf_hub_download(
        repo_id=SUBMISSIONS_REPO,
        filename=f"metadata/{submission_id}.json",
        repo_type="dataset",
        token=HF_TOKEN,
    )
    with open(meta_file) as fh:
        meta = json.load(fh)

    # Fetch the predictions file the metadata points at.
    preds_file = hf_hub_download(
        repo_id=SUBMISSIONS_REPO,
        filename=meta["predictions_file"],
        repo_type="dataset",
        token=HF_TOKEN,
    )

    # ---- Your evaluation logic goes here ----
    scores = run_evaluation(preds_file)
    # ------------------------------------------

    # Assemble the leaderboard record for this submission.
    record = {
        "model_name": meta["model_name"],
        "submitted_by": meta["submitted_by"],
        "submission_time": meta["submission_time"],
        "accuracy": scores["Accuracy"],
    }

    # Upload the result; its presence is what marks the submission as evaluated.
    API.upload_file(
        path_or_fileobj=json.dumps(record).encode(),
        path_in_repo=f"results/{submission_id}.json",
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    print(f"Evaluated {submission_id}: {scores}")
def run_evaluation(predictions_path: str) -> dict:
    """Score a predictions CSV against the hidden ground-truth labels.

    Reads the ``who_is_human`` column, normalizes each entry to a stripped
    string, and computes exact-match accuracy over the label set.
    """
    frame = pd.read_csv(predictions_path)
    guesses = frame["who_is_human"].astype(str).str.strip().tolist()
    # zip truncates at the shorter sequence, but the denominator stays
    # len(LABELS), so missing predictions count as wrong.
    n_correct = sum(guess == truth for guess, truth in zip(guesses, LABELS))
    return {"Accuracy": n_correct / len(LABELS)}
@app.get("/")
def health():
return "OK"
@app.post("/webhook")
async def webhook(request: Request, x_webhook_secret: str = Header(None)):
if WEBHOOK_SECRET and x_webhook_secret != WEBHOOK_SECRET:
return Response(status_code=403)
payload = await request.json()
print("Webhook received:", json.dumps(payload, indent=2))
event = payload.get("event", {})
repo = payload.get("repo", {})
if event.get("action") == "update" and repo.get("name") == SUBMISSIONS_REPO:
threading.Thread(target=process_pending, daemon=True).start()
return "OK"
def process_pending():
    """Evaluate every submission that does not yet have a result file."""
    for sid in get_pending_submissions():
        try:
            evaluate_submission(sid)
        except Exception as exc:
            # Best-effort: one failing submission must not block the others.
            print(f"Error evaluating {sid}: {exc}")
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=7860)