# Hugging Face Space: Turing-bench submission evaluator (webhook-driven).
import hmac
import json
import os
import threading

import pandas as pd
import uvicorn
from fastapi import FastAPI, Header, Request, Response
from huggingface_hub import HfApi, hf_hub_download, list_repo_files
app = FastAPI()
API = HfApi()
# Token for reading/writing the (presumably private) repos; may be None,
# in which case hub calls fall back to anonymous access.
HF_TOKEN = os.environ.get("HF_TOKEN")
# PRIVATE_LABELS is required (KeyError at import time if unset): a JSON
# document whose "who_is_human" entry holds the ground-truth labels.
SECRET = os.environ["PRIVATE_LABELS"]
LABELS = json.loads(SECRET)["who_is_human"]
# Shared secret for webhook authentication. NOTE(review): if unset, the
# webhook endpoint skips the secret check entirely — confirm intended.
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
# Dataset repos: incoming submissions and published evaluation results.
SUBMISSIONS_REPO = "roc-hci/turing-bench-submissions"
RESULTS_REPO = "roc-hci/turing-bench-results"
def get_pending_submissions():
    """Return the set of submission IDs that have no result file yet.

    Compares ``metadata/<id>.json`` files in the submissions repo against
    ``results/<id>.json`` files in the results repo.

    Returns:
        set[str]: submission IDs awaiting evaluation.
    """
    submission_files = [
        f for f in list_repo_files(SUBMISSIONS_REPO, repo_type="dataset", token=HF_TOKEN)
        if f.startswith("metadata/") and f.endswith(".json")
    ]
    # Fix: require the results/ prefix. The old filter accepted ANY .json
    # in the repo, so a stray top-level JSON file would be misread as an
    # already-evaluated submission ID.
    result_files = [
        f for f in list_repo_files(RESULTS_REPO, repo_type="dataset", token=HF_TOKEN)
        if f.startswith("results/") and f.endswith(".json")
    ]
    # Slice the fixed prefix/suffix instead of str.replace, which would
    # corrupt an ID that itself contains "metadata/" or ".json".
    submitted_ids = {f[len("metadata/"):-len(".json")] for f in submission_files}
    evaluated_ids = {f[len("results/"):-len(".json")] for f in result_files}
    return submitted_ids - evaluated_ids
def evaluate_submission(submission_id: str):
    """Fetch one submission, score it, and publish the result JSON.

    Downloads ``metadata/<submission_id>.json`` and the predictions file it
    names from the submissions repo, scores the predictions, then uploads
    ``results/<submission_id>.json`` to the results repo.
    """
    # Pull the submission's metadata record from the hub.
    meta_file = hf_hub_download(
        repo_id=SUBMISSIONS_REPO,
        filename=f"metadata/{submission_id}.json",
        repo_type="dataset",
        token=HF_TOKEN,
    )
    with open(meta_file) as fh:
        meta = json.load(fh)

    # The metadata names the predictions file to fetch and score.
    preds_file = hf_hub_download(
        repo_id=SUBMISSIONS_REPO,
        filename=meta["predictions_file"],
        repo_type="dataset",
        token=HF_TOKEN,
    )

    # ---- Your evaluation logic goes here ----
    scores = run_evaluation(preds_file)
    # ------------------------------------------

    record = {
        "model_name": meta["model_name"],
        "submitted_by": meta["submitted_by"],
        "submission_time": meta["submission_time"],
        "accuracy": scores["Accuracy"],
    }

    # Publish the scored record; a result file marks this ID as evaluated.
    API.upload_file(
        path_or_fileobj=json.dumps(record).encode(),
        path_in_repo=f"results/{submission_id}.json",
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    print(f"Evaluated {submission_id}: {scores}")
def run_evaluation(predictions_path: str, labels=None) -> dict:
    """Score a predictions CSV against the ground-truth labels.

    Args:
        predictions_path: path to a CSV file with a ``who_is_human`` column.
        labels: ground-truth label sequence; defaults to the module-level
            ``LABELS`` loaded from the private environment secret.

    Returns:
        dict: ``{"Accuracy": fraction of positions where the (whitespace-
        stripped) prediction equals the label}``.

    Raises:
        ValueError: if the CSV lacks the required column or labels is empty.
    """
    if labels is None:
        labels = LABELS
    if not labels:
        raise ValueError("labels must be non-empty")
    df = pd.read_csv(predictions_path)
    # Fail with a clear message instead of a bare KeyError from pandas.
    if "who_is_human" not in df.columns:
        raise ValueError("predictions file must contain a 'who_is_human' column")
    preds = df["who_is_human"].astype(str).str.strip().tolist()
    # zip() truncates at the shorter sequence, so any missing predictions
    # simply count as incorrect (denominator stays len(labels)).
    correct = sum(p == t for p, t in zip(preds, labels))
    return {"Accuracy": correct / len(labels)}
def health():
    """Health-check helper; always reports the literal "OK".

    NOTE(review): no route decorator is visible in this chunk — confirm
    this is registered with the app elsewhere (e.g. ``@app.get``).
    """
    status = "OK"
    return status
async def webhook(request: Request, x_webhook_secret: str = Header(None)):
    """Handle a Hub webhook push and trigger evaluation of new submissions.

    Args:
        request: incoming webhook request; body is the Hub's JSON payload.
        x_webhook_secret: shared-secret header used to authenticate the caller.

    Returns:
        403 Response on a bad secret, otherwise the string "OK".
    """
    # Timing-safe comparison of the attacker-controlled header against the
    # configured secret (== leaks match length via timing). NOTE(review):
    # if WEBHOOK_SECRET is unset, the check is skipped — confirm intended.
    if WEBHOOK_SECRET and not hmac.compare_digest(x_webhook_secret or "", WEBHOOK_SECRET):
        return Response(status_code=403)
    payload = await request.json()
    print("Webhook received:", json.dumps(payload, indent=2))
    event = payload.get("event", {})
    repo = payload.get("repo", {})
    # Only react to content updates on the submissions repo.
    if event.get("action") == "update" and repo.get("name") == SUBMISSIONS_REPO:
        # Evaluate off the request thread so the webhook ACKs immediately.
        threading.Thread(target=process_pending, daemon=True).start()
    return "OK"
def process_pending():
    """Evaluate every submission that does not yet have a result file."""
    for sid in get_pending_submissions():
        try:
            evaluate_submission(sid)
        except Exception as exc:  # top-level boundary: log and keep going
            print(f"Error evaluating {sid}: {exc}")
if __name__ == "__main__":
    # Serve on all interfaces; 7860 is the conventional HF Spaces port.
    uvicorn.run(app, host="0.0.0.0", port=7860)