Spaces:

Ray368
/

GateMem-Submit

Running

App Files Files Community

GateMem-Submit / app.py

Ray368

Create app.py

d565ce0 verified about 1 month ago

Raw

History Blame Contribute Delete

9.71 kB

	import base64
	import json
	import os
	import shutil
	import subprocess
	import time
	import uuid
	from pathlib import Path

	import gradio as gr
	import requests


	# Target repository and files; every value can be overridden through an
	# environment variable of the same name.
	GITHUB_OWNER = os.environ.get("GITHUB_OWNER", "rzhub")
	GITHUB_REPO = os.environ.get("GITHUB_REPO", "GateMem")
	GITHUB_BRANCH = os.environ.get("GITHUB_BRANCH", "main")
	LEADERBOARD_PATH = os.environ.get("LEADERBOARD_PATH", "docs/assets/leaderboard.json")
	PENDING_PATH = os.environ.get("PENDING_PATH", "docs/assets/pending_submissions.json")
	# Secrets: the token is None when unset, the admin password is "" when unset.
	GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
	ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")

	# Scratch space: the repository checkout and one directory per submission.
	WORKDIR = Path("/tmp/gatemem_submit")
	REPO_DIR = WORKDIR.joinpath("GateMem")
	SUBMISSIONS_DIR = WORKDIR.joinpath("submissions")


	def ensure_repo():
	    """Make sure a local checkout of the benchmark repository exists.

	    Creates ``WORKDIR`` and ``SUBMISSIONS_DIR`` if needed. When ``REPO_DIR``
	    does not exist yet, the repository is shallow-cloned into it; otherwise
	    the existing checkout is refreshed with ``git pull``.

	    Raises:
	        subprocess.CalledProcessError: If the initial ``git clone`` fails.
	    """
	    WORKDIR.mkdir(parents=True, exist_ok=True)
	    SUBMISSIONS_DIR.mkdir(parents=True, exist_ok=True)

	    if not REPO_DIR.exists():
	        # Derive the URL from the configured owner/repo so the checkout used
	        # for scoring is the same repository the GitHub API calls write to.
	        # With the default settings this is the URL that used to be hard-coded.
	        clone_url = f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}.git"
	        subprocess.run(
	            ["git", "clone", "--depth", "1", clone_url, str(REPO_DIR)],
	            check=True,
	        )
	        return

	    # Refreshing is best-effort: a failed pull must not block scoring against
	    # the checkout we already have, but it should leave a trace in the logs.
	    pull = subprocess.run(["git", "-C", str(REPO_DIR), "pull"], check=False)
	    if pull.returncode != 0:
	        print(
	            f"Warning: 'git pull' in {REPO_DIR} exited with code {pull.returncode}; "
	            "continuing with the existing checkout.",
	            flush=True,
	        )


	def github_headers():
	    """Build the HTTP headers for authenticated GitHub API requests.

	    Returns:
	        A dict with a bearer ``Authorization`` header built from
	        ``GITHUB_TOKEN`` and the GitHub JSON ``Accept`` header.

	    Raises:
	        RuntimeError: If ``GITHUB_TOKEN`` is unset or empty.
	    """
	    if GITHUB_TOKEN:
	        headers = {}
	        headers["Authorization"] = f"Bearer {GITHUB_TOKEN}"
	        headers["Accept"] = "application/vnd.github+json"
	        return headers

	    raise RuntimeError("GITHUB_TOKEN is not configured.")


	def github_get_json_file(path):
	    """Read a JSON file from the configured repository and branch.

	    Args:
	        path: Repository-relative path of the file.

	    Returns:
	        A ``(data, sha)`` tuple. ``data`` is the parsed JSON, or ``[]`` when
	        the file is empty or whitespace only. When the API answers 404 the
	        result is ``([], None)``.

	    Raises:
	        requests.HTTPError: For any non-404 error status.
	        RuntimeError: If no GitHub token is configured.
	    """
	    endpoint = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO}/contents/{path}"
	    response = requests.get(
	        endpoint,
	        headers=github_headers(),
	        params={"ref": GITHUB_BRANCH},
	        timeout=30,
	    )

	    # A missing file is treated as an empty list that has no blob sha yet.
	    if response.status_code == 404:
	        return [], None
	    response.raise_for_status()

	    file_info = response.json()
	    text = base64.b64decode(file_info["content"]).decode("utf-8")
	    if text.strip():
	        parsed = json.loads(text)
	    else:
	        parsed = []
	    return parsed, file_info["sha"]


	def github_put_json_file(path, data, message, sha=None):
	    """Create or update a JSON file in the configured repository and branch.

	    Args:
	        path: Repository-relative path of the file.
	        data: JSON-serialisable object to store (written with 2-space indent).
	        message: Commit message.
	        sha: Blob sha of the version being replaced; left out of the request
	            when falsy.

	    Returns:
	        The decoded JSON response of the GitHub API.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	        RuntimeError: If no GitHub token is configured.
	    """
	    endpoint = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO}/contents/{path}"

	    # The contents API expects the file body as base64 text.
	    serialized = json.dumps(data, indent=2, ensure_ascii=False)
	    body = {
	        "message": message,
	        "content": base64.b64encode(serialized.encode("utf-8")).decode("utf-8"),
	        "branch": GITHUB_BRANCH,
	    }
	    if sha:
	        body["sha"] = sha

	    response = requests.put(endpoint, headers=github_headers(), json=body, timeout=30)
	    response.raise_for_status()
	    return response.json()


	def extract_metrics(summary):
	    """
	    Map the scorer summary onto the short metric keys used in result rows.

	    Each metric is looked up with ``summary.get`` (0.0 when the field is
	    absent) and converted to ``float``.
	    Adjust the field names here if your summary.json uses different ones.
	    """
	    # (short key, summary.json field) pairs, in output order.
	    metric_fields = (
	        ("u", "utility_accuracy"),
	        ("a", "privacy_leakage_rate"),
	        ("f", "deletion_leakage_rate"),
	        ("mgs", "compliance_utility_score"),
	    )
	    return {short: float(summary.get(field, 0.0)) for short, field in metric_fields}


	def run_scorer(predictions_path, domain, use_llm_judge=False):
	    """Score a predictions file with the repository's scoring script.

	    Refreshes the local checkout, runs ``bench/scripts/score_predictions.py``
	    in a subprocess with a fresh output directory, and reads the
	    ``summary.json`` it writes.

	    Args:
	        predictions_path: Path of the predictions file to score.
	        domain: Domain name; lower-cased and used as the sub-directory of
	            ``bench/data`` passed to the scorer as ``--data_dir``.
	        use_llm_judge: When true, the LLM-judge flags (provider ``openai``,
	            model ``gpt-4o``) are added to the scorer command line.

	    Returns:
	        ``(metrics, summary_path, scorer_output)`` where ``metrics`` comes
	        from ``extract_metrics``, ``summary_path`` is a string, and
	        ``scorer_output`` is the combined stdout/stderr of the scorer.

	    Raises:
	        RuntimeError: If the domain does not name a direct child of the data
	            directory, the scorer exits non-zero, or no ``summary.json`` was
	            produced.
	    """
	    import sys

	    ensure_repo()

	    # Tolerate a cleared dropdown (None) and stray whitespace.
	    domain_name = str(domain or "").strip().lower()

	    # The domain reaches us from a web form / API call, so make sure it names
	    # a direct child of the data directory and cannot walk elsewhere
	    # (e.g. "", "..", "a/b").
	    data_root = REPO_DIR / "bench" / "data"
	    data_dir = data_root / domain_name
	    if data_dir.resolve().parent != data_root.resolve():
	        raise RuntimeError(f"Unknown domain: {domain!r}")

	    run_id = str(uuid.uuid4())[:8]
	    out_dir = SUBMISSIONS_DIR / run_id / "eval"
	    out_dir.mkdir(parents=True, exist_ok=True)

	    cmd = [
	        # Use the interpreter running this app (and therefore its installed
	        # packages) instead of whatever "python" happens to be first on PATH.
	        sys.executable or "python",
	        str(REPO_DIR / "bench" / "scripts" / "score_predictions.py"),
	        "--data_dir",
	        str(data_dir),
	        "--predictions",
	        str(predictions_path),
	        "--out_dir",
	        str(out_dir),
	    ]

	    if use_llm_judge:
	        cmd += [
	            "--use_llm_judge",
	            "--judge_provider",
	            "openai",
	            "--judge_model",
	            "gpt-4o",
	        ]

	    # NOTE(review): the scorer inherits the full environment, including
	    # GITHUB_TOKEN and ADMIN_PASSWORD; confirm whether it needs them.
	    env = os.environ.copy()
	    proc = subprocess.run(
	        cmd,
	        cwd=str(REPO_DIR),
	        env=env,
	        text=True,
	        stdout=subprocess.PIPE,
	        stderr=subprocess.STDOUT,  # fold stderr into stdout for error reports
	    )

	    if proc.returncode != 0:
	        raise RuntimeError(f"Scoring failed:\n{proc.stdout}")

	    summary_path = out_dir / "summary.json"
	    if not summary_path.exists():
	        raise RuntimeError(f"summary.json not found. Scorer output:\n{proc.stdout}")

	    summary = json.loads(summary_path.read_text(encoding="utf-8"))
	    metrics = extract_metrics(summary)

	    return metrics, str(summary_path), proc.stdout


	def submit_result(predictions_file, method, backbone, domain, family, contact, code_url, use_llm_judge):
	    """Score an uploaded predictions file and record it as a pending submission.

	    The upload is copied into a per-submission directory, scored with
	    ``run_scorer``, and a result row (metrics rounded to one decimal,
	    ``verified=False``) is appended to the pending-submissions JSON file on
	    GitHub. The row includes the contact and code URL that were entered.

	    Args:
	        predictions_file: Uploaded file from the form: either a path string
	            or an object whose ``.name`` is the path. ``None`` when nothing
	            was uploaded.
	        method: Method name (required).
	        backbone: Backbone model label.
	        domain: Benchmark domain label.
	        family: Method family label.
	        contact: Contact e-mail (may be empty or ``None``).
	        code_url: Code/artifact link (may be empty or ``None``).
	        use_llm_judge: Forwarded to ``run_scorer``.

	    Returns:
	        A human-readable status message; failures are reported in the message
	        rather than raised.
	    """
	    if predictions_file is None:
	        return "Please upload predictions.jsonl."

	    # Text inputs can arrive as None; treat that like an empty field instead
	    # of crashing on ``.strip()``.
	    method_name = (method or "").strip()
	    if not method_name:
	        return "Please provide a method name."

	    submit_id = f"{int(time.time())}-{uuid.uuid4().hex[:8]}"
	    submit_dir = SUBMISSIONS_DIR / submit_id
	    pred_path = submit_dir / "predictions.jsonl"

	    # Accept both a plain path string and a file-like object exposing .name.
	    upload_path = getattr(predictions_file, "name", predictions_file)

	    try:
	        submit_dir.mkdir(parents=True, exist_ok=True)
	        shutil.copy(upload_path, pred_path)
	        metrics, _summary_path, _logs = run_scorer(pred_path, domain, use_llm_judge=use_llm_judge)
	    except Exception as e:
	        return f"Submission failed:\n{e}"

	    row = {
	        "submission_id": submit_id,
	        "method": method_name,
	        "backbone": backbone,
	        "domain": domain,
	        "family": family,
	        "u": round(metrics["u"], 1),
	        "a": round(metrics["a"], 1),
	        "f": round(metrics["f"], 1),
	        "mgs": round(metrics["mgs"], 1),
	        "source": "external",
	        "verified": False,
	        "contact": (contact or "").strip(),
	        "code_url": (code_url or "").strip(),
	        "created_at": int(time.time()),
	    }
	    scores = f"U={row['u']}, A={row['a']}, F={row['f']}, MGS={row['mgs']}"

	    # Read-modify-write of the pending file. GitHub answers 409 when the blob
	    # sha went stale because another submission landed in between, so re-read
	    # and retry a few times before giving up.
	    max_attempts = 3
	    for attempt in range(1, max_attempts + 1):
	        try:
	            pending, sha = github_get_json_file(PENDING_PATH)
	            pending.append(row)
	            github_put_json_file(
	                PENDING_PATH,
	                pending,
	                f"Add pending GateMem submission: {method_name}",
	                sha=sha,
	            )
	            break
	        except Exception as e:
	            status = getattr(getattr(e, "response", None), "status_code", None)
	            if status == 409 and attempt < max_attempts:
	                continue
	            # Scoring succeeded, so report the scores along with the failure
	            # instead of surfacing a raw exception.
	            return (
	                "Scored successfully, but the submission could not be recorded:\n"
	                f"{e}\n\n"
	                f"{scores}"
	            )

	    return (
	        "Submitted and scored successfully.\n\n"
	        "Status: pending maintainer approval\n"
	        f"Submission ID: {submit_id}\n\n"
	        f"{scores}"
	    )


	def list_pending(password):
	    """Return a one-line-per-entry listing of the pending submissions.

	    Args:
	        password: Admin password entered in the form.

	    Returns:
	        The listing as text, or a status message when authentication fails
	        or nothing is pending.
	    """
	    import hmac

	    # An unset ADMIN_PASSWORD defaults to "", which would otherwise let an
	    # empty password through; treat "not configured" as "admin disabled".
	    if not ADMIN_PASSWORD:
	        return "Admin access is disabled: ADMIN_PASSWORD is not configured."
	    # Constant-time comparison; bytes are used because compare_digest only
	    # accepts ASCII-only str arguments.
	    supplied = str(password or "").encode("utf-8")
	    if not hmac.compare_digest(supplied, ADMIN_PASSWORD.encode("utf-8")):
	        return "Invalid admin password."

	    pending, _ = github_get_json_file(PENDING_PATH)
	    if not pending:
	        return "No pending submissions."

	    # Plain "|" separators: "\|" is an invalid escape sequence in Python
	    # string literals and would put a stray backslash in the output.
	    return "\n".join(
	        f"{item['submission_id']} | {item['method']} | {item['backbone']} | "
	        f"{item['domain']} | U={item['u']} A={item['a']} F={item['f']} MGS={item['mgs']}"
	        for item in pending
	    )


	def approve_submission(password, submission_id):
	    """Move a pending submission onto the leaderboard.

	    The matching entry is marked ``verified`` with an ``approved_at``
	    timestamp, appended to the leaderboard JSON file, and removed from the
	    pending JSON file (both stored on GitHub).

	    Args:
	        password: Admin password entered in the form.
	        submission_id: ID of the pending submission to approve; surrounding
	            whitespace is ignored.

	    Returns:
	        A human-readable status message.

	    Raises:
	        requests.HTTPError: If a GitHub API call fails.
	        RuntimeError: If no GitHub token is configured.
	    """
	    import hmac

	    # An unset ADMIN_PASSWORD defaults to "", which would otherwise let an
	    # empty password through; treat "not configured" as "admin disabled".
	    if not ADMIN_PASSWORD:
	        return "Admin access is disabled: ADMIN_PASSWORD is not configured."
	    # Constant-time comparison; bytes are used because compare_digest only
	    # accepts ASCII-only str arguments.
	    supplied = str(password or "").encode("utf-8")
	    if not hmac.compare_digest(supplied, ADMIN_PASSWORD.encode("utf-8")):
	        return "Invalid admin password."

	    # Normalise once instead of on every loop iteration; None becomes "".
	    wanted_id = (submission_id or "").strip()

	    pending, pending_sha = github_get_json_file(PENDING_PATH)
	    leaderboard, leaderboard_sha = github_get_json_file(LEADERBOARD_PATH)

	    target = None
	    remaining = []
	    for item in pending:
	        if item.get("submission_id") == wanted_id:
	            target = item
	        else:
	            remaining.append(item)

	    if target is None:
	        return f"Submission not found: {submission_id}"

	    # The two writes below are not atomic. If an earlier approval updated the
	    # leaderboard but failed before trimming the pending file, the entry is
	    # already listed; do not append it a second time on retry.
	    already_listed = any(
	        isinstance(entry, dict) and entry.get("submission_id") == wanted_id
	        for entry in leaderboard
	    )
	    if not already_listed:
	        target["verified"] = True
	        target["approved_at"] = int(time.time())
	        leaderboard.append(target)
	        github_put_json_file(
	            LEADERBOARD_PATH,
	            leaderboard,
	            f"Approve GateMem leaderboard submission: {target['method']}",
	            sha=leaderboard_sha,
	        )

	    github_put_json_file(
	        PENDING_PATH,
	        remaining,
	        f"Remove approved pending submission: {target['method']}",
	        sha=pending_sha,
	    )

	    # Plain "|" separators: "\|" is an invalid escape sequence in Python
	    # string literals and would put a stray backslash in the output.
	    return (
	        "Approved and added to leaderboard:\n"
	        f"{target['method']} | {target['backbone']} | {target['domain']} | "
	        f"MGS={target['mgs']}"
	    )


	# Gradio UI: a public "Submit Result" tab and a password-protected "Admin" tab.
	with gr.Blocks(title="GateMem Result Submission") as demo:
	    gr.Markdown("# GateMem Result Submission")
	    gr.Markdown(
	        "Upload `predictions.jsonl` generated by your method. "
	        "The server scores it with the official GateMem evaluator and stores it as a pending submission."
	    )

	    with gr.Tab("Submit Result"):
	        # Inputs are created in the order they appear on the page.
	        predictions_file = gr.File(label="predictions.jsonl", file_types=[".jsonl"])
	        method = gr.Textbox(label="Method name", placeholder="e.g., MyMemoryAgent")
	        backbone = gr.Dropdown(
	            choices=["GPT-5-mini", "GPT-4o-mini", "Gemini-2.5-Flash-Lite", "Other"],
	            value="GPT-5-mini",
	            label="Backbone model",
	        )
	        domain = gr.Dropdown(
	            choices=["Medical", "Office", "Education", "Household"],
	            value="Medical",
	            label="Domain",
	        )
	        family = gr.Dropdown(
	            choices=["Full-context", "RAG", "External memory", "Other"],
	            value="Other",
	            label="Method family",
	        )
	        contact = gr.Textbox(label="Contact email")
	        code_url = gr.Textbox(label="Code URL / commit / artifact link")
	        use_llm_judge = gr.Checkbox(value=False, label="Use LLM judge if server is configured")
	        submit_btn = gr.Button(value="Submit and Score", variant="primary")
	        submit_out = gr.Textbox(label="Submission status", lines=8)

	        # The input order must match the parameter order of submit_result.
	        submit_btn.click(
	            fn=submit_result,
	            inputs=[
	                predictions_file,
	                method,
	                backbone,
	                domain,
	                family,
	                contact,
	                code_url,
	                use_llm_judge,
	            ],
	            outputs=submit_out,
	        )

	    with gr.Tab("Admin"):
	        admin_password = gr.Textbox(label="Admin password", type="password")
	        list_btn = gr.Button(value="List Pending Submissions")
	        pending_out = gr.Textbox(label="Pending submissions", lines=12)

	        submission_id = gr.Textbox(label="Submission ID to approve")
	        approve_btn = gr.Button(value="Approve and Update Leaderboard", variant="primary")
	        approve_out = gr.Textbox(label="Approval status", lines=6)

	        # Both admin actions receive the password field as their first input.
	        list_btn.click(fn=list_pending, inputs=[admin_password], outputs=pending_out)
	        approve_btn.click(
	            fn=approve_submission,
	            inputs=[admin_password, submission_id],
	            outputs=approve_out,
	        )


	def main():
	    """Prepare the local repository checkout, then start the Gradio app."""
	    ensure_repo()
	    demo.launch()


	# Only start the server when this file is run as a script.
	if __name__ == "__main__":
	    main()