Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Upload missing File Search predictions and link them to existing results. | |
| Usage: | |
| export HF_TOKEN=... | |
| python streamlit_app/eval/link_file_search_predictions.py --apply | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, hf_hub_download | |
# Hugging Face dataset repo holding the benchmark result JSON files.
RESULTS_REPO = "agentic-document-ai/backend-results"
# Hub auth token; may be None if HF_TOKEN is unset — uploads will then fail.
TOKEN = os.environ.get("HF_TOKEN")
BASE_DIR = Path(__file__).resolve().parents[2]  # Project root
# Local directory containing the prediction .jsonl files to upload.
FILE_SEARCH_DIR = BASE_DIR / "file_search_results"
# Map missing results -> local prediction file
# Keys are result-file paths inside RESULTS_REPO; values are filenames
# expected under FILE_SEARCH_DIR.
MISSING_RESULTS = {
    "Google/Gemini_2.5_Flash_with_File_Search_results_20260103_221253.json": "gemini-2.5-flash.jsonl",
    "Google/Gemini_2.5_Pro_with_File_Search_results_20260103_221943.json": "gemini-2.5-pro.jsonl",
    "Google/Gemini_3.0_Pro_(Preview)_with_File_Search_results_20260104_120431.json": "gemini-3-pro-preview.jsonl",
    "OpenAI/GPT-5.2_(2025-12-11)_with_File_Search_results_20260104_121551.json": "gpt-5.2-2025-12-11.jsonl",
    "OpenAI/GPT-5_Mini_(2025-08-07)_with_File_Search_results_20260104_122026.json": "gpt-5-mini-2025-08-07.jsonl",
}
| def _pred_path_from_result(result_path: str) -> str: | |
| # {org}/{model}_results_{ts}.json -> {org}/{model}_predictions_{ts}.jsonl | |
| base, ts = result_path.rsplit("_results_", 1) | |
| ts = ts.replace(".json", "") | |
| return f"{base}_predictions_{ts}.jsonl" | |
def main() -> int:
    """Upload local prediction files and link each to its existing result JSON.

    Dry-run by default: validates local files, prints the planned uploads,
    and exits. With ``--apply``, uploads each predictions ``.jsonl`` to
    ``RESULTS_REPO`` and rewrites the corresponding result JSON in place,
    adding ``source_predictions_file`` and ``result_file_path`` fields.

    Returns:
        Process exit code (0 on success).

    Raises:
        FileNotFoundError: if ``FILE_SEARCH_DIR`` or any local prediction
            file listed in ``MISSING_RESULTS`` is missing.
        RuntimeError: if ``--apply`` is given but ``HF_TOKEN`` is not set.
    """
    parser = argparse.ArgumentParser(description="Upload file_search_results predictions and link them to results.")
    parser.add_argument("--apply", action="store_true", help="Apply uploads/updates (default: dry-run)")
    args = parser.parse_args()

    if not FILE_SEARCH_DIR.exists():
        raise FileNotFoundError(f"Missing directory: {FILE_SEARCH_DIR}")

    api = HfApi(token=TOKEN)

    # Validate every local file up front so problems surface in the dry-run
    # (and before any remote mutation in apply mode).
    actions = []
    for result_path, local_name in MISSING_RESULTS.items():
        local_file = FILE_SEARCH_DIR / local_name
        if not local_file.exists():
            raise FileNotFoundError(f"Missing local prediction file: {local_file}")
        pred_path = _pred_path_from_result(result_path)
        actions.append((result_path, pred_path, local_file))

    print(f"Planned uploads: {len(actions)}")
    for result_path, pred_path, local_file in actions:
        print(f"- {local_file.name} -> {pred_path}")

    if not args.apply:
        print("\nDry-run only. Re-run with --apply to execute.")
        return 0

    # Fail fast with a clear message rather than failing midway through the
    # uploads with an authentication error from the Hub.
    if not TOKEN:
        raise RuntimeError("HF_TOKEN must be set to run with --apply")

    for result_path, pred_path, local_file in actions:
        # Upload predictions file
        api.upload_file(
            path_or_fileobj=str(local_file),
            path_in_repo=pred_path,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            token=TOKEN,
            commit_message=f"Add predictions for {pred_path}",
        )
        # Update results JSON with linkage fields
        local_result = hf_hub_download(
            repo_id=RESULTS_REPO,
            filename=result_path,
            repo_type="dataset",
            token=TOKEN,
        )
        # JSON is UTF-8 by spec; don't rely on the platform default encoding.
        with open(local_result, encoding="utf-8") as f:
            data = json.load(f)
        data["source_predictions_file"] = pred_path
        data["result_file_path"] = result_path
        api.upload_file(
            path_or_fileobj=json.dumps(data, indent=2).encode("utf-8"),
            path_in_repo=result_path,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            token=TOKEN,
            commit_message=f"Link result to predictions: {result_path}",
        )
    print("Done.")
    return 0
# Script entry point; SystemExit propagates main()'s return value as the
# process exit code.
if __name__ == "__main__":
    raise SystemExit(main())