Spaces:
Sleeping
Sleeping
| """ | |
| Process and classify ICLR submissions using OpenReview API. | |
| This script processes ICLR submissions, classifies them into subdirectories | |
| based on decisions, extracts paper content into JSON format, and checks the | |
| validity of the processed papers. | |
| It includes three main functions: | |
| - categorize_ICLR_submissions_into_subdirectories: Classifies papers into | |
| directories based on decisions. | |
| - process_submission: Processes each submission by extracting text and saving | |
| it as a JSON file. | |
| - check_processed_paper: Verifies if all processed papers are valid JSON files. | |
| """ | |
| import os | |
| import sys | |
| import traceback | |
| from collections import Counter | |
| from tqdm import tqdm | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| from agentreview.arguments import parse_args | |
| from agentreview.utility.utils import print_colored | |
# Translates the raw decision string found in a review note into the name of
# the subdirectory that the note and its paper are moved into. The wording of
# decisions differs between ICLR editions, hence the per-year groups.
decision_map = {
    # ICLR 2023
    "Reject": "Reject",
    "Accept: poster": "Accept-poster",
    "Accept: notable-top-25%": "Accept-notable-top-25",
    "Accept: notable-top-5%": "Accept-notable-top-5",

    # ICLR 2022
    "Accept (Poster)": "Accept-poster",
    "Accept (Oral)": "Accept-oral",
    "Accept (Spotlight)": "Accept-spotlight",

    # ICLR 2021
    "Significant concerns (Do not publish)": "Significant-concerns",
    "Concerns raised (can publish with adjustment)": "Concerns-raised",

    # ICLR 2020
    # We assume "Talk" signifies an oral presentation
    "Accept (Talk)": "Accept-oral",

    # ICLR 2018
    # Workshop invitations are filed together with rejections
    "Invite to Workshop Track": "Reject",
}
def _extract_decision(lines):
    """Return the value on the first "recommendation"/"decision" line, or None.

    Args:
        lines: The lines of one review-note file.

    Returns:
        The text between the double quotes that follow the key, or ``None``
        when no line contains either key.

    Raises:
        AssertionError: If the matching line does not contain exactly four
            double quotes (two around the key, two around the value).
    """
    for line in tqdm(lines):
        # "recommendation" takes precedence when a line contains both keys.
        for key in ("recommendation", "decision"):
            quoted_key = f'"{key}"'
            if quoted_key not in line:
                continue
            # Raised explicitly (not via `assert`) so the check is not
            # stripped when Python runs with -O.
            if line.count('"') != 4:
                raise AssertionError(f"Unexpected format in {key} line.")
            print(line)
            # Exactly two quotes remain after the key, so the value is always
            # the middle piece of the split; no error handling is needed.
            _, _, remainder = line.partition(quoted_key)
            return remainder.split('"')[1]
    return None


def categorize_ICLR_submissions_into_subdirectories():
    """Classifies ICLR submissions into subdirectories based on review decisions.

    Every regular file in ``data/<conference>/notes`` is scanned for the first
    line containing a ``"recommendation"`` or ``"decision"`` key. The value on
    that line is translated through ``decision_map`` into a directory name, and
    both the note and ``data/<conference>/paper/<id>.pdf`` are moved into a
    subdirectory of that name, which is created on demand.

    Notes without a decision line (possibly withdrawn papers) and notes whose
    decision has no entry in ``decision_map`` are reported in red and left in
    place.

    The conference name is read from the module-level ``args`` object, which
    must be bound before this function is called.

    Raises:
        AssertionError: If the line containing the decision does not have the
            expected format.
        ValueError: If a note's file name is not ``<integer>.json``.
        FileNotFoundError: If the PDF belonging to a note does not exist.
    """
    note_dir = f"data/{args.conference}/notes"
    paper_dir = f"data/{args.conference}/paper"

    for note in os.listdir(note_dir):
        print(note)
        note_path = os.path.join(note_dir, note)

        # Skip directories or irrelevant files
        if os.path.isdir(note_path) or ".DS_Store" in note:
            continue

        # Read through a context manager so the handle is closed right away.
        with open(note_path, "r", encoding="utf-8") as note_file:
            lines = note_file.readlines()

        decision = _extract_decision(lines)
        if decision is None:
            # Possibly withdrawn papers
            print_colored(f"Could not find decision for {note}", "red")
            continue

        category = decision_map.get(decision)
        if category is None:
            # An unmapped decision should not abort the whole run.
            print_colored(f"Unknown decision '{decision}' for {note}", "red")
            continue

        # Parse the id before touching the file system, and move the paper
        # first: a malformed name or missing PDF then raises while the note
        # is still in place, so note and paper never end up separated.
        paper_id = int(note.split(".json")[0])
        paper_name = f"{paper_id}.pdf"

        note_target_dir = os.path.join(note_dir, category)
        paper_target_dir = os.path.join(paper_dir, category)
        os.makedirs(note_target_dir, exist_ok=True)
        os.makedirs(paper_target_dir, exist_ok=True)

        os.rename(os.path.join(paper_dir, paper_name),
                  os.path.join(paper_target_dir, paper_name))
        os.rename(note_path, os.path.join(note_target_dir, note))
if __name__ == "__main__":
    # `args` is bound at module level on purpose: the function called below
    # reads `args.conference` as a global to locate the data directories.
    args = parse_args()
    # Move each review note and its paper PDF into a per-decision subdirectory
    categorize_ICLR_submissions_into_subdirectories()