import csv
import json

import requests
|
|
# Pagination settings for the OpenReview notes endpoint.
limit = 1000      # notes requested per page
max_count = 4944  # total number of submissions to fetch

base_url = 'https://api.openreview.net'

# Seconds to wait on a single HTTP request before giving up, so that a
# stalled connection cannot hang the script forever.
request_timeout = 60

csv_header = ['title', 'url', 'pdf', 'tldr', 'abstract', 'keywords']


def fetch_papers(total=max_count, page_size=limit, timeout=request_timeout):
    """Download ICLR 2023 blind-submission notes, one page at a time.

    Args:
        total: Upper bound on the number of notes to request.
        page_size: Maximum number of notes requested per HTTP call.
        timeout: Per-request timeout in seconds.

    Returns:
        A list with the note dicts from every page, in the order received.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    papers = []
    offset = 0
    while offset < total:
        # The last page may be shorter than a full page.
        count = min(page_size, total - offset)

        print(offset, count)
        url = base_url + f"/notes?details=invitation%2Coriginal&offset={offset}&limit={count}&invitation=ICLR.cc%2F2023%2FConference%2F-%2FBlind_Submission"

        response = requests.get(url, timeout=timeout)
        # Fail loudly on HTTP errors instead of hitting a confusing
        # KeyError/JSON error when parsing an error payload below.
        response.raise_for_status()

        page = json.loads(response.text)['notes']
        if not page:
            # The server has nothing more to return; further requests
            # would only come back empty as well.
            break
        papers += page

        offset += count
    return papers


def paper_to_row(paper):
    """Convert one note dict into a CSV row matching ``csv_header``.

    Reads ``paper['forum']`` and, from ``paper['content']``, the keys
    ``title``, ``abstract`` and ``keywords`` (all required) plus the
    optional ``TL;DR`` (empty string when absent).

    Raises:
        KeyError: If a required key is missing.
    """
    content = paper['content']
    forum_id = paper['forum']
    return [
        content['title'],
        f'https://openreview.net/forum?id={forum_id}',
        f'https://openreview.net/pdf?id={forum_id}',
        content.get('TL;DR', ''),
        content['abstract'],
        ', '.join(content['keywords']),
    ]


def write_csv(papers, path='iclr_submissions.csv'):
    """Write the header row followed by one row per paper to ``path``."""
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(csv_header)
        writer.writerows(paper_to_row(paper) for paper in papers)


def main():
    """Fetch all submissions and export them to ``iclr_submissions.csv``."""
    write_csv(fetch_papers())


if __name__ == '__main__':
    main()