Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| import os | |
| import subprocess | |
| import sys | |
| from dotenv import load_dotenv | |
def main():
    """Download AI/ML papers from OpenAlex by driving the ``openalex`` CLI.

    Loads variables from a ``.env`` file, requires ``OPENALEX_API_KEY`` to be
    set, makes sure the ``data`` output directory exists, and then runs
    ``openalex download`` with a filter on topic, publication year and
    citation count. The CLI's combined stdout/stderr is echoed line by line
    as it is produced.

    Raises:
        SystemExit: with status 1 when the API key is missing, when the CLI
            cannot be started or relayed, or when the user interrupts the
            run; with the CLI's own exit status (1 if it was killed by a
            signal) when the download fails.
    """
    # Load environment variables from .env file
    load_dotenv()

    # Configuration
    output_dir = "data"
    # Top AI/ML topics identified via API search
    topic_ids = [
        "T12072",  # Machine Learning and Algorithms
        "T11948",  # Deep Learning and Neural Networks
    ]
    pub_year = "2018-2024"
    min_citations = "20"

    # Check for API key before touching the filesystem or spawning anything
    api_key = os.environ.get("OPENALEX_API_KEY")
    if not api_key:
        print("Error: OPENALEX_API_KEY environment variable is not set.")
        print("Please get your API key from https://openalex.org/settings/api and set it:")
        print("export OPENALEX_API_KEY='your-key-here'")
        sys.exit(1)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # OpenAlex filter syntax: "|" joins alternative values for one field,
    # "," joins the separate field conditions.
    topic_filter = "|".join(topic_ids)
    filter_str = (
        f"topics.id:{topic_filter},"
        f"publication_year:{pub_year},"
        f"cited_by_count:>{min_citations}"
    )

    print("π Starting OpenAlex download for AI/ML papers...")
    print(f"π Output directory: {output_dir}")
    print(f"π Filter: {filter_str}")

    # Build the command as an argument list (no shell involved).
    # NOTE(review): the API key travels on argv, so it is visible in the
    # process list to other local users; switch to an environment variable
    # if the CLI supports reading the key that way -- TODO confirm.
    command = [
        "openalex", "download",
        "--api-key", api_key,
        "--output", output_dir,
        "--filter", filter_str,
        "--resume",
        "--workers", "10",
    ]

    try:
        # The context manager closes the pipe and reaps the child on every
        # exit path, including exceptions.
        with subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout
            text=True,
            bufsize=1,  # line-buffered so progress shows up promptly
        ) as process:
            try:
                # Print progress in real-time
                for line in process.stdout:
                    print(line, end="")
            except KeyboardInterrupt:
                # Do not leave the downloader running after we exit: ask it
                # to stop, and force-kill if it does not comply in time.
                process.terminate()
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    process.kill()
                raise
        # Popen.__exit__ has waited for the child, so returncode is set.
        returncode = process.returncode
    except KeyboardInterrupt:
        print("\nπ Download interrupted by user. Run again to resume.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected (e.g. the CLI is
        # not installed) and exit with a failure status.
        print(f"\nπ₯ An error occurred: {e}")
        sys.exit(1)

    if returncode == 0:
        print("\nβ Download completed successfully.")
        return

    print(f"\nβ Download failed with return code {returncode}.")
    print("You can run this script again to resume from the last checkpoint.")
    # Propagate the failure so shells and schedulers can detect it. A
    # negative returncode means the child died from a signal; map it to 1.
    sys.exit(returncode if returncode > 0 else 1)
# Run the download only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()