exoplanet / download_data.py
Nur Arifin Akbar
Convert to Gradio app with pre-trained model
ba763c8
"""
Download NASA Exoplanet datasets from the NASA Exoplanet Archive
"""
import requests
import pandas as pd
import os
def download_dataset(table_name, filename):
    """Download one table from the NASA Exoplanet Archive and save it as CSV.

    Sends a ``select *`` query for ``table_name`` to the archive's TAP sync
    endpoint, writes the raw response body to ``filename``, then loads the
    saved file with pandas and prints its shape and column count.

    Args:
        table_name: Name of the archive table to query.
        filename: Path the CSV response body is written to.

    Returns:
        The table as a ``pandas.DataFrame``, or ``None`` when the request
        raises, the server answers with a non-200 status, or the saved
        response cannot be parsed as CSV. A failure message is printed in
        each of those cases.
    """
    base_url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
    query = f"select * from {table_name}"
    params = {
        'query': query,
        'format': 'csv'
    }

    print(f"Downloading {table_name}...")
    try:
        response = requests.get(base_url, params=params, timeout=60)
    except requests.RequestException as exc:
        # Timeouts and connection errors follow the same "print and return
        # None" path as a bad status, so a single unreachable table does not
        # raise out of a caller that is looping over several tables.
        print(f"✗ Failed to download {table_name}: {exc}")
        return None

    if response.status_code != 200:
        print(f"✗ Failed to download {table_name}: {response.status_code}")
        return None

    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f"✓ Successfully downloaded {filename}")

    # Display basic info
    try:
        df = pd.read_csv(filename)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # A 200 response whose body is empty or not valid CSV is still a
        # failed download from the caller's point of view.
        print(f"✗ Failed to download {table_name}: {exc}")
        return None

    print(f" Shape: {df.shape}")
    print(f" Columns: {len(df.columns)}")
    print()
    return df
def main():
    """Download each configured archive table into ``data/`` and preview it.

    Ensures the ``data`` directory exists, then calls ``download_dataset``
    for every (table, path) pair. For each table that was loaded, prints
    the first rows followed by a separator line; tables for which
    ``download_dataset`` returned ``None`` are skipped.
    """
    # Create data directory
    os.makedirs('data', exist_ok=True)

    datasets = [
        ('cumulative', 'data/kepler_koi.csv'),  # Kepler cumulative table
        ('toi', 'data/tess_toi.csv'),
        ('k2pandc', 'data/k2_candidates.csv')
    ]
    separator = "\n" + "=" * 80 + "\n"

    for table, path in datasets:
        frame = download_dataset(table, path)
        if frame is None:
            # Nothing to preview for this table.
            continue
        print(f"Sample of {path}:")
        print(frame.head())
        print(separator)
# Run the downloads only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()