File size: 2,331 Bytes
9142902 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import requests
import pandas as pd
import os
import time
from data_layer.config import BASE_URL, API_KEY, AGRI_RESOURCE_ID
def fetch_agriculture_data(limit=500, retries=3, max_records=2000):
    """
    Fetch agriculture data from the data.gov.in API in chunks and save as CSV.

    Pages through the resource `limit` records at a time until either
    `max_records` rows have been collected or the API returns an empty page.
    Rate limits (HTTP 429) trigger a 20-second back-off; transient network
    errors are retried up to `retries` times per chunk. The combined result
    is written to hybrid_dataset/agriculture_data.csv.

    Args:
        limit: Number of records requested per API call.
        retries: Attempts per chunk before the chunk is skipped.
        max_records: Upper bound on total rows to fetch.

    Returns:
        pandas.DataFrame with all fetched records, or an empty DataFrame
        when nothing could be fetched (or on a 403 Forbidden response).
    """
    os.makedirs("hybrid_dataset", exist_ok=True)
    csv_path = "hybrid_dataset/agriculture_data.csv"
    all_data = []

    print("🌾 Starting Agriculture data fetch...")
    offset = 0
    total_fetched = 0
    exhausted = False  # set once the API returns an empty page

    while total_fetched < max_records and not exhausted:
        url = (
            f"{BASE_URL}{AGRI_RESOURCE_ID}"
            f"?api-key={API_KEY}&format=json&limit={limit}&offset={offset}"
        )
        for attempt in range(retries):
            try:
                response = requests.get(url, timeout=20)
                response.raise_for_status()
                data = response.json().get("records", [])
                if not data:
                    # BUG FIX: previously this `break` only exited the retry
                    # loop, so the outer while-loop re-fetched the same empty
                    # offset forever. Flag exhaustion to stop paging.
                    exhausted = True
                    print("✅ No more records found.")
                    break
                df_chunk = pd.DataFrame(data)
                all_data.append(df_chunk)
                total_fetched += len(df_chunk)
                offset += limit
                print(f"✅ Chunk fetched: {len(df_chunk)} rows (Total: {total_fetched})")
                # small delay to avoid rate limit
                time.sleep(2)
                break
            except requests.exceptions.HTTPError as e:
                # Check the actual status code rather than matching "429"
                # against the exception's string representation.
                status = e.response.status_code if e.response is not None else None
                if status == 429:
                    print("⚠️ Too Many Requests — waiting 20 seconds...")
                    time.sleep(20)
                elif status == 403:
                    print("🚫 Forbidden: Check your API key or URL in config.py")
                    return pd.DataFrame()
                else:
                    print(f"⚠️ Attempt {attempt+1} failed: {e}")
                    time.sleep(3)
            except requests.exceptions.RequestException as e:
                # BUG FIX: timeouts / connection errors previously escaped
                # the handler and crashed the whole fetch; retry them too.
                print(f"⚠️ Attempt {attempt+1} failed: {e}")
                time.sleep(3)
        else:
            # for-else: all retries exhausted without a successful break.
            print("❌ Max retries reached, skipping this chunk.")
            break

    if all_data:
        final_df = pd.concat(all_data, ignore_index=True)
        final_df.to_csv(csv_path, index=False)
        print(f"✅ Agriculture data fetched & saved → {csv_path} ({len(final_df)} rows total)")
        return final_df

    print("❌ No data fetched.")
    return pd.DataFrame()
|