File size: 2,331 Bytes
9142902
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import requests
import pandas as pd
import os
import time
from data_layer.config import BASE_URL, API_KEY, AGRI_RESOURCE_ID

def fetch_agriculture_data(limit=500, retries=3, max_records=2000):
    """
    Fetch agriculture data from the data.gov.in API in paginated chunks
    and save the combined result as a CSV in the hybrid_dataset folder.

    Pages through the resource `limit` rows at a time until either
    `max_records` rows have been collected or the API returns an empty
    page, retrying each chunk up to `retries` times on transient errors
    and backing off on HTTP 429 rate limits.

    Parameters:
        limit (int): Page size requested per API call.
        retries (int): Attempts per chunk before the chunk is skipped.
        max_records (int): Upper bound on total rows to fetch.

    Returns:
        pandas.DataFrame: All fetched records (also written to
        hybrid_dataset/agriculture_data.csv). Empty DataFrame when
        nothing was fetched or the API key is rejected (HTTP 403).
    """

    os.makedirs("hybrid_dataset", exist_ok=True)
    csv_path = "hybrid_dataset/agriculture_data.csv"
    all_data = []

    print("๐ŸŒพ Starting Agriculture data fetch...")

    offset = 0
    total_fetched = 0
    exhausted = False  # set once the API returns an empty page

    while total_fetched < max_records and not exhausted:
        url = f"{BASE_URL}{AGRI_RESOURCE_ID}?api-key={API_KEY}&format=json&limit={limit}&offset={offset}"

        for attempt in range(retries):
            try:
                response = requests.get(url, timeout=20)
                response.raise_for_status()

                data = response.json().get("records", [])
                if not data:
                    # BUG FIX: the original `break` only left the retry
                    # loop, so the while loop re-requested the same empty
                    # page forever once the dataset was exhausted. Flag
                    # exhaustion so the outer loop stops too.
                    print("โœ… No more records found.")
                    exhausted = True
                    break

                df_chunk = pd.DataFrame(data)
                all_data.append(df_chunk)

                total_fetched += len(df_chunk)
                offset += limit

                print(f"โœ… Chunk fetched: {len(df_chunk)} rows (Total: {total_fetched})")

                # small delay to avoid rate limit
                time.sleep(2)
                break

            except requests.exceptions.HTTPError as e:
                # Classify by the actual status code rather than substring
                # matching on str(e): the exception text includes the full
                # request URL (resource id and API key), so "429"/"403"
                # could spuriously match digits in the URL.
                status = e.response.status_code if e.response is not None else None
                if status == 429:
                    print("โš ๏ธ Too Many Requests โ€” waiting 20 seconds...")
                    time.sleep(20)
                elif status == 403:
                    print("๐Ÿšซ Forbidden: Check your API key or URL in config.py")
                    return pd.DataFrame()
                else:
                    print(f"โš ๏ธ Attempt {attempt+1} failed: {e}")
                    time.sleep(3)

            except requests.exceptions.RequestException as e:
                # Timeouts / connection errors previously escaped the
                # HTTPError-only handler and crashed the whole fetch
                # despite the retry scaffolding; retry them instead.
                print(f"โš ๏ธ Attempt {attempt+1} failed: {e}")
                time.sleep(3)

        else:
            # All `retries` attempts raised: give up on the whole fetch
            # (for...else runs only when the loop was not broken out of).
            print("โŒ Max retries reached, skipping this chunk.")
            break

    if all_data:
        final_df = pd.concat(all_data, ignore_index=True)
        final_df.to_csv(csv_path, index=False)
        print(f"โœ… Agriculture data fetched & saved โ†’ {csv_path} ({len(final_df)} rows total)")
        return final_df
    else:
        print("โŒ No data fetched.")
        return pd.DataFrame()