Spaces:
Running
Running
Update src/agri_predict/scraper.py
Browse files- src/agri_predict/scraper.py +51 -68
src/agri_predict/scraper.py
CHANGED
|
@@ -19,89 +19,72 @@ logger.setLevel(logging.INFO)
|
|
| 19 |
|
| 20 |
|
| 21 |
class AgmarknetAPIClient:
    """Client for Agmarknet API."""

    BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"

    # Fixed query parameters: commodity group/commodity this scraper targets.
    COMMODITY_GROUP_ID = 3
    COMMODITY_ID = 11
    INCLUDE_EXCEL = "false"

    # Per-request timeout in seconds.
    TIMEOUT = 30
|
| 33 |
-
|
| 34 |
def __init__(self):
    """Initialize API client."""
    # One shared session so connections are reused across calls.
    self.session = requests.Session()
    logger.info("Agmarknet API client initialized")
|
| 38 |
-
|
| 39 |
-
def _log_api_call(self, date_str: str, url: str, status_code: int,
                  records_count: int = 0):
    """Log API call details.

    Args:
        date_str: Date string (YYYY-MM-DD)
        url: Full URL called
        status_code: HTTP status code
        records_count: Number of records fetched
    """
    message = (
        f"API CALL | Date: {date_str} | Status: {status_code} | "
        f"Records: {records_count} | URL: {url}"
    )
    logger.info(message)
|
| 53 |
-
|
| 54 |
-
def
|
| 55 |
-
"""Fetch
|
| 56 |
-
|
| 57 |
Args:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
Returns:
|
| 61 |
-
|
| 62 |
"""
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
try:
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
data = response.json()
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
return data
|
| 86 |
-
else:
|
| 87 |
-
logger.error(
|
| 88 |
-
f"❌ API returned failure | Date: {date} | "
|
| 89 |
-
f"Message: {data.get('message', 'Unknown error')}"
|
| 90 |
-
)
|
| 91 |
-
return None
|
| 92 |
-
|
| 93 |
-
except requests.exceptions.Timeout:
|
| 94 |
-
logger.error(f"❌ Timeout error for date: {date}")
|
| 95 |
-
return None
|
| 96 |
-
except requests.exceptions.HTTPError as e:
|
| 97 |
-
logger.error(f"❌ HTTP error for date: {date} | Status: {e.response.status_code}")
|
| 98 |
-
return None
|
| 99 |
-
except requests.exceptions.RequestException as e:
|
| 100 |
-
logger.error(f"❌ Request error for date: {date} | Error: {str(e)}")
|
| 101 |
-
return None
|
| 102 |
-
except ValueError as e:
|
| 103 |
-
logger.error(f"❌ JSON decode error for date: {date} | Error: {str(e)}")
|
| 104 |
-
return None
|
| 105 |
|
| 106 |
def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
|
| 107 |
"""Fetch market data for a date range.
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
class AgmarknetAPIClient:
    """Client for Agmarknet API using ScraperAPI."""

    BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"

    # Fixed Parameters: commodity group/commodity this scraper targets.
    COMMODITY_GROUP_ID = 3
    COMMODITY_ID = 11
    INCLUDE_EXCEL = "false"

    # SECURITY NOTE(review): this API key is hardcoded in source control.
    # Rotate the key and load it from an environment variable or a secrets
    # store instead of committing it — TODO before deploying.
    SCRAPER_API_KEY = "bbbbde6b56c0fde1e2a61c914eb22d14"  # <-- Add your key here
    SCRAPER_API_URL = "http://api.scraperapi.com"

    # Per-request timeout in seconds for the ScraperAPI proxy call.
    TIMEOUT = 30
| 36 |
def __init__(self):
    """Initialize API client."""
    # One shared session so HTTP connections are reused across requests.
    self.session = requests.Session()
    logger.info("Agmarknet API client initialized with ScraperAPI")
| 39 |
+
|
| 40 |
+
def _log_api_call(self, date_str: str, url: str, status_code: int, records_count: int = 0):
    """Log API call details.

    Args:
        date_str: Date string (YYYY-MM-DD)
        url: Full URL called
        status_code: HTTP status code
        records_count: Number of records fetched
    """
    logger.info(
        f"API CALL | Date: {date_str} | Status: {status_code} | "
        f"Records: {records_count} | URL: {url}"
    )
|
| 45 |
+
|
| 46 |
+
def fetch_data(self, date_str: str):
|
| 47 |
+
"""Fetch data using ScraperAPI.
|
| 48 |
+
|
| 49 |
Args:
|
| 50 |
+
date_str: Date string (YYYY-MM-DD)
|
| 51 |
+
|
| 52 |
Returns:
|
| 53 |
+
JSON response from API
|
| 54 |
"""
|
| 55 |
+
# Original Agmarknet query params
|
| 56 |
+
query_params = {
|
| 57 |
+
"commodityGroupId": self.COMMODITY_GROUP_ID,
|
| 58 |
+
"commodityId": self.COMMODITY_ID,
|
| 59 |
+
"date": date_str,
|
| 60 |
+
"includeExcel": self.INCLUDE_EXCEL
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
original_url = f"{self.BASE_URL}?{urlencode(query_params)}"
|
| 64 |
+
|
| 65 |
+
# ScraperAPI wrapper URL
|
| 66 |
+
scraper_params = {
|
| 67 |
+
"api_key": self.SCRAPER_API_KEY,
|
| 68 |
+
"url": original_url,
|
| 69 |
+
"render": "false"
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
scraper_url = f"{self.SCRAPER_API_URL}?{urlencode(scraper_params)}"
|
| 73 |
+
|
| 74 |
try:
|
| 75 |
+
response = self.session.get(scraper_url, timeout=self.TIMEOUT)
|
| 76 |
+
status_code = response.status_code
|
| 77 |
+
|
|
|
|
| 78 |
data = response.json()
|
| 79 |
+
records_count = len(data.get("data", [])) if isinstance(data, dict) else 0
|
| 80 |
+
|
| 81 |
+
self._log_api_call(date_str, original_url, status_code, records_count)
|
| 82 |
+
|
| 83 |
+
return data
|
| 84 |
+
|
| 85 |
+
except Exception as e:
|
| 86 |
+
logger.error(f"ScraperAPI request failed for {date_str}: {str(e)}")
|
| 87 |
+
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
|
| 90 |
"""Fetch market data for a date range.
|