ThejasRao committed on
Commit
328fcfe
·
verified ·
1 Parent(s): a820271

Update src/agri_predict/scraper.py

Browse files
Files changed (1) hide show
  1. src/agri_predict/scraper.py +51 -68
src/agri_predict/scraper.py CHANGED
@@ -19,89 +19,72 @@ logger.setLevel(logging.INFO)
19
 
20
 
21
  class AgmarknetAPIClient:
22
- """Client for Agmarknet API."""
23
-
24
  BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"
25
 
26
- # Fixed parameters
27
- COMMODITY_GROUP_ID = 3 # Commodity group for Sesamum
28
- COMMODITY_ID = 11 # Sesamum(Sesame,Gingelly,Til)
29
  INCLUDE_EXCEL = "false"
30
-
31
- # Timeout in seconds
 
 
32
  TIMEOUT = 30
33
-
34
  def __init__(self):
35
- """Initialize API client."""
36
  self.session = requests.Session()
37
- logger.info("Agmarknet API client initialized")
38
-
39
- def _log_api_call(self, date_str: str, url: str, status_code: int,
40
- records_count: int = 0):
41
- """Log API call details.
42
-
43
- Args:
44
- date_str: Date string (YYYY-MM-DD)
45
- url: Full URL called
46
- status_code: HTTP status code
47
- records_count: Number of records fetched
48
- """
49
  logger.info(
50
  f"API CALL | Date: {date_str} | Status: {status_code} | "
51
  f"Records: {records_count} | URL: {url}"
52
  )
53
-
54
- def fetch_market_data(self, date: str) -> Optional[Dict[str, Any]]:
55
- """Fetch market data for a specific date.
56
-
57
  Args:
58
- date: Date string in format YYYY-MM-DD
59
-
60
  Returns:
61
- API response dictionary or None if error
62
  """
63
- url = (
64
- f"{self.BASE_URL}?date={date}&"
65
- f"commodityGroupId={self.COMMODITY_GROUP_ID}&"
66
- f"commodityId={self.COMMODITY_ID}&"
67
- f"includeExcel={self.INCLUDE_EXCEL}"
68
- )
69
-
 
 
 
 
 
 
 
 
 
 
 
 
70
  try:
71
- logger.info(f"Fetching data for date: {date}")
72
- response = self.session.get(url, timeout=self.TIMEOUT)
73
- response.raise_for_status()
74
-
75
  data = response.json()
76
-
77
- if data.get("success"):
78
- # Count total records
79
- total_records = self._count_records(data)
80
- self._log_api_call(date, url, response.status_code, total_records)
81
- logger.info(
82
- f"✅ Successfully fetched data | Date: {date} | "
83
- f"Total records: {total_records}"
84
- )
85
- return data
86
- else:
87
- logger.error(
88
- f"❌ API returned failure | Date: {date} | "
89
- f"Message: {data.get('message', 'Unknown error')}"
90
- )
91
- return None
92
-
93
- except requests.exceptions.Timeout:
94
- logger.error(f"❌ Timeout error for date: {date}")
95
- return None
96
- except requests.exceptions.HTTPError as e:
97
- logger.error(f"❌ HTTP error for date: {date} | Status: {e.response.status_code}")
98
- return None
99
- except requests.exceptions.RequestException as e:
100
- logger.error(f"❌ Request error for date: {date} | Error: {str(e)}")
101
- return None
102
- except ValueError as e:
103
- logger.error(f"❌ JSON decode error for date: {date} | Error: {str(e)}")
104
- return None
105
 
106
  def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
107
  """Fetch market data for a date range.
 
19
 
20
 
21
class AgmarknetAPIClient:
    """Client for the Agmarknet API, with requests routed through ScraperAPI."""

    BASE_URL = "https://api.agmarknet.gov.in/v1/prices-and-arrivals/market-report/specific"

    # Fixed query parameters: commodity group 3, commodity 11
    # (Sesamum — Sesame/Gingelly/Til, per the original inline comments).
    COMMODITY_GROUP_ID = 3
    COMMODITY_ID = 11
    INCLUDE_EXCEL = "false"

    # SECURITY: a live ScraperAPI key was previously hard-coded here and
    # committed to version control — that key must be rotated. Read the key
    # from the environment instead of embedding it in source.
    import os as _os
    SCRAPER_API_KEY = _os.environ.get("SCRAPER_API_KEY", "")
    del _os
    # Use HTTPS so the api_key query parameter is not sent in cleartext
    # (the previous value was plain http://).
    SCRAPER_API_URL = "https://api.scraperapi.com"

    # Per-request timeout in seconds.
    TIMEOUT = 30
35
+
36
  def __init__(self):
 
37
  self.session = requests.Session()
38
+ logger.info("Agmarknet API client initialized with ScraperAPI")
39
+
40
+ def _log_api_call(self, date_str: str, url: str, status_code: int, records_count: int = 0):
 
 
 
 
 
 
 
 
 
41
  logger.info(
42
  f"API CALL | Date: {date_str} | Status: {status_code} | "
43
  f"Records: {records_count} | URL: {url}"
44
  )
45
+
46
+ def fetch_data(self, date_str: str):
47
+ """Fetch data using ScraperAPI.
48
+
49
  Args:
50
+ date_str: Date string (YYYY-MM-DD)
51
+
52
  Returns:
53
+ JSON response from API
54
  """
55
+ # Original Agmarknet query params
56
+ query_params = {
57
+ "commodityGroupId": self.COMMODITY_GROUP_ID,
58
+ "commodityId": self.COMMODITY_ID,
59
+ "date": date_str,
60
+ "includeExcel": self.INCLUDE_EXCEL
61
+ }
62
+
63
+ original_url = f"{self.BASE_URL}?{urlencode(query_params)}"
64
+
65
+ # ScraperAPI wrapper URL
66
+ scraper_params = {
67
+ "api_key": self.SCRAPER_API_KEY,
68
+ "url": original_url,
69
+ "render": "false"
70
+ }
71
+
72
+ scraper_url = f"{self.SCRAPER_API_URL}?{urlencode(scraper_params)}"
73
+
74
  try:
75
+ response = self.session.get(scraper_url, timeout=self.TIMEOUT)
76
+ status_code = response.status_code
77
+
 
78
  data = response.json()
79
+ records_count = len(data.get("data", [])) if isinstance(data, dict) else 0
80
+
81
+ self._log_api_call(date_str, original_url, status_code, records_count)
82
+
83
+ return data
84
+
85
+ except Exception as e:
86
+ logger.error(f"ScraperAPI request failed for {date_str}: {str(e)}")
87
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  def fetch_date_range(self, start_date: str, end_date: str) -> List[Dict[str, Any]]:
90
  """Fetch market data for a date range.