krushimitravit commited on
Commit
d271986
·
verified ·
1 Parent(s): 34a7ef1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -27
app.py CHANGED
@@ -2,6 +2,7 @@ from flask import Flask, render_template, request, jsonify, Response
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from flask import stream_with_context
 
5
 
6
  app = Flask(__name__)
7
 
@@ -93,20 +94,27 @@ def build_image_url(crop, year, pest, param, week):
93
  def image_exists(url):
94
  """Return True if the remote image URL returns a valid image (HTTP 200 + image content-type)."""
95
  try:
96
- resp = requests.head(url, timeout=6, allow_redirects=True)
97
  if resp.status_code == 200:
98
- ct = resp.headers.get("Content-Type", "")
99
- return ct.startswith("image/")
100
- # Some servers don't support HEAD; fall back to a small GET
101
  if resp.status_code in (405, 403):
102
- resp = requests.get(url, timeout=8, stream=True)
103
- ct = resp.headers.get("Content-Type", "")
104
- return resp.status_code == 200 and ct.startswith("image/")
105
  except Exception:
106
  pass
107
  return False
108
 
109
 
 
 
 
 
 
 
 
 
110
  @app.route('/')
111
  def index():
112
  crop = request.args.get('crop', '')
@@ -141,7 +149,10 @@ def index():
141
 
142
  @app.route('/fetch_weeks')
143
  def fetch_weeks():
144
- """Return only weeks for which at least one param image actually exists."""
 
 
 
145
  crop = request.args.get('crop', '')
146
  pest = request.args.get('pest', '')
147
  year = request.args.get('year', '')
@@ -152,36 +163,40 @@ def fetch_weeks():
152
  ext_crop = CROP_MAPPING.get(crop, '')
153
  ext_pest = PEST_MAPPING.get(crop, {}).get(pest, '')
154
 
155
- # Try to scrape available weeks from the upstream page first
156
  candidate_weeks = []
157
  try:
158
  payload = {"country": ext_crop, "city": ext_pest, "sowing": year}
159
- response = requests.get(
160
  "http://www.icar-crida.res.in:8080/naip/gismaps.jsp",
161
- params=payload, timeout=10
162
  )
163
- soup = BeautifulSoup(response.text, 'html.parser')
164
- week_options = soup.select('select[name="week"] option')
165
  candidate_weeks = [
166
- opt.get('value') for opt in week_options
167
  if opt.get('value') and "Select" not in opt.get('value', '')
168
  ]
169
  except Exception:
170
  pass
171
 
172
- # Fall back to weeks 1-52 if scraping failed
173
  if not candidate_weeks:
174
  candidate_weeks = [str(i) for i in range(1, 53)]
175
 
176
- # Filter: keep only weeks where at least one param image exists
177
  available_weeks = []
178
- for week in candidate_weeks:
179
- for param_code in PARAMS.keys():
180
- url = build_image_url(crop, year, pest, param_code, week)
181
- if image_exists(url):
182
- available_weeks.append(week)
183
- break # No need to check all params for this week
184
-
 
 
 
 
 
 
185
  return jsonify({"weeks": available_weeks})
186
 
187
 
@@ -189,7 +204,8 @@ def fetch_weeks():
189
  def check_availability():
190
  """
191
  Check which params are available for a given crop/pest/year/week combination.
192
- Returns a dict of {param_code: bool} so the frontend can disable unavailable options.
 
193
  """
194
  crop = request.args.get('crop', '')
195
  pest = request.args.get('pest', '')
@@ -199,10 +215,14 @@ def check_availability():
199
  if not (crop and pest and year and week):
200
  return jsonify({"availability": {}})
201
 
202
- availability = {}
203
- for param_code in PARAMS.keys():
204
  url = build_image_url(crop, year, pest, param_code, week)
205
- availability[param_code] = image_exists(url)
 
 
 
 
 
206
 
207
  return jsonify({"availability": availability})
208
 
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from flask import stream_with_context
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
 
7
  app = Flask(__name__)
8
 
 
94
def image_exists(url):
    """Return True if the remote image URL serves a valid image.

    Sends a lightweight HEAD request first; some servers reject HEAD
    (405/403), in which case we retry with a streaming GET whose body is
    never downloaded.

    Args:
        url: Absolute URL of the candidate image.

    Returns:
        True only when the final response is HTTP 200 with an ``image/*``
        Content-Type; False on any network error or non-image reply.
    """
    try:
        resp = requests.head(url, timeout=4, allow_redirects=True)
        if resp.status_code == 200:
            return resp.headers.get("Content-Type", "").startswith("image/")
        # Some servers don't support HEAD — fall back to a streaming GET.
        if resp.status_code in (405, 403):
            # stream=True keeps the body off the wire; close() releases the
            # connection immediately (status/headers stay readable after).
            resp = requests.get(url, timeout=4, stream=True)
            resp.close()
            return (
                resp.status_code == 200
                and resp.headers.get("Content-Type", "").startswith("image/")
            )
    except requests.RequestException:
        # Narrowed from bare Exception: only network-level failures
        # (timeouts, DNS, bad URL) should count as "not available" —
        # programming errors must not be silently swallowed.
        pass
    return False
108
 
109
 
110
def week_has_any_data(week, crop, year, pest):
    """Return *week* if at least one param image exists for it, else None.

    Probes each parameter's image URL lazily and stops at the first hit,
    so at most one successful network check is performed per week.
    """
    candidate_urls = (
        build_image_url(crop, year, pest, code, week) for code in PARAMS
    )
    if any(image_exists(url) for url in candidate_urls):
        return week
    return None
116
+
117
+
118
  @app.route('/')
119
  def index():
120
  crop = request.args.get('crop', '')
 
149
 
150
  @app.route('/fetch_weeks')
151
  def fetch_weeks():
152
+ """
153
+ Return weeks that have at least one available param image.
154
+ Checks are run in parallel to stay well within the gunicorn timeout.
155
+ """
156
  crop = request.args.get('crop', '')
157
  pest = request.args.get('pest', '')
158
  year = request.args.get('year', '')
 
163
  ext_crop = CROP_MAPPING.get(crop, '')
164
  ext_pest = PEST_MAPPING.get(crop, {}).get(pest, '')
165
 
166
+ # Step 1: scrape candidate weeks from upstream (fast — single request)
167
  candidate_weeks = []
168
  try:
169
  payload = {"country": ext_crop, "city": ext_pest, "sowing": year}
170
+ resp = requests.get(
171
  "http://www.icar-crida.res.in:8080/naip/gismaps.jsp",
172
+ params=payload, timeout=8
173
  )
174
+ soup = BeautifulSoup(resp.text, 'html.parser')
 
175
  candidate_weeks = [
176
+ opt.get('value') for opt in soup.select('select[name="week"] option')
177
  if opt.get('value') and "Select" not in opt.get('value', '')
178
  ]
179
  except Exception:
180
  pass
181
 
 
182
  if not candidate_weeks:
183
  candidate_weeks = [str(i) for i in range(1, 53)]
184
 
185
+ # Step 2: check all candidate weeks in parallel (max 20 threads, 4s per request)
186
  available_weeks = []
187
+ with ThreadPoolExecutor(max_workers=20) as executor:
188
+ futures = {
189
+ executor.submit(week_has_any_data, week, crop, year, pest): week
190
+ for week in candidate_weeks
191
+ }
192
+ found = set()
193
+ for future in as_completed(futures):
194
+ result = future.result()
195
+ if result is not None:
196
+ found.add(result)
197
+
198
+ # Preserve original order
199
+ available_weeks = [w for w in candidate_weeks if w in found]
200
  return jsonify({"weeks": available_weeks})
201
 
202
 
 
204
  def check_availability():
205
  """
206
  Check which params are available for a given crop/pest/year/week combination.
207
+ All 5 param checks run in parallel.
208
+ Returns a dict of {param_code: bool}.
209
  """
210
  crop = request.args.get('crop', '')
211
  pest = request.args.get('pest', '')
 
215
  if not (crop and pest and year and week):
216
  return jsonify({"availability": {}})
217
 
218
+ def check_param(param_code):
 
219
  url = build_image_url(crop, year, pest, param_code, week)
220
+ return param_code, image_exists(url)
221
+
222
+ availability = {}
223
+ with ThreadPoolExecutor(max_workers=5) as executor:
224
+ for param_code, exists in executor.map(lambda p: check_param(p), PARAMS.keys()):
225
+ availability[param_code] = exists
226
 
227
  return jsonify({"availability": availability})
228