Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ from flask import Flask, render_template, request, jsonify, Response
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from flask import stream_with_context
|
|
|
|
| 5 |
|
| 6 |
app = Flask(__name__)
|
| 7 |
|
|
@@ -93,20 +94,27 @@ def build_image_url(crop, year, pest, param, week):
|
|
| 93 |
def image_exists(url):
|
| 94 |
"""Return True if the remote image URL returns a valid image (HTTP 200 + image content-type)."""
|
| 95 |
try:
|
| 96 |
-
resp = requests.head(url, timeout=
|
| 97 |
if resp.status_code == 200:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
# Some servers don't support HEAD; fall back to a small GET
|
| 101 |
if resp.status_code in (405, 403):
|
| 102 |
-
resp = requests.get(url, timeout=
|
| 103 |
-
|
| 104 |
-
return resp.status_code == 200 and
|
| 105 |
except Exception:
|
| 106 |
pass
|
| 107 |
return False
|
| 108 |
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
@app.route('/')
|
| 111 |
def index():
|
| 112 |
crop = request.args.get('crop', '')
|
|
@@ -141,7 +149,10 @@ def index():
|
|
| 141 |
|
| 142 |
@app.route('/fetch_weeks')
|
| 143 |
def fetch_weeks():
|
| 144 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 145 |
crop = request.args.get('crop', '')
|
| 146 |
pest = request.args.get('pest', '')
|
| 147 |
year = request.args.get('year', '')
|
|
@@ -152,36 +163,40 @@ def fetch_weeks():
|
|
| 152 |
ext_crop = CROP_MAPPING.get(crop, '')
|
| 153 |
ext_pest = PEST_MAPPING.get(crop, {}).get(pest, '')
|
| 154 |
|
| 155 |
-
#
|
| 156 |
candidate_weeks = []
|
| 157 |
try:
|
| 158 |
payload = {"country": ext_crop, "city": ext_pest, "sowing": year}
|
| 159 |
-
|
| 160 |
"http://www.icar-crida.res.in:8080/naip/gismaps.jsp",
|
| 161 |
-
params=payload, timeout=
|
| 162 |
)
|
| 163 |
-
soup = BeautifulSoup(
|
| 164 |
-
week_options = soup.select('select[name="week"] option')
|
| 165 |
candidate_weeks = [
|
| 166 |
-
opt.get('value') for opt in
|
| 167 |
if opt.get('value') and "Select" not in opt.get('value', '')
|
| 168 |
]
|
| 169 |
except Exception:
|
| 170 |
pass
|
| 171 |
|
| 172 |
-
# Fall back to weeks 1-52 if scraping failed
|
| 173 |
if not candidate_weeks:
|
| 174 |
candidate_weeks = [str(i) for i in range(1, 53)]
|
| 175 |
|
| 176 |
-
#
|
| 177 |
available_weeks = []
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
return jsonify({"weeks": available_weeks})
|
| 186 |
|
| 187 |
|
|
@@ -189,7 +204,8 @@ def fetch_weeks():
|
|
| 189 |
def check_availability():
|
| 190 |
"""
|
| 191 |
Check which params are available for a given crop/pest/year/week combination.
|
| 192 |
-
|
|
|
|
| 193 |
"""
|
| 194 |
crop = request.args.get('crop', '')
|
| 195 |
pest = request.args.get('pest', '')
|
|
@@ -199,10 +215,14 @@ def check_availability():
|
|
| 199 |
if not (crop and pest and year and week):
|
| 200 |
return jsonify({"availability": {}})
|
| 201 |
|
| 202 |
-
|
| 203 |
-
for param_code in PARAMS.keys():
|
| 204 |
url = build_image_url(crop, year, pest, param_code, week)
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
return jsonify({"availability": availability})
|
| 208 |
|
|
|
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
from flask import stream_with_context
|
| 5 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
|
|
|
|
def image_exists(url):
    """Return True if the remote image URL returns a valid image (HTTP 200 + image content-type)."""

    def _looks_like_image(response):
        # A usable image means a 200 status together with an image/* content type.
        return (
            response.status_code == 200
            and response.headers.get("Content-Type", "").startswith("image/")
        )

    try:
        head_resp = requests.head(url, timeout=4, allow_redirects=True)
        if head_resp.status_code == 200:
            return head_resp.headers.get("Content-Type", "").startswith("image/")
        if head_resp.status_code in (405, 403):
            # Some servers reject HEAD outright; probe again with a streaming GET
            # so the body is never downloaded, then close the connection at once.
            get_resp = requests.get(url, timeout=4, stream=True)
            get_resp.close()
            return _looks_like_image(get_resp)
    except Exception:
        # Best-effort availability probe: any network/HTTP error counts as "missing".
        pass
    return False
|
def week_has_any_data(week, crop, year, pest):
    """Return week string if any param image exists for it, else None."""
    # any() short-circuits on the first available param image, exactly like
    # returning from inside the loop would.
    found = any(
        image_exists(build_image_url(crop, year, pest, code, week))
        for code in PARAMS
    )
    return week if found else None
+
|
| 117 |
+
|
| 118 |
@app.route('/')
|
| 119 |
def index():
|
| 120 |
crop = request.args.get('crop', '')
|
|
|
|
| 149 |
|
| 150 |
@app.route('/fetch_weeks')
|
| 151 |
def fetch_weeks():
|
| 152 |
+
"""
|
| 153 |
+
Return weeks that have at least one available param image.
|
| 154 |
+
Checks are run in parallel to stay well within the gunicorn timeout.
|
| 155 |
+
"""
|
| 156 |
crop = request.args.get('crop', '')
|
| 157 |
pest = request.args.get('pest', '')
|
| 158 |
year = request.args.get('year', '')
|
|
|
|
| 163 |
ext_crop = CROP_MAPPING.get(crop, '')
|
| 164 |
ext_pest = PEST_MAPPING.get(crop, {}).get(pest, '')
|
| 165 |
|
| 166 |
+
# Step 1: scrape candidate weeks from upstream (fast — single request)
|
| 167 |
candidate_weeks = []
|
| 168 |
try:
|
| 169 |
payload = {"country": ext_crop, "city": ext_pest, "sowing": year}
|
| 170 |
+
resp = requests.get(
|
| 171 |
"http://www.icar-crida.res.in:8080/naip/gismaps.jsp",
|
| 172 |
+
params=payload, timeout=8
|
| 173 |
)
|
| 174 |
+
soup = BeautifulSoup(resp.text, 'html.parser')
|
|
|
|
| 175 |
candidate_weeks = [
|
| 176 |
+
opt.get('value') for opt in soup.select('select[name="week"] option')
|
| 177 |
if opt.get('value') and "Select" not in opt.get('value', '')
|
| 178 |
]
|
| 179 |
except Exception:
|
| 180 |
pass
|
| 181 |
|
|
|
|
| 182 |
if not candidate_weeks:
|
| 183 |
candidate_weeks = [str(i) for i in range(1, 53)]
|
| 184 |
|
| 185 |
+
# Step 2: check all candidate weeks in parallel (max 20 threads, 4s per request)
|
| 186 |
available_weeks = []
|
| 187 |
+
with ThreadPoolExecutor(max_workers=20) as executor:
|
| 188 |
+
futures = {
|
| 189 |
+
executor.submit(week_has_any_data, week, crop, year, pest): week
|
| 190 |
+
for week in candidate_weeks
|
| 191 |
+
}
|
| 192 |
+
found = set()
|
| 193 |
+
for future in as_completed(futures):
|
| 194 |
+
result = future.result()
|
| 195 |
+
if result is not None:
|
| 196 |
+
found.add(result)
|
| 197 |
+
|
| 198 |
+
# Preserve original order
|
| 199 |
+
available_weeks = [w for w in candidate_weeks if w in found]
|
| 200 |
return jsonify({"weeks": available_weeks})
|
| 201 |
|
| 202 |
|
|
|
|
| 204 |
def check_availability():
|
| 205 |
"""
|
| 206 |
Check which params are available for a given crop/pest/year/week combination.
|
| 207 |
+
All 5 param checks run in parallel.
|
| 208 |
+
Returns a dict of {param_code: bool}.
|
| 209 |
"""
|
| 210 |
crop = request.args.get('crop', '')
|
| 211 |
pest = request.args.get('pest', '')
|
|
|
|
| 215 |
if not (crop and pest and year and week):
|
| 216 |
return jsonify({"availability": {}})
|
| 217 |
|
| 218 |
+
def check_param(param_code):
|
|
|
|
| 219 |
url = build_image_url(crop, year, pest, param_code, week)
|
| 220 |
+
return param_code, image_exists(url)
|
| 221 |
+
|
| 222 |
+
availability = {}
|
| 223 |
+
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 224 |
+
for param_code, exists in executor.map(lambda p: check_param(p), PARAMS.keys()):
|
| 225 |
+
availability[param_code] = exists
|
| 226 |
|
| 227 |
return jsonify({"availability": availability})
|
| 228 |
|