Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ from typing import Optional, List, Dict, Tuple
|
|
| 15 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 16 |
import threading
|
| 17 |
import json
|
|
|
|
| 18 |
|
| 19 |
# google oauth helpers (used earlier)
|
| 20 |
from google.oauth2.credentials import Credentials
|
|
@@ -38,6 +39,11 @@ TOP_K_SPECIES = int(os.environ.get("TOP_K_SPECIES", 5))
|
|
| 38 |
DOY_BINS = 366
|
| 39 |
DOY_SMOOTH = 15
|
| 40 |
EPS_STD = 1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Tune parallelism: how many months to fetch at once
|
| 43 |
MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
|
|
@@ -99,6 +105,90 @@ ee_cache: Dict[Tuple[float, float, str], dict] = {}
|
|
| 99 |
# ------------------------------
|
| 100 |
# Utility functions
|
| 101 |
# ------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
def gaussian_pdf_scalar(x_scalar, mean, std):
|
| 103 |
"""Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
|
| 104 |
try:
|
|
@@ -640,21 +730,39 @@ async def predict_bloom(req: BloomPredictionRequest):
|
|
| 640 |
monthly_results[month - 1] = MonthlyResult(**res)
|
| 641 |
|
| 642 |
# Build monthly bell curve: normalize ml_bloom_probability across months
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
-
|
| 652 |
response = {
|
| 653 |
"success": True,
|
| 654 |
"analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
| 655 |
"requested_date": req.date,
|
| 656 |
"monthly_results": monthly_results,
|
| 657 |
"monthly_curve": monthly_curve,
|
|
|
|
|
|
|
| 658 |
"processing_time": processing_time
|
| 659 |
}
|
| 660 |
return BloomPredictionResponse(**response)
|
|
|
|
| 15 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 16 |
import threading
|
| 17 |
import json
|
| 18 |
+
import scipy.signal
|
| 19 |
|
| 20 |
# google oauth helpers (used earlier)
|
| 21 |
from google.oauth2.credentials import Credentials
|
|
|
|
| 39 |
DOY_BINS = 366
|
| 40 |
DOY_SMOOTH = 15
|
| 41 |
EPS_STD = 1.0
|
| 42 |
+
# TUNABLES (additions)
|
| 43 |
+
ALPHA = float(os.environ.get("MONTH_ALPHA", 2.0)) # >1 sharpens peaks, 1.0 = no change
|
| 44 |
+
SMOOTH_SIGMA = float(os.environ.get("SMOOTH_SIGMA", 1.2)) # gaussian sigma in months
|
| 45 |
+
TOP_K_SPECIES = int(os.environ.get("TOP_K_SPECIES", 5)) # how many species to return
|
| 46 |
+
MIN_CURVE_PROB = float(os.environ.get("MIN_CURVE_PROB", 0.01)) # min per-month percentage floor
|
| 47 |
|
| 48 |
# Tune parallelism: how many months to fetch at once
|
| 49 |
MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
|
|
|
|
| 105 |
# ------------------------------
|
| 106 |
# Utility functions
|
| 107 |
# ------------------------------
|
| 108 |
+
def gaussian_kernel(length=12, sigma=1.2):
    """Return a normalized, symmetric 1D Gaussian kernel.

    The kernel is sampled at integer offsets ``-half .. +half`` where
    ``half = max(6, int(3 * sigma))``, so it always has ``2 * half + 1``
    entries (13 for the default sigma) and sums to 1.

    Args:
        length: Accepted for backward compatibility only; it does NOT
            influence the size of the returned kernel.
        sigma: Standard deviation of the Gaussian, in index units. A value
            of exactly 0 yields a unit impulse (the limiting case) instead
            of the NaN-filled array a division by zero would produce.

    Returns:
        numpy.ndarray of floats with odd length, centred on its middle
        element.
    """
    half = max(6, int(3 * sigma))
    offsets = np.arange(-half, half + 1)
    if sigma == 0:
        # Degenerate Gaussian: all mass on the centre sample.
        return (offsets == 0).astype(float)
    kern = np.exp(-0.5 * (offsets / sigma) ** 2)
    return kern / kern.sum()
|
| 116 |
+
|
| 117 |
+
def smooth_monthly_probs(raw_probs, alpha=ALPHA, sigma=SMOOTH_SIGMA):
    """Turn raw per-month ML probabilities into a smoothed percentage curve.

    Steps:
      1. Scale the inputs from [0, 100] to [0, 1]; non-finite and negative
         entries are treated as 0.
      2. If every entry is 0, substitute a flat floor of MIN_CURVE_PROB so
         the result is a uniform curve rather than a division by zero.
      3. Raise to the power ``alpha`` (values > 1 sharpen peaks).
      4. Circularly convolve with a normalized Gaussian kernel so that
         the last and first entries smooth into each other.
      5. Normalize to sum to 1 and return percentages rounded to 3 places.

    Args:
        raw_probs: 1D sequence of probabilities in [0, 100]. Typically 12
            values (one per month), but any length is supported.
        alpha: Sharpening exponent; 1.0 leaves the values unchanged.
        sigma: Gaussian standard deviation in index (month) units. A value
            of exactly 0 disables smoothing.

    Returns:
        list[float] with the same length as ``raw_probs``; values are
        percentages that sum to ~100 (up to rounding). Empty input yields
        an empty list.
    """
    a = np.asarray(raw_probs, dtype=float) / 100.0
    n = a.size
    if n == 0:
        return []

    # Treat missing / invalid values as "no signal" so one bad month cannot
    # turn the whole curve into NaN, and so negatives are not made positive
    # by an even exponent.
    a = np.where(np.isfinite(a), a, 0.0)
    a = np.clip(a, 0.0, None)

    # Floor to avoid an all-zero curve.
    if a.sum() == 0:
        a = np.full_like(a, MIN_CURVE_PROB / 100.0)

    # Sharpen.
    if alpha != 1.0:
        a = np.power(a, alpha)

    if sigma != 0:
        pad = int(max(6, 3 * sigma))
        # Modular indexing gives a correct circular pad even when pad > n
        # (plain slicing a[-pad:] would silently truncate in that case).
        wrap_idx = np.arange(-pad, n + pad) % n
        padded = a[wrap_idx]

        offsets = np.arange(-pad, pad + 1)
        kern = np.exp(-0.5 * (offsets / sigma) ** 2)
        kern = kern / kern.sum()

        # padded has n + 2*pad samples and the kernel 2*pad + 1, so 'valid'
        # returns exactly the n fully-overlapped centre samples.
        center = np.convolve(padded, kern, mode="valid")
    else:
        center = a

    # Clip small negatives or tiny numbers.
    center = np.clip(center, 0.0, None)
    if center.sum() == 0:
        center = np.ones_like(center) / n

    # Normalize to percentages.
    norm = center / center.sum()
    perc = (norm * 100.0).round(3)
    return perc.tolist()
|
| 152 |
+
|
| 153 |
+
def is_bell_shaped(perc_list):
    """Heuristically check whether a curve is unimodal and roughly symmetric.

    Checks performed:
      - count interior local maxima of a lightly smoothed copy (expect <= 1)
      - compare the mass left vs. right of the peak (expect within factor 2)
      - compute skewness of the values (reported only, not used in the
        decision)

    NOTE(review): the series is treated as linear, not circular, so a peak
    at the first or last position has no mass on one side and is reported
    as asymmetric. Confirm whether that is intended for month data.

    Args:
        perc_list: Non-empty 1D sequence of numbers (e.g. 12 monthly
            percentages).

    Returns:
        (is_bell, diagnostics): ``is_bell`` is a bool; ``diagnostics`` is a
        dict with keys ``num_peaks``, ``peak_month`` (1-based argmax of the
        raw values), ``sym_ratio_left_to_right``, ``skewness``,
        ``is_unimodal`` and ``is_symmetric_enough``.

    Raises:
        ValueError: if ``perc_list`` is empty.
    """
    arr = np.asarray(perc_list, dtype=float)
    if arr.size == 0:
        raise ValueError("perc_list must contain at least one value")

    # Small smoothing to remove noise. savgol_filter requires
    # polyorder < window_length <= len(arr), so the (5, 3) filter is only
    # valid for 5+ samples; shorter inputs are analysed unsmoothed.
    if arr.size >= 5:
        arr_smooth = np.asarray(scipy.signal.savgol_filter(arr, 5, 3))
    else:
        arr_smooth = arr

    # Interior local maxima: slope sign goes from rising to falling.
    # The +1 converts from second-difference index to sample index.
    slope_sign = np.sign(np.diff(arr_smooth))
    peaks = (np.diff(slope_sign) < 0).nonzero()[0] + 1
    num_peaks = len(peaks)

    # `peaks` already holds sample indices; use the tallest one as the
    # split point (no additional offset).
    if num_peaks >= 1:
        peak_idx = int(peaks[np.argmax(arr_smooth[peaks])])
    else:
        peak_idx = int(arr_smooth.argmax())

    # Symmetry: mass strictly left vs. strictly right of the peak.
    left_mass = arr[:peak_idx].sum()
    right_mass = arr[peak_idx + 1:].sum()
    sym_ratio = left_mass / (right_mass + 1e-9)

    # Skewness (population moments); guard against zero variance.
    m = arr.mean()
    std = arr.std(ddof=0)
    s = std if std > 0 else 1.0
    skew = ((arr - m) ** 3).mean() / (s ** 3)

    # Heuristics.
    is_unimodal = (num_peaks <= 1)
    is_symmish = 0.5 <= sym_ratio <= 2.0  # within factor 2

    # Final decision.
    is_bell = is_unimodal and is_symmish
    diagnostics = {
        "num_peaks": num_peaks,
        "peak_month": int(np.argmax(arr)) + 1,
        "sym_ratio_left_to_right": float(sym_ratio),
        "skewness": float(skew),
        "is_unimodal": bool(is_unimodal),
        "is_symmetric_enough": bool(is_symmish),
    }
    return bool(is_bell), diagnostics
|
| 190 |
+
|
| 191 |
+
|
| 192 |
def gaussian_pdf_scalar(x_scalar, mean, std):
|
| 193 |
"""Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
|
| 194 |
try:
|
|
|
|
| 730 |
monthly_results[month - 1] = MonthlyResult(**res)
|
| 731 |
|
| 732 |
# Build monthly bell curve: normalize ml_bloom_probability across months
|
| 733 |
+
# After monthly_results list is filled (MonthlyResult objects)...
|
| 734 |
+
|
| 735 |
+
# Build raw ML probs array (0-100)
|
| 736 |
+
raw_probs = np.array([(mr.ml_bloom_probability or 0.0) for mr in monthly_results], dtype=float)
|
| 737 |
+
|
| 738 |
+
# 1) Compute smoothed bell curve percentages
|
| 739 |
+
monthly_perc = smooth_monthly_probs(raw_probs.tolist(), alpha=ALPHA, sigma=SMOOTH_SIGMA)
|
| 740 |
+
monthly_curve = {i+1: float(monthly_perc[i]) for i in range(12)}
|
| 741 |
+
|
| 742 |
+
# 2) Bell-shape verification
|
| 743 |
+
bell_ok, bell_diag = is_bell_shaped(list(monthly_perc))
|
| 744 |
+
|
| 745 |
+
# 3) Trim species_probs in each monthly_result to top-K only
|
| 746 |
+
for mr in monthly_results:
|
| 747 |
+
if isinstance(mr.species_probs, dict) and len(mr.species_probs) > 0:
|
| 748 |
+
# sort and keep top K
|
| 749 |
+
items = sorted(mr.species_probs.items(), key=lambda x: -float(x[1]))[:TOP_K_SPECIES]
|
| 750 |
+
mr.species_probs = {k: round(float(v), 6) for k, v in items}
|
| 751 |
+
# also ensure species_top aligns (already top list)
|
| 752 |
+
mr.species_top = [(s, float(p)) for s, p in mr.species_top[:TOP_K_SPECIES]]
|
| 753 |
+
else:
|
| 754 |
+
mr.species_probs = {}
|
| 755 |
+
mr.species_top = []
|
| 756 |
|
| 757 |
+
# include bell verification in response
|
| 758 |
response = {
|
| 759 |
"success": True,
|
| 760 |
"analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
|
| 761 |
"requested_date": req.date,
|
| 762 |
"monthly_results": monthly_results,
|
| 763 |
"monthly_curve": monthly_curve,
|
| 764 |
+
"bell_valid": bell_ok,
|
| 765 |
+
"bell_diagnostics": bell_diag,
|
| 766 |
"processing_time": processing_time
|
| 767 |
}
|
| 768 |
return BloomPredictionResponse(**response)
|