NaseefNazrul committed on
Commit
5b16c17
·
verified ·
1 Parent(s): 6720ba6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -8
app.py CHANGED
@@ -15,6 +15,7 @@ from typing import Optional, List, Dict, Tuple
15
  from concurrent.futures import ThreadPoolExecutor, as_completed
16
  import threading
17
  import json
 
18
 
19
  # google oauth helpers (used earlier)
20
  from google.oauth2.credentials import Credentials
@@ -38,6 +39,11 @@ TOP_K_SPECIES = int(os.environ.get("TOP_K_SPECIES", 5))
38
  DOY_BINS = 366
39
  DOY_SMOOTH = 15
40
  EPS_STD = 1.0
 
 
 
 
 
41
 
42
  # Tune parallelism: how many months to fetch at once
43
  MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
@@ -99,6 +105,90 @@ ee_cache: Dict[Tuple[float, float, str], dict] = {}
99
  # ------------------------------
100
  # Utility functions
101
  # ------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def gaussian_pdf_scalar(x_scalar, mean, std):
103
  """Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
104
  try:
@@ -640,21 +730,39 @@ async def predict_bloom(req: BloomPredictionRequest):
640
  monthly_results[month - 1] = MonthlyResult(**res)
641
 
642
  # Build monthly bell curve: normalize ml_bloom_probability across months
643
- probs = np.array([ (mr.ml_bloom_probability or 0.0)/100.0 for mr in monthly_results ], dtype=float)
644
- if probs.sum() == 0:
645
- # if all zero, set equal small weights (or zeros)
646
- norm = np.ones(12) / 12.0
647
- else:
648
- norm = probs / probs.sum()
649
- monthly_curve = {i+1: round(float(norm[i] * 100.0), 3) for i in range(12)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
 
651
- processing_time = round(time.time() - start_time, 2)
652
  response = {
653
  "success": True,
654
  "analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
655
  "requested_date": req.date,
656
  "monthly_results": monthly_results,
657
  "monthly_curve": monthly_curve,
 
 
658
  "processing_time": processing_time
659
  }
660
  return BloomPredictionResponse(**response)
 
15
  from concurrent.futures import ThreadPoolExecutor, as_completed
16
  import threading
17
  import json
18
+ import scipy.signal
19
 
20
  # google oauth helpers (used earlier)
21
  from google.oauth2.credentials import Credentials
 
39
  DOY_BINS = 366
40
  DOY_SMOOTH = 15
41
  EPS_STD = 1.0
42
# Tunables for the monthly-curve post-processing; each can be overridden
# through an environment variable of the name shown.

# Exponent applied to the monthly probabilities: >1 sharpens peaks, 1.0 = no change.
ALPHA = float(os.getenv("MONTH_ALPHA", "2.0"))

# Width (sigma) of the Gaussian smoothing, expressed in months.
SMOOTH_SIGMA = float(os.getenv("SMOOTH_SIGMA", "1.2"))

# How many species to return.
# NOTE(review): this name appears to be assigned earlier in the file as well,
# from the same environment variable -- confirm and drop one of the two.
TOP_K_SPECIES = int(os.getenv("TOP_K_SPECIES", "5"))

# Minimum per-month percentage floor.
MIN_CURVE_PROB = float(os.getenv("MIN_CURVE_PROB", "0.01"))
47
 
48
  # Tune parallelism: how many months to fetch at once
49
  MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
 
105
  # ------------------------------
106
  # Utility functions
107
  # ------------------------------
108
def gaussian_kernel(length=12, sigma=1.2):
    """Return a normalized, symmetric 1D Gaussian kernel.

    The kernel is sampled at the integer offsets ``-half .. +half`` with
    ``half = max(6, int(3 * sigma))``, so it has ``2 * half + 1`` taps
    (13 for the default sigma) and its entries sum to 1.

    Args:
        length: Currently unused. The kernel size is derived from ``sigma``,
            not from ``length``; the parameter is kept so existing callers
            keep working.
        sigma: Standard deviation of the Gaussian in index units. A value of
            0 returns a unit impulse (no smoothing) instead of a NaN kernel.

    Returns:
        A 1D ``numpy.ndarray`` of floats that sums to 1.
    """
    half = max(6, int(3 * sigma))
    offsets = np.arange(-half, half + 1)
    if sigma == 0:
        # Limit of the Gaussian as sigma -> 0: all weight on the centre tap.
        # Evaluating the formula below would divide by zero and yield NaN.
        return (offsets == 0).astype(float)
    kern = np.exp(-0.5 * (offsets / sigma) ** 2)
    return kern / kern.sum()
116
+
117
def smooth_monthly_probs(raw_probs, alpha=ALPHA, sigma=SMOOTH_SIGMA):
    """Turn raw per-month probabilities into a smoothed percentage curve.

    Steps:
      1. scale the inputs from [0, 100] to [0, 1]
      2. raise to the power ``alpha`` to sharpen peaks
      3. smooth with a Gaussian kernel using circular (wrap-around) padding
      4. normalize to sum to 1 and return percentages (0-100)

    Args:
        raw_probs: 1D sequence of probabilities in [0, 100]; the caller in
            this file passes 12 monthly values, but any length is accepted.
        alpha: Sharpening exponent; 1.0 leaves the values unchanged.
        sigma: Gaussian sigma in index (month) units; 0 disables smoothing.

    Returns:
        List of floats (same length as ``raw_probs``) rounded to 3 decimals.
        Because of rounding the values may not sum to exactly 100.
    """
    a = np.asarray(raw_probs, dtype=float) / 100.0
    n = len(a)
    if n == 0:
        # Nothing to smooth; np.convolve would reject an empty array.
        return []

    # Apply a uniform floor when everything is zero so normalization is defined.
    if a.sum() == 0:
        a = np.full_like(a, MIN_CURVE_PROB / 100.0)

    if alpha != 1.0:
        a = np.power(a, alpha)

    if sigma == 0:
        # A zero-width Gaussian is the identity; avoid the 0/0 in the kernel.
        center = a
    else:
        pad = int(max(6, 3 * sigma))
        offsets = np.arange(-pad, pad + 1)
        kern = np.exp(-0.5 * (offsets / sigma) ** 2)
        kern = kern / kern.sum()
        # Wrap indices modulo n so the padding stays truly circular even when
        # pad exceeds the input length (plain slicing would under-pad there
        # and shift the extracted window).
        wrapped = a[np.arange(-pad, n + pad) % n]
        # 'valid' yields exactly n samples, each centred on an original entry.
        center = np.convolve(wrapped, kern, mode="valid")

    # Guard against tiny negatives from floating-point error.
    center = np.clip(center, 0.0, None)
    if center.sum() == 0:
        center = np.ones_like(center) / len(center)

    norm = center / center.sum()
    return (norm * 100.0).round(3).tolist()
152
+
153
def is_bell_shaped(perc_list):
    """Heuristically check whether a curve is unimodal and roughly symmetric.

    The check:
      - lightly smooths the values and counts interior local maxima
        (at most one is expected)
      - compares the mass left and right of the peak (expected to be within
        a factor of 2 of each other)
      - reports the skewness as a diagnostic only; it does not affect the
        decision

    NOTE(review): the curve is treated as a linear sequence. A peak at the
    first or last position is not counted as a local maximum and has no mass
    on one side, so wrap-around (e.g. December/January) bells will fail the
    symmetry test -- confirm whether a circular check is wanted.

    Args:
        perc_list: Non-empty 1D sequence of numbers (percentages per month
            in this file's caller).

    Returns:
        ``(is_bell, diagnostics)`` where ``is_bell`` is a bool and
        ``diagnostics`` is a dict with the intermediate measurements.

    Raises:
        ValueError: If ``perc_list`` is empty.
    """
    arr = np.asarray(perc_list, dtype=float)
    n = arr.size
    if n == 0:
        raise ValueError("perc_list must not be empty")

    # Small Savitzky-Golay smoothing to remove noise. The window must be odd,
    # no longer than the data, and strictly larger than the polynomial order.
    window = min(5, n)
    if window % 2 == 0:
        window -= 1
    if window >= 3:
        arr_smooth = np.asarray(
            scipy.signal.savgol_filter(arr, window, min(3, window - 1))
        )
    else:
        # Too few points to smooth meaningfully.
        arr_smooth = arr

    # Interior local maxima: slope sign goes from rising to falling.
    # The +1 converts second-difference positions to array indices.
    slope_sign = np.sign(np.diff(arr_smooth))
    peaks = np.flatnonzero(np.diff(slope_sign) < 0) + 1
    num_peaks = len(peaks)
    if num_peaks:
        # `peaks` already holds array indices; use the tallest detected peak.
        peak_idx = int(peaks[np.argmax(arr_smooth[peaks])])
    else:
        peak_idx = int(arr_smooth.argmax())

    # Symmetry: mass strictly left vs strictly right of the peak.
    left_mass = arr[:peak_idx].sum()
    right_mass = arr[peak_idx + 1:].sum()
    sym_ratio = left_mass / (right_mass + 1e-9)  # epsilon avoids 0-division

    # Skewness (population moments); fall back to s=1 for a flat curve.
    m = arr.mean()
    std = arr.std(ddof=0)
    s = std if std > 0 else 1.0
    skew = ((arr - m) ** 3).mean() / (s ** 3)

    # Heuristics.
    is_unimodal = num_peaks <= 1
    is_symmish = 0.5 <= sym_ratio <= 2.0  # within a factor of 2
    is_bell = is_unimodal and is_symmish

    diagnostics = {
        "num_peaks": num_peaks,
        "peak_month": int(np.argmax(arr)) + 1,
        "sym_ratio_left_to_right": float(sym_ratio),
        "skewness": float(skew),
        "is_unimodal": bool(is_unimodal),
        "is_symmetric_enough": bool(is_symmish),
    }
    return bool(is_bell), diagnostics
190
+
191
+
192
  def gaussian_pdf_scalar(x_scalar, mean, std):
193
  """Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
194
  try:
 
730
  monthly_results[month - 1] = MonthlyResult(**res)
731
 
732
  # Build monthly bell curve: normalize ml_bloom_probability across months
733
+ # After monthly_results list is filled (MonthlyResult objects)...
734
+
735
+ # Build raw ML probs array (0-100)
736
+ raw_probs = np.array([(mr.ml_bloom_probability or 0.0) for mr in monthly_results], dtype=float)
737
+
738
+ # 1) Compute smoothed bell curve percentages
739
+ monthly_perc = smooth_monthly_probs(raw_probs.tolist(), alpha=ALPHA, sigma=SMOOTH_SIGMA)
740
+ monthly_curve = {i+1: float(monthly_perc[i]) for i in range(12)}
741
+
742
+ # 2) Bell-shape verification
743
+ bell_ok, bell_diag = is_bell_shaped(list(monthly_perc))
744
+
745
+ # 3) Trim species_probs in each monthly_result to top-K only
746
+ for mr in monthly_results:
747
+ if isinstance(mr.species_probs, dict) and len(mr.species_probs) > 0:
748
+ # sort and keep top K
749
+ items = sorted(mr.species_probs.items(), key=lambda x: -float(x[1]))[:TOP_K_SPECIES]
750
+ mr.species_probs = {k: round(float(v), 6) for k, v in items}
751
+ # also ensure species_top aligns (already top list)
752
+ mr.species_top = [(s, float(p)) for s, p in mr.species_top[:TOP_K_SPECIES]]
753
+ else:
754
+ mr.species_probs = {}
755
+ mr.species_top = []
756
 
757
+ # include bell verification in response
758
  response = {
759
  "success": True,
760
  "analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
761
  "requested_date": req.date,
762
  "monthly_results": monthly_results,
763
  "monthly_curve": monthly_curve,
764
+ "bell_valid": bell_ok,
765
+ "bell_diagnostics": bell_diag,
766
  "processing_time": processing_time
767
  }
768
  return BloomPredictionResponse(**response)