Spaces:

NaseefNazrul
/

BloomAI

Sleeping

App Files Files Community

NaseefNazrul committed on Oct 5, 2025

Commit

5b16c17

verified ·

1 Parent(s): 6720ba6

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -8

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ from typing import Optional, List, Dict, Tuple
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import json
 # google oauth helpers (used earlier)
 from google.oauth2.credentials import Credentials
@@ -38,6 +39,11 @@ TOP_K_SPECIES = int(os.environ.get("TOP_K_SPECIES", 5))
 DOY_BINS = 366
 DOY_SMOOTH = 15
 EPS_STD = 1.0
 # Tune parallelism: how many months to fetch at once
 MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
@@ -99,6 +105,90 @@ ee_cache: Dict[Tuple[float, float, str], dict] = {}
 # ------------------------------
 # Utility functions
 # ------------------------------
 def gaussian_pdf_scalar(x_scalar, mean, std):
     """Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
     try:
@@ -640,21 +730,39 @@ async def predict_bloom(req: BloomPredictionRequest):
             monthly_results[month - 1] = MonthlyResult(**res)
     # Build monthly bell curve: normalize ml_bloom_probability across months
-    probs = np.array([ (mr.ml_bloom_probability or 0.0)/100.0 for mr in monthly_results ], dtype=float)
-    if probs.sum() == 0:
-        # if all zero, set equal small weights (or zeros)
-        norm = np.ones(12) / 12.0
-    else:
-        norm = probs / probs.sum()
-    monthly_curve = {i+1: round(float(norm[i] * 100.0), 3) for i in range(12)}
-    processing_time = round(time.time() - start_time, 2)
     response = {
         "success": True,
         "analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
         "requested_date": req.date,
         "monthly_results": monthly_results,
         "monthly_curve": monthly_curve,
         "processing_time": processing_time
     }
     return BloomPredictionResponse(**response)

 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import json
+import scipy.signal
 # google oauth helpers (used earlier)
 from google.oauth2.credentials import Credentials
 DOY_BINS = 366
 DOY_SMOOTH = 15
 EPS_STD = 1.0
+# TUNABLES (additions): curve-shaping knobs, each overridable through the named environment variable
+ALPHA = float(os.environ.get("MONTH_ALPHA", 2.0))      # exponent applied to the 0-1 scaled monthly probabilities; >1 sharpens peaks, 1.0 = no change (env var is MONTH_ALPHA, not ALPHA)
+SMOOTH_SIGMA = float(os.environ.get("SMOOTH_SIGMA", 1.2))  # std-dev, in months, of the gaussian used to smooth the monthly curve
+TOP_K_SPECIES = int(os.environ.get("TOP_K_SPECIES", 5))   # number of highest-probability species kept per monthly result; NOTE(review): the hunk header suggests this assignment already exists earlier in the file - confirm and drop the duplicate
+MIN_CURVE_PROB = float(os.environ.get("MIN_CURVE_PROB", 0.01))  # percentage given to every month when all raw probabilities are zero (a fallback fill in smooth_monthly_probs, not a per-month floor)
 # Tune parallelism: how many months to fetch at once
 MAX_WORKERS = int(os.environ.get("MAX_WORKERS", 4))
 # ------------------------------
 # Utility functions
 # ------------------------------
def gaussian_kernel(length=12, sigma=1.2):
    """Return a normalized, symmetric 1-D Gaussian kernel.

    The kernel is sampled at the integer offsets ``-half .. +half`` with
    ``half = max(6, int(3 * sigma))``, so it always has ``2 * half + 1``
    taps (odd, at least 13) and its weights sum to 1.

    Args:
        length: Kept only for backward compatibility. The kernel size is
            derived from ``sigma``; this value is not used.
        sigma: Standard deviation in index units. ``sigma == 0`` returns a
            unit impulse (i.e. "no smoothing") instead of dividing by zero
            and producing an all-NaN kernel.

    Returns:
        1-D ``numpy.ndarray`` of non-negative weights summing to 1.
    """
    half = max(6, int(3 * sigma))
    offsets = np.arange(-half, half + 1)
    if sigma == 0:
        # Limit of a Gaussian as sigma -> 0: all mass on the centre tap.
        return (offsets == 0).astype(float)
    weights = np.exp(-0.5 * (offsets / sigma) ** 2)
    return weights / weights.sum()
def smooth_monthly_probs(raw_probs, alpha=ALPHA, sigma=SMOOTH_SIGMA):
    """Turn raw monthly ML probabilities into a smoothed percentage curve.

    Steps:
      1. scale the inputs from [0, 100] to [0, 1] (negatives are clamped to 0)
      2. if everything is zero, fill every month with ``MIN_CURVE_PROB`` percent
      3. raise to the power ``alpha`` to sharpen peaks (skipped for 1.0)
      4. smooth with a Gaussian kernel, treating the sequence as circular so
         the last month blends into the first
      5. normalize so the values sum to 100 and round to 3 decimals

    Args:
        raw_probs: 1-D sequence of probabilities in percent (normally 12
            values, one per month; any non-empty length is handled).
        alpha: Sharpening exponent; >1 emphasises peaks, 1.0 is a no-op.
        sigma: Gaussian standard deviation in months. ``0`` disables
            smoothing instead of dividing by zero.

    Returns:
        List of floats (percentages summing to ~100, rounded to 3 decimals),
        same length as ``raw_probs``. An empty input yields an empty list.
    """
    probs = np.asarray(raw_probs, dtype=float) / 100.0
    n = len(probs)
    if n == 0:
        return []
    # Negative "probabilities" are invalid; without the clamp an even alpha
    # would turn them into positive mass.
    probs = np.clip(probs, 0.0, None)
    if probs.sum() == 0:
        probs = np.full_like(probs, MIN_CURVE_PROB / 100.0)
    if alpha != 1.0:
        probs = np.power(probs, alpha)

    if sigma != 0:
        pad = int(max(6, 3 * sigma))
        # Modular indexing gives a true circular pad even when pad > n;
        # slicing (a[-pad:], a[:pad]) silently truncates in that case and
        # misaligns the centre window extracted below.
        wrap_idx = np.arange(-pad, n + pad) % n
        padded = probs[wrap_idx]
        offsets = np.arange(-pad, pad + 1)
        kern = np.exp(-0.5 * (offsets / sigma) ** 2)
        kern = kern / kern.sum()
        # 'same' keeps the padded length; the original samples sit at
        # positions pad .. pad + n - 1.
        curve = np.convolve(padded, kern, mode="same")[pad:pad + n]
    else:
        curve = probs

    curve = np.clip(curve, 0.0, None)
    if curve.sum() == 0:
        # e.g. underflow after sharpening: fall back to a flat curve
        curve = np.ones_like(curve) / n
    perc = (curve / curve.sum() * 100.0).round(3)
    return perc.tolist()
def is_bell_shaped(perc_list):
    """Heuristically check whether a monthly curve is unimodal and roughly symmetric.

    The curve is lightly smoothed (Savitzky-Golay, window 5, order 3) when it
    has at least 5 points; shorter inputs are analysed as-is because those
    filter settings are invalid for them. Local maxima are then counted on
    the smoothed curve, and the mass to the left and right of the peak is
    compared on the unsmoothed values.

    The sequence is treated as linear, not circular: a peak on the first or
    last element is not counted as a local maximum, and the left/right mass
    split does not wrap around.

    Args:
        perc_list: Non-empty 1-D sequence of numbers (e.g. 12 monthly
            percentages).

    Returns:
        ``(is_bell, diagnostics)`` where ``is_bell`` is True when at most one
        interior local maximum is found and the left/right mass ratio lies
        within [0.5, 2.0]; ``diagnostics`` holds the intermediate values.

    Raises:
        ValueError: If ``perc_list`` is empty.
    """
    arr = np.asarray(perc_list, dtype=float)
    n = len(arr)
    if n == 0:
        raise ValueError("perc_list must contain at least one value")

    # small smoothing to remove noise; needs window (5) > polyorder (3)
    if n >= 5:
        arr_smooth = np.asarray(scipy.signal.savgol_filter(arr, 5, 3))
    else:
        arr_smooth = arr

    # A slope sign change from + to - marks a local maximum. The +1 converts
    # the second-difference position into an index into arr_smooth.
    peaks = (np.diff(np.sign(np.diff(arr_smooth))) < 0).nonzero()[0] + 1
    num_peaks = len(peaks)
    # peaks already holds maxima indices, so no further offset is applied.
    peak_idx = int(peaks[0]) if num_peaks >= 1 else int(arr_smooth.argmax())

    # symmetry: mass strictly left of the peak vs strictly right of it
    left_mass = arr[:peak_idx].sum()
    right_mass = arr[peak_idx + 1:].sum()
    sym_ratio = left_mass / (right_mass + 1e-9)  # epsilon avoids division by zero

    # skewness of the values (population std; 1.0 substitutes a zero spread)
    mean = arr.mean()
    spread = arr.std(ddof=0)
    if not spread > 0:
        spread = 1.0
    skew = ((arr - mean) ** 3).mean() / (spread ** 3)

    is_unimodal = num_peaks <= 1
    is_symmish = 0.5 <= sym_ratio <= 2.0  # within factor 2
    diagnostics = {
        "num_peaks": num_peaks,
        "peak_month": int(np.argmax(arr)) + 1,
        "sym_ratio_left_to_right": float(sym_ratio),
        "skewness": float(skew),
        "is_unimodal": bool(is_unimodal),
        "is_symmetric_enough": bool(is_symmish),
    }
    return bool(is_unimodal and is_symmish), diagnostics
 def gaussian_pdf_scalar(x_scalar, mean, std):
     """Return Gaussian PDF scalar for scalar x. If x_scalar is None or NaN, return 1.0 (neutral)."""
     try:
             monthly_results[month - 1] = MonthlyResult(**res)
     # Build monthly bell curve: normalize ml_bloom_probability across months
+    # After monthly_results list is filled (MonthlyResult objects)...
+    # Build raw ML probs array (0-100)
+    raw_probs = np.array([(mr.ml_bloom_probability or 0.0) for mr in monthly_results], dtype=float)
+    # 1) Compute smoothed bell curve percentages
+    monthly_perc = smooth_monthly_probs(raw_probs.tolist(), alpha=ALPHA, sigma=SMOOTH_SIGMA)
+    monthly_curve = {i+1: float(monthly_perc[i]) for i in range(12)}
+    # 2) Bell-shape verification
+    bell_ok, bell_diag = is_bell_shaped(list(monthly_perc))
+    # 3) Trim species_probs in each monthly_result to top-K only
+    for mr in monthly_results:
+        if isinstance(mr.species_probs, dict) and len(mr.species_probs) > 0:
+            # sort and keep top K
+            items = sorted(mr.species_probs.items(), key=lambda x: -float(x[1]))[:TOP_K_SPECIES]
+            mr.species_probs = {k: round(float(v), 6) for k, v in items}
+            # also ensure species_top aligns (already top list)
+            mr.species_top = [(s, float(p)) for s, p in mr.species_top[:TOP_K_SPECIES]]
+        else:
+            mr.species_probs = {}
+            mr.species_top = []
+    # include bell verification in response
     response = {
         "success": True,
         "analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
         "requested_date": req.date,
         "monthly_results": monthly_results,
         "monthly_curve": monthly_curve,
+        "bell_valid": bell_ok,
+        "bell_diagnostics": bell_diag,
         "processing_time": processing_time
     }
     return BloomPredictionResponse(**response)