Spaces:

NaseefNazrul
/

BloomAI

Sleeping

App Files Files Community

NaseefNazrul committed on Oct 5, 2025

Commit

23c8193

verified ·

1 Parent(s): 6e6068d

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -57

app.py CHANGED Viewed

@@ -30,6 +30,8 @@ SCALER_FILE = Path("mil_scaler.joblib")
 FEATURES_FILE = Path("mil_features.joblib")
 PHENO_FILE = Path("phenologythingy.csv")
 SPECIES_STATS_FILE = Path("species_stats.csv")
 ELEV_IMAGE_ID = "USGS/SRTMGL1_003"
 BUFFER_METERS = int(os.environ.get("BUFFER_METERS", 200))
@@ -68,27 +70,31 @@ class BloomPredictionRequest(BaseModel):
     lon: float = Field(..., ge=-180, le=180)
     date: str = Field(..., description="YYYY-MM-DD")
-class MonthlyResult(BaseModel):
     month: int
-    sample_date: str
-    ml_bloom_probability: Optional[float] = None
-    ml_prediction: Optional[str] = None
-    ml_confidence: Optional[str] = None
-    species_top: Optional[List[Tuple[str, float]]] = None
-    species_probs: Optional[Dict[str, float]] = None
-    elevation_m: Optional[float] = None
-    data_quality: Optional[dict] = None
-    satellite: Optional[str] = None
-    note: Optional[str] = None
 class BloomPredictionResponse(BaseModel):
     success: bool
-    analysis_date: str
     requested_date: str
-    monthly_results: List[MonthlyResult]
-    monthly_curve: Dict[int, float]    # month -> percent (sums to ~100)
-    bell_valid: Optional[bool] = None
-    bell_diagnostics: Optional[Dict[str, float]] = None
     processing_time: float
 # ------------------------------
@@ -696,20 +702,25 @@ def process_month_task(lat, lon, year, month, elevation):
 @app.post("/predict", response_model=BloomPredictionResponse)
 async def predict_bloom(req: BloomPredictionRequest):
     start_time = time.time()
-    # validate date
     try:
         req_dt = datetime.strptime(req.date, "%Y-%m-%d")
     except ValueError:
         raise HTTPException(status_code=400, detail="date must be YYYY-MM-DD")
-    # elevation once
     elevation = get_elevation_from_ee(req.lat, req.lon)
     year = req_dt.year
     monthly_results = [None] * 12
-    # Run monthly tasks in parallel to speed up (bounded workers)
-    tasks = []
     with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
-        futures = {ex.submit(process_month_task, req.lat, req.lon, year, month, elevation): month for month in range(1, 13)}
         for fut in as_completed(futures):
             month = futures[fut]
             try:
@@ -719,9 +730,9 @@ async def predict_bloom(req: BloomPredictionRequest):
                 res = {
                     "month": month,
                     "sample_date": date(year, month, 15).strftime("%Y-%m-%d"),
-                    "ml_bloom_probability": None,
-                    "ml_prediction": None,
-                    "ml_confidence": None,
                     "species_top": [],
                     "species_probs": {},
                     "elevation_m": elevation,
@@ -729,47 +740,82 @@ async def predict_bloom(req: BloomPredictionRequest):
                     "satellite": None,
                     "note": "processing_error"
                 }
-            monthly_results[month - 1] = MonthlyResult(**res)
-    # Build monthly bell curve: normalize ml_bloom_probability across months
-    # After monthly_results list is filled (MonthlyResult objects)...
-    # Build raw ML probs array (0-100)
-    raw_probs = np.array([(mr.ml_bloom_probability or 0.0) for mr in monthly_results], dtype=float)
-    # 1) Compute smoothed bell curve percentages
     monthly_perc = smooth_monthly_probs(raw_probs.tolist(), alpha=ALPHA, sigma=SMOOTH_SIGMA)
     monthly_curve = {i+1: float(monthly_perc[i]) for i in range(12)}
-    # 2) Bell-shape verification
     bell_ok, bell_diag = is_bell_shaped(list(monthly_perc))
-    # 3) Trim species_probs in each monthly_result to top-K only
-    for mr in monthly_results:
-        if isinstance(mr.species_probs, dict) and len(mr.species_probs) > 0:
-            # sort and keep top K
-            items = sorted(mr.species_probs.items(), key=lambda x: -float(x[1]))[:TOP_K_SPECIES]
-            mr.species_probs = {k: round(float(v), 6) for k, v in items}
-            # also ensure species_top aligns (already top list)
-            mr.species_top = [(s, float(p)) for s, p in mr.species_top[:TOP_K_SPECIES]]
-        else:
-            mr.species_probs = {}
-            mr.species_top = []
-    # include bell verification in response
     processing_time = round(time.time() - start_time, 2)
-    response = {
-        "success": True,
-        "analysis_date": datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
-        "requested_date": req.date,
-        "monthly_results": monthly_results,
-        "monthly_curve": monthly_curve,
-        "bell_valid": bell_ok,
-        "bell_diagnostics": bell_diag,
-        "processing_time": processing_time
-    }
-    return BloomPredictionResponse(**response)
 # ------------------------------
 # Local run

 FEATURES_FILE = Path("mil_features.joblib")
 PHENO_FILE = Path("phenologythingy.csv")
 SPECIES_STATS_FILE = Path("species_stats.csv")
+MIN_BLOOM_THRESHOLD = float(os.environ.get("MIN_BLOOM_THRESHOLD", 40.0))  # cutoff on the smoothed monthly curve (default 40.0): /predict lists months above it as the bloom window for BLOOM_DETECTED and compares the peak against it for LOW_CONFIDENCE
+MIN_PEAK_FOR_BELL = float(os.environ.get("MIN_PEAK_FOR_BELL", 60.0))  # a smoothed peak below this (default 60.0), or a failed bell-shape check, makes /predict report NO_BLOOM. NOTE(review): this default exceeds MIN_BLOOM_THRESHOLD, so the later `peak_prob < MIN_BLOOM_THRESHOLD` (LOW_CONFIDENCE) check can never be true with the defaults - confirm the intended ordering
 ELEV_IMAGE_ID = "USGS/SRTMGL1_003"
 BUFFER_METERS = int(os.environ.get("BUFFER_METERS", 200))
     lon: float = Field(..., ge=-180, le=180)
     date: str = Field(..., description="YYYY-MM-DD")
+class SimplifiedMonthlyResult(BaseModel):  # per-month summary model. NOTE(review): not referenced by the /predict handler visible in this diff - confirm it is used elsewhere
     month: int  # presumably 1-12, matching the handler's month numbering - TODO confirm
+    bloom_probability: float  # NOTE(review): scale (0-1 vs 0-100) is not established by the visible code - confirm
+    prediction: str  # "BLOOM" or "NO_BLOOM"
+class SpeciesResult(BaseModel):  # one entry of the response's top_species list
+    name: str  # species label taken from the (name, prob) pairs returned by predict_species_by_elevation
+    probability: float  # as percentage: /predict stores round(prob * 100.0, 2)
 class BloomPredictionResponse(BaseModel):  # response model of POST /predict
     success: bool
+    status: str  # "BLOOM_DETECTED", "NO_BLOOM", "LOW_CONFIDENCE". NOTE(review): with the default thresholds (MIN_PEAK_FOR_BELL=60 > MIN_BLOOM_THRESHOLD=40) the handler never reaches its LOW_CONFIDENCE branch - confirm
     requested_date: str  # the request's date string, echoed back as received
+    # Peak fields: the handler sets peak_month, peak_probability and bloom_window to None when status is NO_BLOOM
+    peak_month: Optional[int] = None  # 1-based month holding the maximum of the smoothed monthly curve
+    peak_probability: Optional[float] = None  # smoothed curve value at peak_month
+    bloom_window: Optional[List[int]] = None  # 1-based months whose smoothed value exceeds MIN_BLOOM_THRESHOLD (default 40.0) for BLOOM_DETECTED, or 10.0 for LOW_CONFIDENCE
+    # Species are filled only for BLOOM_DETECTED; left None for other statuses or when species prediction raises
+    top_species: Optional[List[SpeciesResult]] = None
+    # Smoothed monthly curve: month number (1-12) -> value returned by smooth_monthly_probs
+    monthly_probabilities: Dict[int, float]
     processing_time: float  # elapsed seconds for the request, rounded to 2 decimals by the handler
 # ------------------------------
 @app.post("/predict", response_model=BloomPredictionResponse)
 async def predict_bloom(req: BloomPredictionRequest):
     start_time = time.time()
+    # Validate date
     try:
         req_dt = datetime.strptime(req.date, "%Y-%m-%d")
     except ValueError:
         raise HTTPException(status_code=400, detail="date must be YYYY-MM-DD")
+    # Get elevation once
     elevation = get_elevation_from_ee(req.lat, req.lon)
     year = req_dt.year
     monthly_results = [None] * 12
+    # Run monthly tasks in parallel
     with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
+        futures = {
+            ex.submit(process_month_task, req.lat, req.lon, year, month, elevation): month
+            for month in range(1, 13)
+        }
         for fut in as_completed(futures):
             month = futures[fut]
             try:
                 res = {
                     "month": month,
                     "sample_date": date(year, month, 15).strftime("%Y-%m-%d"),
+                    "ml_bloom_probability": 0.0,
+                    "ml_prediction": "NO_BLOOM",
+                    "ml_confidence": "LOW",
                     "species_top": [],
                     "species_probs": {},
                     "elevation_m": elevation,
                     "satellite": None,
                     "note": "processing_error"
                 }
+            monthly_results[month - 1] = res
+    # Extract raw probabilities
+    raw_probs = np.array([
+        (mr.get("ml_bloom_probability") or 0.0) if isinstance(mr, dict)
+        else (mr.ml_bloom_probability or 0.0)
+        for mr in monthly_results
+    ], dtype=float)
+    # Compute smoothed curve
     monthly_perc = smooth_monthly_probs(raw_probs.tolist(), alpha=ALPHA, sigma=SMOOTH_SIGMA)
     monthly_curve = {i+1: float(monthly_perc[i]) for i in range(12)}
+    # Check bell shape
     bell_ok, bell_diag = is_bell_shaped(list(monthly_perc))
+    # Find peak
+    peak_idx = int(np.argmax(monthly_perc))
+    peak_month = peak_idx + 1
+    peak_prob = float(monthly_perc[peak_idx])
+    # Determine status and whether to include species
+    if peak_prob < MIN_PEAK_FOR_BELL or not bell_ok:
+        status = "NO_BLOOM"
+        top_species = None
+        bloom_window = None
+        peak_month_out = None
+        peak_prob_out = None
+    elif peak_prob < MIN_BLOOM_THRESHOLD:
+        status = "LOW_CONFIDENCE"
+        top_species = None
+        bloom_window = [i+1 for i, p in enumerate(monthly_perc) if p > 10.0]
+        peak_month_out = peak_month
+        peak_prob_out = peak_prob
+    else:
+        status = "BLOOM_DETECTED"
+        bloom_window = [i+1 for i, p in enumerate(monthly_perc) if p > MIN_BLOOM_THRESHOLD]
+        peak_month_out = peak_month
+        peak_prob_out = peak_prob
+        # Only predict species if we have a strong bloom signal
+        try:
+            # Use the peak month's data for species prediction
+            peak_result = monthly_results[peak_idx]
+            if isinstance(peak_result, dict):
+                doy = peak_result.get("day_of_year")
+            else:
+                # Estimate DOY from month
+                doy = date(year, peak_month, 15).timetuple().tm_yday
+            species_predictions = predict_species_by_elevation(elevation, doy=doy, top_k=TOP_K_SPECIES)
+            # Convert to response format (probabilities as percentages)
+            top_species = [
+                SpeciesResult(name=sp, probability=round(prob * 100.0, 2))
+                for sp, prob in species_predictions
+            ]
+        except Exception as e:
+            print(f"❌ species prediction error: {e}")
+            top_species = None
     processing_time = round(time.time() - start_time, 2)
+    response = BloomPredictionResponse(
+        success=True,
+        status=status,
+        requested_date=req.date,
+        peak_month=peak_month_out,
+        peak_probability=peak_prob_out,
+        bloom_window=bloom_window,
+        top_species=top_species,
+        monthly_probabilities=monthly_curve,
+        processing_time=processing_time
+    )
+    return response
 # ------------------------------
 # Local run