Spaces:

KennethTM
/

dvpi

Sleeping

App Files Files Community

KennethTM committed on Feb 14

Commit

a4f81c0

verified ·

1 Parent(s): 2df923c

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -46

app.py CHANGED Viewed

@@ -1,53 +1,91 @@
 from fastapi import FastAPI
 from pydantic import BaseModel, Field
-from typing import Literal
-import json
 import numpy as np
 import onnxruntime as ort
 from typing_extensions import Annotated
 import gradio as gr
 from cryptography.fernet import Fernet
 import os
 # Model load
 key = os.getenv("ONNX_KEY")
 cipher = Fernet(key)
-VERSION = "0.0.1"
 TITLE = f"DVPI beregnings API (version {VERSION})"
-DESCRIPTION = "Beregn Dansk Vandløbs Plante Indeks (DVPI) fra dækningsgrad af plantearter. Beregningen er baseret på en model som efterligner DVPI beregningsmetoden og er dermed ikke eksakt, usikkerheden er i gennemsnit **±0.05 EQR-enheder**."
-URL = "https://kennethtm-dvpi.hf.space"
 # Load ONNX model and species mappings
-with open("model.bin", "rb") as f:
     encrypted = f.read()
     decrypted = cipher.decrypt(encrypted)
     ort_session = ort.InferenceSession(decrypted)
-with open("spec2idx.json", "r") as f:
-    spec2idx = json.load(f)
-# Define types
-valid_species = tuple(spec2idx.keys())
 class SpeciesCover(BaseModel):
-    species: dict[Literal[valid_species], Annotated[float, Field(ge=0, le=100)]]
     model_config = {
         "json_schema_extra": {
             "examples": [{
                 "species": {
-                    "Potamogeton alpinus": 25.0,
-                    "Berula erecta": 15.5,
-                    "Calamagrostis canescens": 10.0
                 }
             }]
         }
     }
 class EQRResult(BaseModel):
-    EQR: float  # Round to 2 decimals
     DVPI: int
     version: str = VERSION
@@ -67,48 +105,47 @@ def eqr_to_dvpi(eqr: float) -> int:
     else:
         return 5
 # FastAPI routes
 @app.post("/dvpi")
 def predict(cover_data: SpeciesCover) -> EQRResult:
     """Predict EQR and DVPI from species cover data"""
-    # Initialize input vector with zeros
-    input_vector = np.zeros((1, len(spec2idx)))
-    print(cover_data.species)
-    # Fill values from input
-    for species, cover in cover_data.species.items():
-        idx = spec2idx[species]
         input_vector[0, idx] = cover
-    # Get prediction
     input_name = ort_session.get_inputs()[0].name
     ort_inputs = {input_name: input_vector.astype(np.float32)}
-    ort_output = ort_session.run(None, ort_inputs)
-    eqr = float(ort_output[0][0])
     dvpi = eqr_to_dvpi(eqr)
-    return EQRResult(EQR=round(eqr, 2), DVPI=dvpi)
-@app.get("/arter")
-def list_species() -> dict:
-    """Return list of valid species names"""
-    return {"species": list(spec2idx.keys())}
 # Gradio app
-def add_entry(species, cover, current_dict) -> tuple[SpeciesCover, str]:
     current_dict[species] = cover
     return current_dict, current_dict
 def gradio_predict(cover_data: dict):
     if len(cover_data) == 0:
         return {}
-    data = SpeciesCover(species=cover_data)
     result = predict(data)
     return result.model_dump()
@@ -120,12 +157,13 @@ with gr.Blocks() as io:
     with gr.Tab(label = "Beregner"):
-        gr.Markdown("Beregning er baseret på samfund af plantearter og deres dækningsgrad. Dækningsgraden angives i procent som summen af scoren for dækningsgraden (1-5) divideret med det samlede antal undersøgte kvadrater gange 5, og til sidste konverteret til procent. Eksempel: Potamogeton alpinus findes 3 felter med scorerne 2, 3 og 5 ud af 50 undersøgte kvadrater. Dækningsgraden for Potamogeton alpinus er derfor (2+3+5)/(50*5)*100 = 4%.")
         current_dict = gr.State({})
         with gr.Row():
-            species_input = gr.Dropdown(choices=valid_species, label="Vælg art")
             cover_input = gr.Number(label="Dækningsgrad (%)", minimum=0, maximum=100)
         with gr.Row():
@@ -143,26 +181,28 @@ with gr.Blocks() as io:
         add_btn.click(
             add_entry,
             inputs=[species_input, cover_input, current_dict],
-            outputs=[current_dict, list_display]
         )
         reset_btn.click(
             reset_dict,
             inputs=[],
-            outputs=[current_dict, list_display, results]
         )
         calc_btn.click(
             gradio_predict,
             inputs=[current_dict],
-            outputs=results
         )
         gr.Markdown("App og model af Kenneth Thorø Martinsen.")
     with gr.Tab(label="Dokumentation"):
-        # Add markdown description with code to call the api in python
         gr.Markdown("## Eksempel på brug af API")
         gr.Markdown(f"API dokumentation kan findes på [{URL}/docs]({URL}/docs)")
         gr.Markdown("### Python")
@@ -172,9 +212,9 @@ import json
 data = {{
     "species": {{
-        "Potamogeton alpinus": 25.0,
-        "Berula erecta": 15.5,
-        "Calamagrostis canescens": 10.0
     }}
 }}
@@ -188,9 +228,9 @@ library(httr)
 library(jsonlite)
 data <- list(species = list(
-    "Potamogeton alpinus" = 25.0,
-    "Berula erecta" = 15.5,
-    "Calamagrostis canescens" = 10.0
 ))
 response <- POST("{URL}/dvpi",

 from fastapi import FastAPI
 from pydantic import BaseModel, Field
 import numpy as np
 import onnxruntime as ort
 from typing_extensions import Annotated
 import gradio as gr
+from dotenv import load_dotenv
 from cryptography.fernet import Fernet
 import os
+import pickle as pkl
+load_dotenv()
 # Model load
 key = os.getenv("ONNX_KEY")
 cipher = Fernet(key)
+VERSION = "0.0.3"
 TITLE = f"DVPI beregnings API (version {VERSION})"
+DESCRIPTION = "Beregn Dansk Vandløbs Plante Indeks (DVPI) fra dækningsgrad af plantearter. Beregningen er baseret på en model som efterligner DVPI beregningsmetoden og er dermed ikke eksakt, usikkerheden er i gennemsnit **±0.017 EQR-enheder** og **R<sup>2</sup>=0.98** når den sammenlignes med den originale. Kan der ikke beregnes en værdi, returneres EQR=0 og DVPI=0."
+URL = "http://localhost:8000" #https://kennethtm-dvpi.hf.space
 # Load ONNX model and species mappings
+with open("model_v3.bin", "rb") as f:
     encrypted = f.read()
     decrypted = cipher.decrypt(encrypted)
     ort_session = ort.InferenceSession(decrypted)
+# Load metadata
+with open("metadata_v3.bin", "rb") as f:
+    encrypted = f.read()
+    decrypted = cipher.decrypt(encrypted)
+    metadata = pkl.loads(decrypted)
+latinname2stancode = metadata["latinname2stancode"]
+valid_taxacodes = metadata["valid_taxacodes"]
+normalizer_1 = metadata["normalizer_1"]
+normalizer_2 = metadata["normalizer_2"]
+taxacode2idx = metadata["taxacode2idx"]
+# Preprocess species
def preprocess_species(species: dict[int, float]) -> dict[int, float]:
    """Map submitted taxon codes onto the codes the model accepts.

    The cover values pass through three steps driven by module-level lookup
    tables:

    1. ``normalizer_1``: codes missing from the table are dropped; the others
       are renamed to their mapped code, summing cover when several inputs
       land on the same code.
    2. ``normalizer_2``: codes mapped to ``None`` are dropped, codes mapped to
       another code are renamed, and codes absent from the table keep their
       code. Cover is summed whenever two entries end up on the same code.
    3. Only codes contained in ``valid_taxacodes`` are kept.

    Args:
        species: Cover value per taxon code as supplied by the caller.

    Returns:
        A new dict with the cover value per normalised taxon code. The input
        dict is not modified.
    """
    # Apply filter 1
    intermediate_species: dict[int, float] = {}
    for sccode, value in species.items():
        if sccode not in normalizer_1:
            continue
        new_sccode = normalizer_1[sccode]
        intermediate_species[new_sccode] = intermediate_species.get(new_sccode, 0) + value

    # Apply filter 2
    final_species: dict[int, float] = {}
    for sccode, value in intermediate_species.items():
        if sccode in normalizer_2:
            new_sccode = normalizer_2[sccode]
            if new_sccode is None:
                continue
        else:
            new_sccode = sccode
        # Always accumulate: a plain assignment for pass-through codes would
        # overwrite (or be overwritten by) cover remapped onto the same code,
        # making the result depend on iteration order.
        final_species[new_sccode] = final_species.get(new_sccode, 0) + value

    # filter valid taxacodes
    return {
        taxacode: value
        for taxacode, value in final_species.items()
        if taxacode in valid_taxacodes
    }
 class SpeciesCover(BaseModel):
+    species: dict[int, Annotated[float, Field(ge=0, le=100)]]
     model_config = {
         "json_schema_extra": {
             "examples": [{
                 "species": {
+                    6458: 25.0,
+                    4158: 15.5,
+                    7208: 10.0
                 }
             }]
         }
     }
 class EQRResult(BaseModel):
+    EQR: float
     DVPI: int
     version: str = VERSION
     else:
         return 5
 # FastAPI routes
 @app.post("/dvpi")
 def predict(cover_data: SpeciesCover) -> EQRResult:
     """Predict EQR and DVPI from species cover data"""
+    species_preproc = preprocess_species(cover_data.species)
+    input_vector = np.zeros((1, len(valid_taxacodes)))
+    for species, cover in species_preproc.items():
+        idx = taxacode2idx[species]
         input_vector[0, idx] = cover
+    if np.sum(input_vector) == 0:
+        return EQRResult(EQR=0, DVPI=0)
     input_name = ort_session.get_inputs()[0].name
     ort_inputs = {input_name: input_vector.astype(np.float32)}
+    _, output_2 = ort_session.run(None, ort_inputs)
+    eqr = float(output_2[0][0])
+    eqr = 1 if eqr > 1 else eqr
     dvpi = eqr_to_dvpi(eqr)
+    return EQRResult(EQR=round(eqr, 3), DVPI=dvpi)
 # Gradio app
def add_entry(species, cover, current_dict) -> tuple[dict, dict]:
    """Store one species/cover pair in the UI state.

    Args:
        species: Name chosen in the species dropdown.
        cover: Cover percentage entered for that species.
        current_dict: Mapping of species name to cover collected so far. It is
            updated in place.

    Returns:
        ``current_dict`` twice: once for the state component and once for the
        component that displays the current list.
    """
    # An unselected species or an empty cover field (presumably delivered as
    # None by the UI) would only fail later during prediction, so such
    # submissions leave the state untouched.
    if not species or cover is None:
        return current_dict, current_dict
    current_dict[species] = cover
    return current_dict, current_dict
 def gradio_predict(cover_data: dict):
     if len(cover_data) == 0:
         return {}
+    cover_data_code = {latinname2stancode[species]: cover for species, cover in cover_data.items()}
+    data = SpeciesCover(species=cover_data_code)
     result = predict(data)
     return result.model_dump()
     with gr.Tab(label = "Beregner"):
+        gr.Markdown("Beregning er baseret på samfund af plantearter og deres dækningsgrad. Når API'et bruges anvendes arternes [Stancode](https://dce.au.dk/overvaagning/stancode/stancodelister) (SC1064) - se 'Dokumentation' for eksempel på brug.")
         current_dict = gr.State({})
         with gr.Row():
+            species_choices = sorted(list(latinname2stancode.keys()))
+            species_input = gr.Dropdown(choices=species_choices, label="Vælg art")
             cover_input = gr.Number(label="Dækningsgrad (%)", minimum=0, maximum=100)
         with gr.Row():
         add_btn.click(
             add_entry,
             inputs=[species_input, cover_input, current_dict],
+            outputs=[current_dict, list_display],
+            show_api=False
         )
         reset_btn.click(
             reset_dict,
             inputs=[],
+            outputs=[current_dict, list_display, results],
+            show_api=False
         )
         calc_btn.click(
             gradio_predict,
             inputs=[current_dict],
+            outputs=results,
+            show_api=False
         )
         gr.Markdown("App og model af Kenneth Thorø Martinsen.")
     with gr.Tab(label="Dokumentation"):
         gr.Markdown("## Eksempel på brug af API")
         gr.Markdown(f"API dokumentation kan findes på [{URL}/docs]({URL}/docs)")
         gr.Markdown("### Python")
 data = {{
     "species": {{
+        6458: 25.0,
+        4158: 15.5,
+        7208: 10.0
     }}
 }}
 library(jsonlite)
 data <- list(species = list(
+    "6458" = 25.0,
+    "4158" = 15.5,
+    "7208" = 10.0
 ))
 response <- POST("{URL}/dvpi",