Spaces:

CesarLeblanc
/

plantbert_space

Running

CesarLeblanc committed on Nov 17, 2023

Commit

b1a0d53

1 Parent(s): d5ff4e3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,6 @@ from datasets import load_dataset
 import requests
 from bs4 import BeautifulSoup
 classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
 mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
@@ -52,7 +51,30 @@ def return_species_image(species):
     image = gr.Image(value=image_url)
     return image
 def classification(text, typology, confidence):
     result = classification_model(text)
     habitat_label = result[0]['label']
     habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
@@ -62,6 +84,7 @@ def classification(text, typology, confidence):
     return formatted_output, image_output
 def masking(text):
     masked_text = text + ', [MASK] [MASK]'
     pred = mask_model(masked_text, top_k=1)
     new_species = [pred[i][0]['token_str'] for i in range(len(pred))]

 import requests
 from bs4 import BeautifulSoup
 classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
 mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
     image = gr.Image(value=image_url)
     return image
+def gbif_normalization(text):
+    base = "https://api.gbif.org/v1"
+    api = "species"
+    function = "match"
+    parameter = "name"
+    url = f"{base}/{api}/{function}?{parameter}="
+    all_species = text.split(',')
+    all_species = [species.strip() for species in all_species]
+    species_gbif = []
+    for species in all_species:
+        url = url.replace(url.partition('name')[2], f'={species}')
+        r = requests.get(url)
+        r = r.json()
+        if 'species' in r:
+            r = r["species"]
+        else:
+            r = species
+        species_gbif.append(r)
+    text = ", ".join(species_gbif)
+    text = text.lower()
+    return text
 def classification(text, typology, confidence):
+    text = gbif_normalization(text)
     result = classification_model(text)
     habitat_label = result[0]['label']
     habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
     return formatted_output, image_output
 def masking(text):
+    text = gbif_normalization(text)
     masked_text = text + ', [MASK] [MASK]'
     pred = mask_model(masked_text, top_k=1)
     new_species = [pred[i][0]['token_str'] for i in range(len(pred))]