Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
import requests
|
|
|
|
| 4 |
from bs4 import BeautifulSoup
|
| 5 |
import pandas as pd
|
| 6 |
|
| 7 |
-
# Initialize models
|
| 8 |
classification_model = pipeline("text-classification", model="models/text_classification_model", tokenizer="models/text_classification_model", top_k=5)
|
| 9 |
-
|
| 10 |
|
| 11 |
-
# Load data
|
| 12 |
eunis_habitats = pd.read_excel('data/eunis_habitats.xlsx')
|
| 13 |
|
| 14 |
def return_habitat_image(habitat_label):
|
|
@@ -20,11 +19,9 @@ def return_habitat_image(habitat_label):
|
|
| 20 |
if img_tag:
|
| 21 |
image_url = img_tag['src']
|
| 22 |
else:
|
| 23 |
-
image_url = "https://
|
| 24 |
else:
|
| 25 |
-
image_url = "https://
|
| 26 |
-
#image_url = "https://www.commissionoceanindien.org/wp-content/uploads/2018/07/plantnet.jpg" # While we don't have the rights
|
| 27 |
-
#image_url = "https://files.ibot.cas.cz/cevs/images/syntaxa/large/Rorippo-Phalaridetum_arundinaceae2.jpg" # 800-600 for Q51
|
| 28 |
image = gr.Image(value=image_url)
|
| 29 |
return image
|
| 30 |
|
|
@@ -38,29 +35,20 @@ def return_species_image(species):
|
|
| 38 |
if img_tag:
|
| 39 |
image_url = img_tag['src']
|
| 40 |
else:
|
| 41 |
-
image_url = "https://
|
| 42 |
else:
|
| 43 |
-
image_url = "https://
|
| 44 |
-
#image_url = "https://www.commissionoceanindien.org/wp-content/uploads/2018/07/plantnet.jpg" # While we don't have the rights
|
| 45 |
-
#image_url = "https://files.ibot.cas.cz/cevs/images/taxa/large/Eryngium_maritimum18.jpg" # 1600-1200 for Q51 for eryngium maritimum
|
| 46 |
image = gr.Image(value=image_url)
|
| 47 |
return image
|
| 48 |
|
| 49 |
def gbif_normalization(text):
|
| 50 |
-
base = "https://api.gbif.org/v1"
|
| 51 |
-
api = "species"
|
| 52 |
-
function = "match"
|
| 53 |
-
parameter = "name"
|
| 54 |
-
url = f"{base}/{api}/{function}?{parameter}="
|
| 55 |
all_species = text.split(',')
|
| 56 |
all_species = [species.strip() for species in all_species]
|
| 57 |
species_gbif = []
|
| 58 |
for species in all_species:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
if 'species' in r:
|
| 63 |
-
r = r["species"]
|
| 64 |
else:
|
| 65 |
r = species
|
| 66 |
species_gbif.append(r)
|
|
@@ -103,7 +91,7 @@ def masking(text, k):
|
|
| 103 |
|
| 104 |
j = 0
|
| 105 |
while True:
|
| 106 |
-
prediction =
|
| 107 |
species = prediction['token_str']
|
| 108 |
if species in text_split or species in best_predictions:
|
| 109 |
j += 1
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
import requests
|
| 4 |
+
import pygbif
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
import pandas as pd
|
| 7 |
|
|
|
|
| 8 |
classification_model = pipeline("text-classification", model="models/text_classification_model", tokenizer="models/text_classification_model", top_k=5)
|
| 9 |
+
masking_model = pipeline("fill-mask", model="models/fill_mask_model", tokenizer="models/fill_mask_model", top_k=100)
|
| 10 |
|
|
|
|
| 11 |
eunis_habitats = pd.read_excel('data/eunis_habitats.xlsx')
|
| 12 |
|
| 13 |
def return_habitat_image(habitat_label):
|
|
|
|
| 19 |
if img_tag:
|
| 20 |
image_url = img_tag['src']
|
| 21 |
else:
|
| 22 |
+
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
|
| 23 |
else:
|
| 24 |
+
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
|
|
|
|
|
|
|
| 25 |
image = gr.Image(value=image_url)
|
| 26 |
return image
|
| 27 |
|
|
|
|
| 35 |
if img_tag:
|
| 36 |
image_url = img_tag['src']
|
| 37 |
else:
|
| 38 |
+
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
|
| 39 |
else:
|
| 40 |
+
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
|
|
|
|
|
|
|
| 41 |
image = gr.Image(value=image_url)
|
| 42 |
return image
|
| 43 |
|
| 44 |
def gbif_normalization(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
all_species = text.split(',')
|
| 46 |
all_species = [species.strip() for species in all_species]
|
| 47 |
species_gbif = []
|
| 48 |
for species in all_species:
|
| 49 |
+
gbif_match_result = pygbif.species.name_backbone(species, taxonRank="SPECIES")
|
| 50 |
+
if 'usage' in gbif_match_result:
|
| 51 |
+
r = gbif_match_result["usage"]["canonicalName"]
|
|
|
|
|
|
|
| 52 |
else:
|
| 53 |
r = species
|
| 54 |
species_gbif.append(r)
|
|
|
|
| 91 |
|
| 92 |
j = 0
|
| 93 |
while True:
|
| 94 |
+
prediction = masking_model(masked_text)[j]
|
| 95 |
species = prediction['token_str']
|
| 96 |
if species in text_split or species in best_predictions:
|
| 97 |
j += 1
|