Spaces:
Running
Running
Commit
·
b1a0d53
1
Parent(s):
d5ff4e3
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from datasets import load_dataset
|
|
| 4 |
import requests
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
|
| 7 |
-
|
| 8 |
classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
|
| 9 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
| 10 |
|
|
@@ -52,7 +51,30 @@ def return_species_image(species):
|
|
| 52 |
image = gr.Image(value=image_url)
|
| 53 |
return image
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
def classification(text, typology, confidence):
|
|
|
|
| 56 |
result = classification_model(text)
|
| 57 |
habitat_label = result[0]['label']
|
| 58 |
habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
|
|
@@ -62,6 +84,7 @@ def classification(text, typology, confidence):
|
|
| 62 |
return formatted_output, image_output
|
| 63 |
|
| 64 |
def masking(text):
|
|
|
|
| 65 |
masked_text = text + ', [MASK] [MASK]'
|
| 66 |
pred = mask_model(masked_text, top_k=1)
|
| 67 |
new_species = [pred[i][0]['token_str'] for i in range(len(pred))]
|
|
|
|
| 4 |
import requests
|
| 5 |
from bs4 import BeautifulSoup
|
| 6 |
|
|
|
|
| 7 |
classification_model = pipeline("text-classification", model="CesarLeblanc/test_model")
|
| 8 |
mask_model = pipeline("fill-mask", model="CesarLeblanc/fill_mask_model")
|
| 9 |
|
|
|
|
| 51 |
image = gr.Image(value=image_url)
|
| 52 |
return image
|
| 53 |
|
| 54 |
+
def gbif_normalization(text, *, timeout=10):
    """Normalize a comma-separated list of species names via the GBIF API.

    Each non-empty name is looked up with the GBIF ``species/match``
    endpoint. When the response contains a ``species`` field, that value
    replaces the name; otherwise the name is kept as typed. The lookup is
    best-effort: a network failure, a timeout, or an unparsable response
    also falls back to the name as typed instead of raising.

    Args:
        text: Species names separated by commas.
        timeout: Seconds to wait for each GBIF request before giving up.

    Returns:
        The normalized names joined with ``", "`` and lower-cased.
    """
    match_url = "https://api.gbif.org/v1/species/match"
    names = [name.strip() for name in text.split(',')]

    normalized = []
    for name in names:
        if not name:
            # Nothing to look up; keep the empty slot so the output has the
            # same number of entries as the input, without an HTTP call.
            normalized.append(name)
            continue
        try:
            # Passing the name through ``params`` lets requests URL-encode
            # it, so characters such as '&', '#' or '=' in user input cannot
            # corrupt the query string.
            response = requests.get(
                match_url, params={"name": name}, timeout=timeout
            )
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Connection error, timeout, or a body that is not valid JSON:
            # treat it like "no match" and keep the user's spelling.
            payload = {}
        if isinstance(payload, dict) and 'species' in payload:
            normalized.append(payload["species"])
        else:
            normalized.append(name)

    return ", ".join(normalized).lower()
|
| 75 |
+
|
| 76 |
def classification(text, typology, confidence):
|
| 77 |
+
text = gbif_normalization(text)
|
| 78 |
result = classification_model(text)
|
| 79 |
habitat_label = result[0]['label']
|
| 80 |
habitat_label = dataset['train'].features['label'].names[int(habitat_label.split('_')[1])]
|
|
|
|
| 84 |
return formatted_output, image_output
|
| 85 |
|
| 86 |
def masking(text):
|
| 87 |
+
text = gbif_normalization(text)
|
| 88 |
masked_text = text + ', [MASK] [MASK]'
|
| 89 |
pred = mask_model(masked_text, top_k=1)
|
| 90 |
new_species = [pred[i][0]['token_str'] for i in range(len(pred))]
|