CesarLeblanc committed on
Commit
d34103c
·
verified ·
1 Parent(s): b1677f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -22
app.py CHANGED
@@ -1,14 +1,13 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import requests
 
4
  from bs4 import BeautifulSoup
5
  import pandas as pd
6
 
7
- # Initialize models
8
  classification_model = pipeline("text-classification", model="models/text_classification_model", tokenizer="models/text_classification_model", top_k=5)
9
- mask_model = pipeline("fill-mask", model="models/fill_mask_model", tokenizer="models/fill_mask_model", top_k=100)
10
 
11
- # Load data
12
  eunis_habitats = pd.read_excel('data/eunis_habitats.xlsx')
13
 
14
  def return_habitat_image(habitat_label):
@@ -20,11 +19,9 @@ def return_habitat_image(habitat_label):
20
  if img_tag:
21
  image_url = img_tag['src']
22
  else:
23
- image_url = "https://www.salonlfc.com/wp-content/uploads/2018/01/image-not-found-scaled-1150x647.png"
24
  else:
25
- image_url = "https://www.salonlfc.com/wp-content/uploads/2018/01/image-not-found-scaled-1150x647.png"
26
- #image_url = "https://www.commissionoceanindien.org/wp-content/uploads/2018/07/plantnet.jpg" # While we don't have the rights
27
- #image_url = "https://files.ibot.cas.cz/cevs/images/syntaxa/large/Rorippo-Phalaridetum_arundinaceae2.jpg" # 800-600 for Q51
28
  image = gr.Image(value=image_url)
29
  return image
30
 
@@ -38,29 +35,20 @@ def return_species_image(species):
38
  if img_tag:
39
  image_url = img_tag['src']
40
  else:
41
- image_url = "https://www.salonlfc.com/wp-content/uploads/2018/01/image-not-found-scaled-1150x647.png"
42
  else:
43
- image_url = "https://www.salonlfc.com/wp-content/uploads/2018/01/image-not-found-scaled-1150x647.png"
44
- #image_url = "https://www.commissionoceanindien.org/wp-content/uploads/2018/07/plantnet.jpg" # While we don't have the rights
45
- #image_url = "https://files.ibot.cas.cz/cevs/images/taxa/large/Eryngium_maritimum18.jpg" # 1600-1200 for Q51 for eryngium maritimum
46
  image = gr.Image(value=image_url)
47
  return image
48
 
49
  def gbif_normalization(text):
50
- base = "https://api.gbif.org/v1"
51
- api = "species"
52
- function = "match"
53
- parameter = "name"
54
- url = f"{base}/{api}/{function}?{parameter}="
55
  all_species = text.split(',')
56
  all_species = [species.strip() for species in all_species]
57
  species_gbif = []
58
  for species in all_species:
59
- url = url.replace(url.partition('name')[2], f'={species}')
60
- r = requests.get(url)
61
- r = r.json()
62
- if 'species' in r:
63
- r = r["species"]
64
  else:
65
  r = species
66
  species_gbif.append(r)
@@ -103,7 +91,7 @@ def masking(text, k):
103
 
104
  j = 0
105
  while True:
106
- prediction = mask_model(masked_text)[j]
107
  species = prediction['token_str']
108
  if species in text_split or species in best_predictions:
109
  j += 1
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import requests
4
+ import pygbif
5
  from bs4 import BeautifulSoup
6
  import pandas as pd
7
 
 
8
  classification_model = pipeline("text-classification", model="models/text_classification_model", tokenizer="models/text_classification_model", top_k=5)
9
+ masking_model = pipeline("fill-mask", model="models/fill_mask_model", tokenizer="models/fill_mask_model", top_k=100)
10
 
 
11
  eunis_habitats = pd.read_excel('data/eunis_habitats.xlsx')
12
 
13
  def return_habitat_image(habitat_label):
 
19
  if img_tag:
20
  image_url = img_tag['src']
21
  else:
22
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
23
  else:
24
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
 
 
25
  image = gr.Image(value=image_url)
26
  return image
27
 
 
35
  if img_tag:
36
  image_url = img_tag['src']
37
  else:
38
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
39
  else:
40
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/ac/No_image_available.svg/2048px-No_image_available.svg.png"
 
 
41
  image = gr.Image(value=image_url)
42
  return image
43
 
44
  def gbif_normalization(text):
 
 
 
 
 
45
  all_species = text.split(',')
46
  all_species = [species.strip() for species in all_species]
47
  species_gbif = []
48
  for species in all_species:
49
+ gbif_match_result = pygbif.species.name_backbone(species, taxonRank="SPECIES")
50
+ if 'usage' in gbif_match_result:
51
+ r = gbif_match_result["usage"]["canonicalName"]
 
 
52
  else:
53
  r = species
54
  species_gbif.append(r)
 
91
 
92
  j = 0
93
  while True:
94
+ prediction = masking_model(masked_text)[j]
95
  species = prediction['token_str']
96
  if species in text_split or species in best_predictions:
97
  j += 1