Ludovic Moncla
Update app.py
191af2e
raw
history blame
2.03 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Hugging Face Hub identifier of the fine-tuned CamemBERT token classifier.
model_name = "GEODE/camembert-base-edda-span-classification"

# Tokenizer and weights are loaded once, at module import, and shared by
# every call made through `ner_pipeline`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# aggregation_strategy="simple" asks the pipeline for grouped entities rather
# than raw per-token predictions; the `entity_group` / `word` keys read by
# extract_coordinates come from these grouped results.
ner_pipeline = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
def extract_coordinates(text: str) -> str:
    """Return the coordinate mentions found in *text*, one per line.

    The text is run through the module-level ``ner_pipeline``; entities whose
    ``entity_group`` equals ``'Latlong'`` are kept and their ``word`` values
    are joined with newlines.

    Args:
        text: Free text to analyse. ``None``, empty, or whitespace-only input
            is treated as containing no coordinates and skips inference.

    Returns:
        The matched spans separated by newlines, or the literal message
        ``"No coordinates found"`` when nothing matches.
    """
    # Nothing to analyse: skip the model call (which would be pointless for
    # blank text and cannot handle None).
    if not text or not text.strip():
        return "No coordinates found"

    entities = ner_pipeline(text)
    coords_text = [
        ent["word"] for ent in entities if ent["entity_group"] == "Latlong"
    ]
    return "\n".join(coords_text) if coords_text else "No coordinates found"
# Sample sentences offered as clickable examples in the UI; each one embeds a
# decimal latitude/longitude pair in French text.
examples = [
    "L’adresse est 48.8584, 2.2945 près de la Tour Eiffel.",
    "La latitude est 40.7128 et la longitude est -74.0060 pour New York.",
    "Les coordonnées du Colisée sont 41.8902, 12.4922.",
    "À Rio de Janeiro : -22.9068, -43.1729.",
    "Sydney se situe à -33.8688, 151.2093.",
    "Le Mont Fuji se trouve à 35.3606, 138.7274.",
]
# Build the UI: text input and button on the left, extracted result on the
# right, and a row of clickable example sentences underneath.
with gr.Blocks() as demo:
    gr.Markdown("## Geographic Coordinate Extractor (CamemBERT NER Demo)")

    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="Enter text",
                placeholder="e.g. * AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
                lines=3,
            )
            run_btn = gr.Button("Extract & Show")
        with gr.Column():
            out_text = gr.Textbox(label="Extracted coordinates")
            # out_map = gr.Map(label="Location on Map")

    # Register the click handler only after `out_text` has been created:
    # referencing it earlier raises NameError while the UI is being built.
    run_btn.click(fn=extract_coordinates, inputs=inp, outputs=out_text)

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=inp,
            label="Examples",
        )
# Start the Gradio server only when this file is executed as a script, so
# importing the module does not launch the app.
if __name__ == "__main__":
    demo.launch()