"""Gradio demo: extract geographic coordinate spans from text with a CamemBERT NER model."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load the fine-tuned CamemBERT NER model once at import time so every
# request reuses the same pipeline instance.
model_name = "GEODE/camembert-base-edda-span-classification"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipeline = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)


def extract_coordinates(text: str) -> str:
    """Return the coordinate spans the NER model finds in *text*.

    Args:
        text: Free text to analyse.

    Returns:
        The words of every entity whose ``entity_group`` is ``"Latlong"``,
        one per line, or ``"No coordinates found"`` when there are none
        (including when *text* is empty or only whitespace).
    """
    # Nothing to analyse: skip the model call entirely.
    if not text or not text.strip():
        return "No coordinates found"

    entities = ner_pipeline(text)

    # Keep only the spans labelled as coordinates.
    coords_text = [
        ent["word"] for ent in entities if ent["entity_group"] == "Latlong"
    ]
    if not coords_text:
        return "No coordinates found"
    return "\n".join(coords_text)


examples = [
    "L’adresse est 48.8584, 2.2945 près de la Tour Eiffel.",
    "La latitude est 40.7128 et la longitude est -74.0060 pour New York.",
    "Les coordonnées du Colisée sont 41.8902, 12.4922.",
    "À Rio de Janeiro : -22.9068, -43.1729.",
    "Sydney se situe à -33.8688, 151.2093.",
    "Le Mont Fuji se trouve à 35.3606, 138.7274.",
]

with gr.Blocks() as demo:
    gr.Markdown("## Geographic Coordinate Extractor (CamemBERT NER Demo)")

    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="Enter text",
                placeholder="e.g. * AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
                lines=3,
            )
            run_btn = gr.Button("Extract & Show")
        with gr.Column():
            out_text = gr.Textbox(label="Extracted coordinates")
            # TODO: add a map output showing the extracted location.

    # Wire the button only after `out_text` exists; referencing it before its
    # assignment raised NameError while the UI was being built.
    run_btn.click(fn=extract_coordinates, inputs=inp, outputs=out_text)

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=inp,
            label="Examples",
        )

# Launch
if __name__ == "__main__":
    demo.launch()