"""Gradio demo that runs a Clarin token-classification (NER) model on input text."""

import os

import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Single source of truth for the Hub repository, so the tokenizer and the
# model weights are always loaded from the same checkpoint.
MODEL_ID = 'clarin-knext/sdadas-polish-roberta-large-v2-kpwr_and_cen-25-2e-05'

# Access token taken from the environment; this is None when CLARIN_KNEXT is unset.
auth_token = os.environ.get("CLARIN_KNEXT")

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_auth_token=auth_token)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID, use_auth_token=auth_token)
pipe = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy='first')


def predict(text):
    """Run the NER pipeline on *text* and shape the result for the highlighted-text output.

    Args:
        text: The input string passed to the NER pipeline.

    Returns:
        A dict with the original ``'text'`` and an ``'entities'`` list. Each
        entry holds ``'entity'`` (the pipeline's ``entity_group``) and the
        ``'start'`` / ``'end'`` values reported by the pipeline.
    """
    return {
        'text': text,
        'entities': [
            {
                # The pipeline reports the label under 'entity_group'; the
                # output dict exposes it under the key 'entity'.
                'entity': entity['entity_group'],
                'start': entity['start'],
                'end': entity['end'],
            }
            for entity in pipe(text)
        ],
    }


with gr.Blocks(title='Clarin WiNER in HF Ecosystem Demo') as demo:
    gr.Markdown('# Clarin WiNER in HF Ecosystem Demo')

    with gr.Row():
        text_input = gr.Textbox(
            label='Input text',
            value='Clarin to świetna firma. Jej główna siedziba mieści się we Wrocławiu, na Dolnym Śląsku.',
        )
        nered_text = gr.HighlightedText(label='NERed text')

    with gr.Row():
        ner_button = gr.Button('WiNER this!')

    # Clicking the button feeds the textbox content to predict() and renders
    # its return value in the highlighted-text component.
    ner_button.click(fn=predict, inputs=text_input, outputs=nered_text)

# Enable the request queue (concurrency_count=3) before starting the app.
demo.queue(concurrency_count=3)
demo.launch()