# Hugging Face Space (status when captured: Sleeping)
import html
import time

import streamlit as st
import torch
from transformers import pipeline
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


@st.cache_resource
def load_pipeline():
    """Build the NER pipeline and cache it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be loaded again on each rerun.

    Returns:
        The ``transformers`` "ner" pipeline for the
        ``seddiktrk/xlm-roberta-base-finetuned-panx-all`` checkpoint, placed
        on ``device``.
    """
    print('Preparing pipeline ...\n')
    ner_pipeline = pipeline(
        "ner",
        model="seddiktrk/xlm-roberta-base-finetuned-panx-all",
        device=device,
    )
    print('\nPipe Ready !!!')
    return ner_pipeline


# Load the NER pipeline (served from the cache after the first run).
pipe = load_pipeline()
# Example texts offered in the example selector, keyed by ISO 639-1 language
# code (Japanese is "ja"; "jp" is the country code, not a language code).
examples = {
    "en": "My name is Clara and I live in Berkeley, California.",
    "fr": "Je m'appelle Marie et je travaille dans un café à Lyon.",
    "ar": "اسمي أحمد وأدرس في جامعة القاهرة.",
    "de": "Mein Name ist Hans und ich komme aus München.",
    "es": "Mi nombre es Lucía y vivo en una pequeña ciudad en México.",
    "it": "Mi chiamo Giulia e faccio il medico a Roma.",
    "pt": "Chamo-me Ana e moro em uma fazenda no Brasil.",
    "ru": "Меня зовут Ольга, и я живу в Санкт-Петербурге.",
    "ja": "私の名前は佐藤です。東京でITエンジニアとして働いています",
    "zh": "我叫李华,在北京的一家公司上班",
}
# Colors for each entity type: (highlight background, label-badge background).
# The badge text itself is always rendered in white.
ENTITY_COLORS = {
    "PER": ("#F7D4DA", "#E31A1C"),  # Light pink highlight, red badge
    "ORG": ("#D4E2F4", "#2171B5"),  # Light blue highlight, blue badge
    "LOC": ("#E8DAEF", "#6A51A3"),  # Light purple highlight, purple badge
    #"MISC": ("#FFE5B4", "#FF8C00"),  # Light orange highlight, dark orange badge
}

# Fallback (highlight, badge) colors for labels missing from ENTITY_COLORS.
_DEFAULT_ENTITY_COLORS = ("#FFD700", "#FF4500")


def _entity_span(surface, label):
    """Return the HTML for one highlighted entity followed by its label badge."""
    background_color, badge_color = ENTITY_COLORS.get(label, _DEFAULT_ENTITY_COLORS)
    outer_style = (
        f"background-color:{background_color}; padding: 3px 5px; "
        "border-radius: 5px; margin: 0 2px; display: inline-block;"
    )
    badge_style = (
        f"background-color:{badge_color}; color: white; padding: 1px 5px; "
        "border-radius: 5px; margin-left: 5px; font-size: 0.85em; "
        "vertical-align: middle;"
    )
    # Both the entity text and the label are escaped: the result is rendered
    # as raw HTML, so nothing taken from the input may be emitted verbatim.
    return (
        f'<span style="{outer_style}">{html.escape(surface)} '
        f'<span style="{badge_style}">{html.escape(label)}</span></span>'
    )


def get_colored_text(text, entities):
    """Return ``text`` as HTML with every entity highlighted and labelled.

    Args:
        text: The original input string.
        entities: Iterable of dicts with integer ``'start'`` and ``'end'``
            character offsets into ``text`` and an ``'entity_group'`` label.

    Returns:
        An HTML string. Text outside entities is HTML-escaped; each entity is
        wrapped in a colored ``<span>`` followed by a badge with its label.
        Labels absent from ``ENTITY_COLORS`` use a fallback color pair.
    """
    fragments = []
    cursor = 0  # Index in ``text`` up to which output has been produced.
    # Process spans left to right regardless of the order they arrive in.
    for entity in sorted(entities, key=lambda item: item['start']):
        start, end = entity['start'], entity['end']
        if start < cursor:
            # Overlaps a span already emitted; skip it rather than
            # duplicating part of the text.
            continue
        fragments.append(html.escape(text[cursor:start]))
        fragments.append(_entity_span(text[start:end], entity['entity_group']))
        cursor = end
    fragments.append(html.escape(text[cursor:]))
    return "".join(fragments)
# Streamlit app
st.title('Multilingual NER')
# Built from single-line pieces so the HTML carries no leading indentation.
st.markdown(
    "<p style='color: grey; font-size: 0.85em;'>\n"
    "This application performs Named Entity Recognition (NER) across 100+ languages.\n"
    "The model excels in cross-lingual transfer and is capable of processing text "
    "that contains multiple languages simultaneously.\n"
    "</p>",
    unsafe_allow_html=True,
)
st.write("### 🔠 Token Classification")

# Two-column layout: wide text area on the left, example picker on the right.
col1, col2 = st.columns([4, 1])

# Dropdown in the right column
with col2:
    selected_example = st.selectbox(
        'Select an example:',
        list(examples),
    )

# Text area in the left column, pre-filled with the selected example
with col1:
    user_input = st.text_area('Enter your text here:', value=examples[selected_example])

# Button to compute
if st.button("Compute"):
    if not user_input.strip():
        # Nothing to analyze; do not send blank text to the model.
        st.warning("Please enter some text to analyze.")
    else:
        with st.spinner():
            # Time only the model call so the reported figure is the
            # inference time and excludes HTML building and rendering.
            start_time = time.perf_counter()
            ner_results = pipe(user_input, aggregation_strategy="simple")
            inference_seconds = time.perf_counter() - start_time

        # Display the highlighted text
        colored_text = get_colored_text(user_input, ner_results)
        st.markdown(colored_text, unsafe_allow_html=True)
        st.write(f"Inference time: {inference_seconds:.2f} seconds")

        # Output of the pipeline called without an aggregation strategy.
        with st.expander("Show raw output"):
            raw_results = pipe(user_input)
            st.json(raw_results)