"""Gradio demo for AMWAL, a named-entity recognizer for Arabic financial text.

On import this module downloads the model repository, loads the NER pipeline
and builds the ``demo`` Blocks app; running it as a script launches the app.
"""

import html
import logging
import os
import sys

import gradio as gr
from huggingface_hub import snapshot_download

logger = logging.getLogger(__name__)

# Download the model repository and make its Python code importable.
repo_path = snapshot_download("Muhsabrys/AMWAL_ArFinNER")
sys.path.append(repo_path)

# This import must come after the sys.path entry above has been added.
from amwal import load_ner  # noqa: E402

# Load the NER model
ner = load_ner(local_path=repo_path)

# Background color used for every entity label, for visualization.
ENTITY_COLORS = {
    "BANK": "#FF6B6B",
    "ORGANIZATION": "#4ECDC4",
    "CURRENCY": "#FFD93D",
    "FINANCIAL_INSTRUMENT": "#95E1D3",
    "COUNTRY": "#F38181",
    "CITY": "#AA96DA",
    "NATIONALITY": "#FCBAD3",
    "EVENT": "#A8E6CF",
    "EVENTS": "#A8E6CF",
    "TIME": "#FFD3B6",
    # NOTE(review): the misspelled key is kept on purpose in case the model
    # emits this exact label; the correctly spelled variant is accepted too.
    "QUNATITY_OR_UNIT": "#FFAAA5",
    "QUANTITY_OR_UNIT": "#FFAAA5",
    "PRODUCT_OR_SERVICE": "#FF8B94",
    "PERSON": "#C7CEEA",
    "LAW": "#B4F8C8",
    "DATE": "#FBE7C6",
}

# Fallback color for labels that are missing from ENTITY_COLORS.
DEFAULT_ENTITY_COLOR = "#CCCCCC"

# Labels shown in the on-page color legend, in display order.
LEGEND_LABELS = (
    "BANK",
    "ORGANIZATION",
    "CURRENCY",
    "FINANCIAL_INSTRUMENT",
    "COUNTRY",
    "CITY",
    "NATIONALITY",
    "EVENT",
)


def process_text(text: str) -> tuple:
    """Run NER on *text* and build the two values shown in the UI.

    Args:
        text: Raw text typed by the user.

    Returns:
        A ``(html, markdown)`` pair: the highlighted text (or a status/error
        message) for the HTML output, and the per-label entity summary for the
        Markdown output (empty when there is nothing to summarize).
    """
    if not text or not text.strip():
        return "Please enter some Arabic financial text.", ""

    try:
        # Get predictions
        result = ner(text)
        entities = result.get("entities", [])

        if not entities:
            return "No entities found in the text.", ""

        highlighted_html = create_highlighted_html(text, entities)
        entity_info = format_entity_info(entities)
        return highlighted_html, entity_info
    except Exception as exc:
        # UI boundary: never let a model failure crash the request, but keep
        # the traceback in the logs and escape the message, since it is
        # rendered by an HTML component.
        logger.exception("Entity extraction failed")
        return f"Error processing text: {html.escape(str(exc))}", ""


def create_highlighted_html(text: str, entities: list) -> str:
    """Return *text* as HTML with every entity wrapped in a colored span.

    Args:
        text: The original input text.
        entities: Dicts with ``start``/``end`` character offsets into *text*,
            the entity ``text`` and its ``label``.

    Returns:
        An RTL ``<div>`` containing the HTML-escaped text with one highlighted
        ``<span>`` per entity. With no entities the escaped text is returned
        inside the same wrapper.
    """
    pieces = []
    cursor = 0

    # Walk the entities left to right so that the plain text between them can
    # be escaped separately from the generated markup.
    for entity in sorted(entities, key=lambda item: item["start"]):
        start = entity["start"]
        end = entity["end"]

        # Skip empty spans and spans overlapping an already emitted entity;
        # splicing them in would corrupt the surrounding markup.
        if end <= start or start < cursor:
            continue

        label = entity["label"]
        color = ENTITY_COLORS.get(label, DEFAULT_ENTITY_COLOR)
        span_style = (
            f"background-color: {color}; color: #000; "
            "padding: 2px 6px; border-radius: 4px;"
        )

        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            f'<span title="{html.escape(str(label))}" style="{span_style}">'
            f'{html.escape(str(entity["text"]))}</span>'
        )
        cursor = end

    pieces.append(html.escape(text[cursor:]))

    # Wrap in an RTL div for Arabic text; pre-wrap keeps the user's line breaks.
    return (
        '<div dir="rtl" style="text-align: right; line-height: 2; '
        'white-space: pre-wrap;">' + "".join(pieces) + "</div>"
    )


def format_entity_info(entities: list) -> str:
    """Summarize *entities* as Markdown, grouped by label.

    Args:
        entities: Dicts carrying at least a ``label`` and a ``text`` key.

    Returns:
        A Markdown string with one line per label (sorted by label) giving the
        total number of mentions followed by the distinct mention texts in
        first-seen order, or ``"No entities detected."`` for empty input.
    """
    if not entities:
        return "No entities detected."

    # Group entity texts by label.
    grouped = {}
    for entity in entities:
        grouped.setdefault(entity["label"], []).append(entity["text"])

    sections = ["### Detected Entities\n\n"]
    for label, texts in sorted(grouped.items()):
        # dict.fromkeys drops duplicates while preserving order; the count
        # deliberately reports all mentions, not just the distinct ones.
        unique_texts = list(dict.fromkeys(texts))
        sections.append(
            f"\n**{label}** ({len(texts)}): {', '.join(unique_texts)}\n"
        )
    return "".join(sections)


def _create_legend_html(labels=LEGEND_LABELS) -> str:
    """Return an HTML row of colored chips, one per label in *labels*."""
    chips = []
    for label in labels:
        color = ENTITY_COLORS.get(label, DEFAULT_ENTITY_COLOR)
        chips.append(
            f'<span style="background-color: {color}; color: #000; '
            'padding: 4px 10px; border-radius: 4px; display: inline-block;">'
            f"{html.escape(label)}</span>"
        )
    return (
        '<div style="display: flex; flex-wrap: wrap; gap: 8px;">'
        + "".join(chips)
        + "</div>"
    )


# Example texts in Arabic
examples = [
    ["يطرح البنك المركزي المصري، بعد غد، سندات خزانة ثابتة ومتغيرة العائد بقيمة 45 مليار جنيه"],
    ["الصادرات البترولية المصرية ترتفع إلى 3.6 مليار دولار خلال 9 أشهر"],
    ["أعلن بنك الإمارات دبي الوطني عن زيادة رأس المال إلى 500 مليون درهم"],
    ["ارتفع سعر صرف الدولار الأمريكي مقابل الجنيه المصري في البورصة المصرية"],
]

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), title="AMWAL: Arabic Financial NER") as demo:
    gr.Markdown(
        """
        # 💰 AMWAL: Arabic Financial Named Entity Recognition

        Extract financial entities from Arabic text using AMWAL, a specialized spaCy-based NER system.

        **Supported Entity Types:** BANK, ORGANIZATION, CURRENCY, FINANCIAL_INSTRUMENT, COUNTRY, CITY, NATIONALITY, EVENT, TIME, QUANTITY_OR_UNIT, PRODUCT_OR_SERVICE, and more.

        ---
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Arabic Financial Text",
                placeholder="أدخل النص المالي العربي هنا...",
                lines=8,
                rtl=True,
            )
            submit_btn = gr.Button("🔍 Extract Entities", variant="primary", size="lg")
            gr.Examples(
                examples=examples,
                inputs=input_text,
                label="Example Texts",
            )

    with gr.Row():
        with gr.Column(scale=1):
            output_html = gr.HTML(label="Highlighted Text")
            output_info = gr.Markdown(label="Entity Information")

    # Add legend. The chips are rendered with gr.HTML so their inline colors
    # are shown; they are generated from ENTITY_COLORS to stay in sync with
    # the highlighting.
    gr.Markdown(
        """
        ---
        ### 📊 Entity Color Legend
        """
    )
    gr.HTML(_create_legend_html())
    gr.Markdown(
        """
        ---
        ### 📖 About AMWAL

        AMWAL is a spaCy-based Named Entity Recognition system designed for extracting financial entities from Arabic text. It was trained on a specialized corpus of Arabic financial news from 2000-2023 and achieves 96% F1-score.

        **Paper:** [AMWAL: Named Entity Recognition for Arabic Financial News](https://aclanthology.org/2025.finnlp-1.20) (FinNLP @ COLING 2025)

        **Authors:** Muhammad S. Abdo, Yash Hatekar, Damir Cavar

        **Model:** [Muhsabrys/AMWAL_ArFinNER](https://huggingface.co/Muhsabrys/AMWAL_ArFinNER)
        """
    )

    # Connect the button to the processing function
    submit_btn.click(
        fn=process_text,
        inputs=input_text,
        outputs=[output_html, output_info],
    )

    # Also allow Enter key to trigger processing
    input_text.submit(
        fn=process_text,
        inputs=input_text,
        outputs=[output_html, output_info],
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()