# Hugging Face Spaces page capture -- Space status at capture time: Sleeping
import html
import os
import sys

import gradio as gr
from huggingface_hub import snapshot_download
# Download the model repository from the Hugging Face Hub and put the
# snapshot directory on the import path.
repo_path = snapshot_download("Muhsabrys/AMWAL_ArFinNER")
if repo_path not in sys.path:  # avoid stacking duplicates on module reload
    sys.path.append(repo_path)

# This import has to come after the path tweak above: presumably the `amwal`
# package is shipped inside the downloaded snapshot -- TODO confirm.
from amwal import load_ner  # noqa: E402

# Load the NER model once at import time; `process_text` calls it per request.
ner = load_ner(local_path=repo_path)
# Background colour (CSS hex) used to highlight each entity label.
# Labels missing from this table fall back to a neutral grey at the call site.
ENTITY_COLORS = {
    "BANK": "#FF6B6B",
    "ORGANIZATION": "#4ECDC4",
    "CURRENCY": "#FFD93D",
    "FINANCIAL_INSTRUMENT": "#95E1D3",
    "COUNTRY": "#F38181",
    "CITY": "#AA96DA",
    "NATIONALITY": "#FCBAD3",
    # Singular and plural spellings share one colour.
    "EVENT": "#A8E6CF",
    "EVENTS": "#A8E6CF",
    "TIME": "#FFD3B6",
    # NOTE(review): the misspelled key is kept on purpose -- it may be the
    # label the model actually emits (verify against the model's label set).
    # The correctly spelled alias matches the name advertised in the UI text.
    "QUNATITY_OR_UNIT": "#FFAAA5",
    "QUANTITY_OR_UNIT": "#FFAAA5",
    "PRODUCT_OR_SERVICE": "#FF8B94",
    "PERSON": "#C7CEEA",
    "LAW": "#B4F8C8",
    "DATE": "#FBE7C6",
}
def process_text(text):
    """Run NER on Arabic financial text and build the two UI outputs.

    Args:
        text: Raw text from the input textbox; may be empty or ``None``.

    Returns:
        A ``(highlighted_html, entity_info)`` tuple. The first item is an
        HTML fragment (or a plain status/error message), the second is a
        Markdown summary of the entities (empty when there is nothing to
        summarise).
    """
    if not text or not text.strip():
        return "Please enter some Arabic financial text.", ""

    try:
        result = ner(text)
        entities = result.get("entities", [])
        if not entities:
            return "No entities found in the text.", ""

        highlighted_html = create_highlighted_html(text, entities)
        entity_info = format_entity_info(entities)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the request.
        # The message is rendered as HTML and may echo user input, so it is
        # escaped before being returned.
        return f"Error processing text: {html.escape(str(e))}", ""

    return highlighted_html, entity_info
def create_highlighted_html(text, entities, colors=None):
    """Render ``text`` as an RTL HTML fragment with entities highlighted.

    Args:
        text: The original input string that the entity offsets refer to.
        entities: Iterable of dicts with ``start`` and ``end`` (character
            offsets into ``text``), ``text`` (the entity surface string) and
            ``label`` (the entity type).
        colors: Optional mapping of label -> CSS colour. Defaults to the
            module-level ``ENTITY_COLORS``; unknown labels use ``#CCCCCC``.

    Returns:
        The escaped text alone when ``entities`` is empty; otherwise a
        ``<div dir="rtl">`` wrapping the escaped text, with each entity
        replaced by a coloured ``<mark>`` whose ``title`` is the label.
    """
    if not entities:
        return html.escape(text, quote=False)

    palette = ENTITY_COLORS if colors is None else colors

    # Build the output left to right from escaped pieces. Splicing markup
    # into the string in place would let a later, overlapping span cut into
    # markup that was already inserted.
    pieces = []
    cursor = 0
    ordered = sorted(entities, key=lambda ent: (ent["start"], -ent["end"]))
    for entity in ordered:
        start = entity["start"]
        end = entity["end"]
        if start < cursor or end < start:
            # Overlaps an already-emitted entity (or has inverted offsets):
            # skip it rather than emit corrupted markup.
            continue

        label = str(entity["label"])
        color = palette.get(label, "#CCCCCC")
        title = html.escape(label)  # attribute value: quotes must be escaped
        marked = html.escape(entity["text"], quote=False)

        pieces.append(html.escape(text[cursor:start], quote=False))
        pieces.append(
            f'<mark style="background-color: {color}; padding: 2px 4px; '
            f'border-radius: 3px; margin: 0 2px;" '
            f'title="{title}">{marked}</mark>'
        )
        cursor = end

    pieces.append(html.escape(text[cursor:], quote=False))
    body = "".join(pieces)

    # Wrap in an RTL container so the Arabic text is laid out right-to-left.
    return (
        '<div dir="rtl" style="font-size: 18px; line-height: 2; '
        'padding: 15px; background-color: #f9f9f9; border-radius: 8px; '
        "font-family: 'Arial', sans-serif;\">"
        f"{body}</div>"
    )
def format_entity_info(entities):
    """Summarise detected entities as Markdown, grouped by label.

    Args:
        entities: Iterable of dicts with at least ``label`` and ``text``.

    Returns:
        ``"No entities detected."`` when ``entities`` is empty. Otherwise a
        Markdown string with a heading followed by one entry per label (in
        sorted label order) of the form ``**LABEL** (count): a, b, c``.
        ``count`` is the total number of mentions, while the listed texts
        are de-duplicated in first-seen order.
    """
    if not entities:
        return "No entities detected."

    # Group surface strings by entity label, keeping encounter order.
    grouped = {}
    for entity in entities:
        grouped.setdefault(entity["label"], []).append(entity["text"])

    parts = ["### Detected Entities\n\n"]
    for label, texts in sorted(grouped.items()):
        # dict.fromkeys drops duplicates while preserving first-seen order.
        unique_texts = list(dict.fromkeys(texts))
        parts.append(f"\n**{label}** ({len(texts)}): {', '.join(unique_texts)}\n")
    return "".join(parts)
# Example texts in Arabic, one single-element row per example because the
# examples widget is bound to a single input component.
examples = [
    ["يطرح البنك المركزي المصري، بعد غد، سندات خزانة ثابتة ومتغيرة العائد بقيمة 45 مليار جنيه"],
    ["الصادرات البترولية المصرية ترتفع إلى 3.6 مليار دولار خلال 9 أشهر"],
    ["أعلن بنك الإمارات دبي الوطني عن زيادة رأس المال إلى 500 مليون درهم"],
    ["ارتفع سعر صرف الدولار الأمريكي مقابل الجنيه المصري في البورصة المصرية"],
]
# Create the Gradio interface: header, input column with examples, output
# column, colour legend / about text, and the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), title="AMWAL: Arabic Financial NER") as demo:
    # Blank lines inside the Markdown are significant: a `---` directly under
    # a text line would be parsed as a heading underline, not a rule.
    gr.Markdown(
        """
        # 💰 AMWAL: Arabic Financial Named Entity Recognition

        Extract financial entities from Arabic text using AMWAL, a specialized spaCy-based NER system.

        **Supported Entity Types:** BANK, ORGANIZATION, CURRENCY, FINANCIAL_INSTRUMENT, COUNTRY, CITY,
        NATIONALITY, EVENT, TIME, QUANTITY_OR_UNIT, PRODUCT_OR_SERVICE, and more.

        ---
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Arabic Financial Text",
                placeholder="أدخل النص المالي العربي هنا...",
                lines=8,
                rtl=True,
            )
            submit_btn = gr.Button("🔍 Extract Entities", variant="primary", size="lg")
            gr.Examples(
                examples=examples,
                inputs=input_text,
                label="Example Texts",
            )

    with gr.Row():
        with gr.Column(scale=1):
            output_html = gr.HTML(label="Highlighted Text")
            output_info = gr.Markdown(label="Entity Information")

    # Legend and background information. The legend colours mirror the
    # corresponding ENTITY_COLORS entries and must be kept in sync by hand.
    gr.Markdown(
        """
        ---

        ### 📊 Entity Color Legend

        <div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px; margin-top: 10px;">
        <div><mark style="background-color: #FF6B6B; padding: 2px 8px; border-radius: 3px;">BANK</mark></div>
        <div><mark style="background-color: #4ECDC4; padding: 2px 8px; border-radius: 3px;">ORGANIZATION</mark></div>
        <div><mark style="background-color: #FFD93D; padding: 2px 8px; border-radius: 3px;">CURRENCY</mark></div>
        <div><mark style="background-color: #95E1D3; padding: 2px 8px; border-radius: 3px;">FINANCIAL_INSTRUMENT</mark></div>
        <div><mark style="background-color: #F38181; padding: 2px 8px; border-radius: 3px;">COUNTRY</mark></div>
        <div><mark style="background-color: #AA96DA; padding: 2px 8px; border-radius: 3px;">CITY</mark></div>
        <div><mark style="background-color: #FCBAD3; padding: 2px 8px; border-radius: 3px;">NATIONALITY</mark></div>
        <div><mark style="background-color: #A8E6CF; padding: 2px 8px; border-radius: 3px;">EVENT</mark></div>
        </div>

        ---

        ### 📖 About AMWAL

        AMWAL is a spaCy-based Named Entity Recognition system designed for extracting financial entities from Arabic text.
        It was trained on a specialized corpus of Arabic financial news from 2000-2023 and achieves 96% F1-score.

        **Paper:** [AMWAL: Named Entity Recognition for Arabic Financial News](https://aclanthology.org/2025.finnlp-1.20) (FinNLP @ COLING 2025)

        **Authors:** Muhammad S. Abdo, Yash Hatekar, Damir Cavar

        **Model:** [Muhsabrys/AMWAL_ArFinNER](https://huggingface.co/Muhsabrys/AMWAL_ArFinNER)
        """
    )

    # Run the same handler for the button click and for submitting the
    # textbox (Enter key), so both paths stay in sync.
    for trigger in (submit_btn.click, input_text.submit):
        trigger(
            fn=process_text,
            inputs=input_text,
            outputs=[output_html, output_info],
        )
# Launch the app only when this file is executed as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    demo.launch()