# Author: Muhsabrys
# Commit e2b111c (verified): Create Gradio app for Arabic Financial NER
import gradio as gr
from huggingface_hub import snapshot_download
import sys
import os
# Download the model
# NOTE: this runs at import time, so importing this module triggers the
# download call; the returned value is used below as a local directory path.
repo_path = snapshot_download("Muhsabrys/AMWAL_ArFinNER")
# Make the downloaded directory importable; presumably the `amwal` module
# imported below ships inside that repository — TODO confirm.
sys.path.append(repo_path)
# Import the model
# This import must stay after the sys.path change above, which is why it is
# not grouped with the imports at the top of the file.
from amwal import load_ner
# Load the NER model
# `ner` is the module-level callable used by process_text().
ner = load_ner(local_path=repo_path)
# Highlight palette: entity label -> hex background color of its <mark> span.
# Labels that are missing here are looked up with a gray "#CCCCCC" fallback.
ENTITY_COLORS = {
    # Institutions
    "BANK": "#FF6B6B",
    "ORGANIZATION": "#4ECDC4",
    # Money and instruments
    "CURRENCY": "#FFD93D",
    "FINANCIAL_INSTRUMENT": "#95E1D3",
    # Places and origin
    "COUNTRY": "#F38181",
    "CITY": "#AA96DA",
    "NATIONALITY": "#FCBAD3",
    # Singular and plural spellings share one color.
    "EVENT": "#A8E6CF",
    "EVENTS": "#A8E6CF",
    "TIME": "#FFD3B6",
    # NOTE(review): spelled "QUNATITY" here while the UI text says "QUANTITY";
    # confirm which spelling the model actually emits before renaming this key.
    "QUNATITY_OR_UNIT": "#FFAAA5",
    "PRODUCT_OR_SERVICE": "#FF8B94",
    "PERSON": "#C7CEEA",
    "LAW": "#B4F8C8",
    "DATE": "#FBE7C6",
}
def process_text(text):
    """Run NER on ``text`` and build the two outputs shown in the UI.

    Args:
        text: The user-supplied string; may be empty or ``None``.

    Returns:
        A ``(html_or_message, entity_summary)`` pair. For blank input, for a
        prediction without entities, or when anything raises, the first item
        is a plain message and the second is an empty string.
    """
    # Guard clause: nothing to analyse for missing or whitespace-only input.
    is_blank = not text or not text.strip()
    if is_blank:
        return "Please enter some Arabic financial text.", ""

    try:
        prediction = ner(text)
        found = prediction.get("entities", [])
        if found:
            # Highlighted markup first, then the grouped summary.
            return create_highlighted_html(text, found), format_entity_info(found)
        return "No entities found in the text.", ""
    except Exception as e:
        # UI boundary: surface the failure as a message instead of raising.
        return f"Error processing text: {str(e)}", ""
def create_highlighted_html(text, entities):
    """Render ``text`` as HTML with every entity wrapped in a colored ``<mark>``.

    All user-controlled content (the surrounding text, each entity's text and
    its label) is HTML-escaped, so markup typed into the input is displayed
    literally instead of being interpreted by the browser.

    Args:
        text: The raw string that the entity offsets refer to.
        entities: Sequence of dicts with ``start``, ``end``, ``text`` and
            ``label`` keys, where ``start``/``end`` are character offsets
            into ``text``.

    Returns:
        The escaped ``text`` alone when ``entities`` is empty; otherwise an
        RTL ``<div>`` containing the escaped text with highlighted entities.
        When two entities overlap, the one starting first is kept and the
        later one is skipped, so spans never nest or corrupt each other.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not entities:
        return escape(text, quote=False)

    pieces = []
    cursor = 0  # Offset in ``text`` up to which output has been emitted.
    for entity in sorted(entities, key=lambda item: item['start']):
        start = entity['start']
        end = entity['end']
        if start < cursor:
            # Overlaps an entity that was already rendered.
            continue

        label = str(entity['label'])
        color = ENTITY_COLORS.get(entity['label'], "#CCCCCC")
        safe_label = escape(label, quote=True)  # Goes inside an attribute.
        safe_entity = escape(str(entity['text']), quote=False)

        # Plain text between the previous entity and this one.
        pieces.append(escape(text[cursor:start], quote=False))
        pieces.append(
            f'<mark style="background-color: {color}; padding: 2px 4px; border-radius: 3px; margin: 0 2px;" title="{safe_label}">{safe_entity}</mark>'
        )
        cursor = max(cursor, end)

    # Remaining text after the last entity.
    pieces.append(escape(text[cursor:], quote=False))
    result_text = "".join(pieces)

    # Wrap in RTL div for Arabic text
    return f'<div dir="rtl" style="font-size: 18px; line-height: 2; padding: 15px; background-color: #f9f9f9; border-radius: 8px; font-family: \'Arial\', sans-serif;">{result_text}</div>'
def format_entity_info(entities):
    """Summarise detected entities as Markdown, grouped by label.

    Args:
        entities: Sequence of dicts with at least ``label`` and ``text`` keys.

    Returns:
        ``"No entities detected."`` when ``entities`` is empty or ``None``.
        Otherwise a Markdown string with a heading followed by one line per
        label in sorted label order. Each line shows the total number of
        mentions for that label and the distinct entity texts in order of
        first appearance.
    """
    if not entities:
        return "No entities detected."

    # Group entity texts by label, keeping the order in which they were found.
    grouped = {}
    for entity in entities:
        grouped.setdefault(entity['label'], []).append(entity['text'])

    sections = ["### Detected Entities\n\n"]
    for label in sorted(grouped):
        mentions = grouped[label]
        # dict.fromkeys drops repeats while preserving first-seen order; the
        # count deliberately stays the total number of mentions.
        distinct = ", ".join(dict.fromkeys(mentions))
        sections.append(f"\n**{label}** ({len(mentions)}): {distinct}\n")
    return "".join(sections)
# Sample Arabic financial sentences offered as ready-made inputs.
_EXAMPLE_SENTENCES = (
    "يطرح البنك المركزي المصري، بعد غد، سندات خزانة ثابتة ومتغيرة العائد بقيمة 45 مليار جنيه",
    "الصادرات البترولية المصرية ترتفع إلى 3.6 مليار دولار خلال 9 أشهر",
    "أعلن بنك الإمارات دبي الوطني عن زيادة رأس المال إلى 500 مليون درهم",
    "ارتفع سعر صرف الدولار الأمريكي مقابل الجنيه المصري في البورصة المصرية",
)
# Each sentence becomes its own single-item row, i.e. a list of lists.
examples = [[sentence] for sentence in _EXAMPLE_SENTENCES]
# Create Gradio interface
# Components and event bindings are created inside the `gr.Blocks` context
# bound to `demo`.
# NOTE(review): the nesting below was reconstructed from statement order;
# confirm that the layout matches what was intended.
# The content and closing quotes of each Markdown string are kept flush-left
# so that the text handed to gr.Markdown stays exactly as written.
with gr.Blocks(theme=gr.themes.Soft(), title="AMWAL: Arabic Financial NER") as demo:
    # Page title and short description.
    gr.Markdown(
        """
# 💰 AMWAL: Arabic Financial Named Entity Recognition
Extract financial entities from Arabic text using AMWAL, a specialized spaCy-based NER system.
**Supported Entity Types:** BANK, ORGANIZATION, CURRENCY, FINANCIAL_INSTRUMENT, COUNTRY, CITY,
NATIONALITY, EVENT, TIME, QUANTITY_OR_UNIT, PRODUCT_OR_SERVICE, and more.
---
"""
    )
    # Input area: right-to-left textbox, submit button and clickable examples.
    with gr.Row():
        with gr.Column(scale=1):
            input_text = gr.Textbox(
                label="Arabic Financial Text",
                placeholder="أدخل النص المالي العربي هنا...",
                lines=8,
                rtl=True
            )
            submit_btn = gr.Button("🔍 Extract Entities", variant="primary", size="lg")
            # Selecting an example fills `input_text` with that sentence.
            gr.Examples(
                examples=examples,
                inputs=input_text,
                label="Example Texts"
            )
    # Output area: highlighted HTML followed by the Markdown entity summary.
    with gr.Row():
        with gr.Column(scale=1):
            output_html = gr.HTML(label="Highlighted Text")
            output_info = gr.Markdown(label="Entity Information")
    # Add legend
    # The legend is hand-written and lists only part of ENTITY_COLORS; its
    # hex values have to be kept in sync with that dict manually.
    gr.Markdown(
        """
---
### 📊 Entity Color Legend
<div style="display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px; margin-top: 10px;">
<div><mark style="background-color: #FF6B6B; padding: 2px 8px; border-radius: 3px;">BANK</mark></div>
<div><mark style="background-color: #4ECDC4; padding: 2px 8px; border-radius: 3px;">ORGANIZATION</mark></div>
<div><mark style="background-color: #FFD93D; padding: 2px 8px; border-radius: 3px;">CURRENCY</mark></div>
<div><mark style="background-color: #95E1D3; padding: 2px 8px; border-radius: 3px;">FINANCIAL_INSTRUMENT</mark></div>
<div><mark style="background-color: #F38181; padding: 2px 8px; border-radius: 3px;">COUNTRY</mark></div>
<div><mark style="background-color: #AA96DA; padding: 2px 8px; border-radius: 3px;">CITY</mark></div>
<div><mark style="background-color: #FCBAD3; padding: 2px 8px; border-radius: 3px;">NATIONALITY</mark></div>
<div><mark style="background-color: #A8E6CF; padding: 2px 8px; border-radius: 3px;">EVENT</mark></div>
</div>
---
### 📖 About AMWAL
AMWAL is a spaCy-based Named Entity Recognition system designed for extracting financial entities from Arabic text.
It was trained on a specialized corpus of Arabic financial news from 2000-2023 and achieves 96% F1-score.
**Paper:** [AMWAL: Named Entity Recognition for Arabic Financial News](https://aclanthology.org/2025.finnlp-1.20) (FinNLP @ COLING 2025)
**Authors:** Muhammad S. Abdo, Yash Hatekar, Damir Cavar
**Model:** [Muhsabrys/AMWAL_ArFinNER](https://huggingface.co/Muhsabrys/AMWAL_ArFinNER)
"""
    )
    # Connect the button to the processing function
    # process_text returns a pair, matched in order to the two outputs.
    submit_btn.click(
        fn=process_text,
        inputs=input_text,
        outputs=[output_html, output_info]
    )
    # Run the same processing when the textbox fires its submit event.
    input_text.submit(
        fn=process_text,
        inputs=input_text,
        outputs=[output_html, output_info]
    )
# Launch the app
# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()