# Hugging Face Spaces page status at capture time: Sleeping
import html

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Inference is pinned to the CPU explicitly.
device = "cpu"

# Checkpoint identifier passed to ``from_pretrained`` for both tokenizer and model.
model_name = "HooshvareLab/bert-base-parsbert-ner-uncased"

print("Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name).to(device)

# Single NER pipeline instance reused by every request handler.
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",  # groups entity tokens together
    device=-1,  # -1 means CPU
)
# Highlight palette per entity type: (colour for the B- tag, colour for the I- tag).
_palette = {
    "PER": ("#FF6B6B", "#FFB3B3"),  # Person - red / light red
    "ORG": ("#4ECDC4", "#A7E9E4"),  # Organization - teal / light teal
    "LOC": ("#95E1D3", "#C7F0E8"),  # Location - green / light green
    "DAT": ("#FFA07A", "#FFDAB9"),  # Date - orange / light orange
    "TIM": ("#DDA0DD", "#E6D0E6"),  # Time - purple / light purple
    "MON": ("#FFD700", "#FFEB99"),  # Money - gold / light gold
    "PCT": ("#87CEEB", "#B3DFEF"),  # Percent - sky blue / light sky blue
}

# Flat lookup keyed by the full tag ("B-PER", "I-PER", ...), in palette order.
label_colors = {
    f"{prefix}-{tag}": shade
    for tag, shades in _palette.items()
    for prefix, shade in zip(("B", "I"), shades)
}

# Bilingual display names for each entity type.
label_names = dict(
    PER="شخص (Person)",
    ORG="سازمان (Organization)",
    LOC="مکان (Location)",
    DAT="تاریخ (Date)",
    TIM="زمان (Time)",
    MON="پول (Money)",
    PCT="درصد (Percent)",
)
def highlight_entities(text, entities):
    """Return ``text`` as an HTML fragment with each entity wrapped in a coloured span.

    Args:
        text: The raw string the entities were extracted from.
        entities: Sequence of dicts providing ``start``, ``end``,
            ``entity_group`` and ``score``. ``start``/``end`` are used as
            slice offsets into ``text``. May be empty or ``None``.

    Returns:
        An HTML string. All text taken from ``text`` and every entity label
        is HTML-escaped, so markup typed by the user is displayed literally
        instead of being injected into the page.
    """
    if not entities:
        return html.escape(text)

    pieces = []
    cursor = 0  # offset in ``text`` up to which output has been emitted
    for entity in sorted(entities, key=lambda ent: ent["start"]):
        start, end = entity["start"], entity["end"]
        if start < cursor:
            # Overlaps a span that was already emitted; nesting spans by
            # offset would corrupt the markup, so keep the earlier one.
            continue

        group = entity["entity_group"]
        label = html.escape(str(group))
        # Aggregated groups carry no B-/I- prefix; the B- shade is used.
        color = label_colors.get(f"B-{group}", "#CCCCCC")
        word = html.escape(text[start:end])

        pieces.append(html.escape(text[cursor:start]))
        pieces.append(
            f'<span style="background-color: {color}; padding: 2px 6px; border-radius: 3px; margin: 0 2px; display: inline-block;" title="{label} (confidence: {entity["score"]:.2f})">{word} <sup style="font-size: 0.7em; font-weight: bold;">[{label}]</sup></span>'
        )
        cursor = end

    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
def perform_ner(text):
    """Run NER on ``text`` and build the two values rendered by the UI.

    Args:
        text: User-supplied text. ``None``, empty and whitespace-only input
            all produce the "please enter text" prompt.

    Returns:
        A ``(html, markdown)`` tuple: the highlighted text as HTML and a
        Markdown table of detected entities. On failure the HTML holds the
        (escaped) error message and the Markdown part is empty.
    """
    # ``not text`` also covers None, which has no ``.strip()``.
    if not text or not text.strip():
        return "<p style='color: red;'>لطفا متن فارسی وارد کنید (Please enter Persian text)</p>", ""

    try:
        entities = ner_pipeline(text)

        highlighted_html = f"<div style='direction: rtl; text-align: right; font-size: 18px; line-height: 2; padding: 20px; border: 1px solid #ddd; border-radius: 5px; background-color: #f9f9f9;'>{highlight_entities(text, entities)}</div>"

        if not entities:
            return highlighted_html, "هیچ موجودیتی شناسایی نشد (No entities detected)"

        table = [
            "### موجودیتهای شناسایی شده (Detected Entities):\n\n",
            "| کلمه (Word) | نوع (Type) | اطمینان (Confidence) |\n",
            "|------------|-----------|---------------------|\n",
        ]
        for ent in entities:
            label_fa = label_names.get(ent['entity_group'], ent['entity_group'])
            # A literal pipe inside a cell would split the Markdown row.
            word = str(ent['word']).replace("|", "\\|")
            table.append(f"| {word} | {label_fa} | {ent['score']:.2%} |\n")
        return highlighted_html, "".join(table)
    except Exception as e:
        # UI boundary: report the failure in the page instead of crashing the
        # handler. The message is escaped because it may echo user input.
        return f"<p style='color: red;'>خطا (Error): {html.escape(str(e))}</p>", ""
# Sample Persian sentences offered as ready-made inputs.
_example_sentences = (
    "باراک اوباما در هاوایی متولد شد و در شیکاگو زندگی میکرد.",
    "شرکت گوگل در کالیفرنیا واقع شده است.",
    "رضا در تهران در تاریخ ۱۵ خرداد ۱۳۸۰ متولد شد.",
    "دانشگاه تهران یکی از قدیمیترین دانشگاههای ایران است.",
    "علی و حسین به همراه مریم به مشهد سفر کردند.",
)

# Each sentence is wrapped in its own single-item list (one row per example).
examples = [[sentence] for sentence in _example_sentences]
# Static Markdown shown at the top of the page (title, description, model info).
_header_markdown = """
# 🇮🇷 Persian Named Entity Recognition
# شناسایی موجودیتهای نامدار فارسی
این سیستم موجودیتهای نامدار مانند اسامی اشخاص، سازمانها، مکانها، تاریخها و ... را در متن فارسی شناسایی میکند.
This system identifies named entities such as person names, organizations, locations, dates, etc. in Persian text.
**Model:** ParsBERT-NER (HooshvareLab)
**Running on:** CPU (may be slow for long texts)
"""

# Static Markdown legend listing the entity types.
_legend_markdown = """
### راهنمای رنگها (Color Guide):
- 🔴 **PER (شخص)**: اسامی اشخاص / Person names
- 🔵 **ORG (سازمان)**: نام سازمانها / Organizations
- 🟢 **LOC (مکان)**: نام مکانها / Locations
- 🟠 **DAT (تاریخ)**: تاریخها / Dates
- 🟣 **TIM (زمان)**: زمانها / Times
- 🟡 **MON (پول)**: مقادیر پولی / Money
- 🔷 **PCT (درصد)**: درصدها / Percentages
"""

# NOTE(review): the nesting of rows/columns below is reconstructed from the
# order of the statements; confirm against the deployed layout.
with gr.Blocks(title="Persian NER - شناسایی موجودیتهای نامدار فارسی", theme=gr.themes.Soft()) as demo:
    gr.Markdown(_header_markdown)

    with gr.Row():
        # Left column: text input and the analyze button.
        with gr.Column():
            input_text = gr.Textbox(
                label="متن فارسی خود را وارد کنید (Enter Persian Text)",
                placeholder="مثال: رضا در تهران زندگی میکند...",
                lines=5,
                rtl=True,
            )
            submit_btn = gr.Button("🔍 تحلیل متن (Analyze Text)", variant="primary")

        # Right column: highlighted text and the entity table.
        with gr.Column():
            output_html = gr.HTML(label="متن با موجودیتهای برجسته (Text with Highlighted Entities)")
            output_entities = gr.Markdown(label="لیست موجودیتها (Entity List)")

    gr.Examples(examples=examples, inputs=input_text, label="مثالها (Examples)")

    gr.Markdown(_legend_markdown)

    # The button click and the textbox submit event run the same handler with
    # the same wiring, so the arguments are declared once.
    _ner_event = dict(
        fn=perform_ner,
        inputs=input_text,
        outputs=[output_html, output_entities],
    )
    submit_btn.click(**_ner_event)
    input_text.submit(**_ner_event)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()