|
|
import gradio as gr |
|
|
from transformers import ( |
|
|
AutoTokenizer, |
|
|
AutoModelForTokenClassification, |
|
|
AutoModelForCausalLM, |
|
|
pipeline |
|
|
) |
|
|
import torch |
|
|
import pandas as pd |
|
|
import re |
|
|
from typing import List, Dict, Tuple |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
device = "cuda" if torch.cuda.is_available() else "cpu" |
|
|
dtype = torch.float16 if torch.cuda.is_available() else torch.float32 |
|
|
|
|
|
|
|
|
print("Loading NER model...") |
|
|
ner_model_name = "HooshvareLab/bert-base-parsbert-ner-uncased" |
|
|
ner_tokenizer = AutoTokenizer.from_pretrained(ner_model_name) |
|
|
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_name) |
|
|
ner_model.to(device) |
|
|
|
|
|
|
|
|
ner_pipeline = pipeline( |
|
|
"ner", |
|
|
model=ner_model, |
|
|
tokenizer=ner_tokenizer, |
|
|
device=0 if device == "cuda" else -1, |
|
|
aggregation_strategy="simple" |
|
|
) |
|
|
|
|
|
|
|
|
print("Loading context understanding model...") |
|
|
|
|
|
context_model_name = "microsoft/phi-2" |
|
|
|
|
|
try: |
|
|
context_tokenizer = AutoTokenizer.from_pretrained(context_model_name, trust_remote_code=True) |
|
|
context_model = AutoModelForCausalLM.from_pretrained( |
|
|
context_model_name, |
|
|
torch_dtype=dtype, |
|
|
trust_remote_code=True, |
|
|
device_map="auto" if torch.cuda.is_available() else None |
|
|
) |
|
|
if device == "cpu": |
|
|
context_model = context_model.to(device) |
|
|
|
|
|
|
|
|
if context_tokenizer.pad_token is None: |
|
|
context_tokenizer.pad_token = context_tokenizer.eos_token |
|
|
|
|
|
use_llm_model = True |
|
|
print(f"Successfully loaded {context_model_name}") |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Could not load Phi-2 model: {e}") |
|
|
print("Falling back to zero-shot classification model...") |
|
|
|
|
|
|
|
|
context_model_name = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli" |
|
|
classifier = pipeline( |
|
|
"zero-shot-classification", |
|
|
model=context_model_name, |
|
|
device=0 if device == "cuda" else -1 |
|
|
) |
|
|
use_llm_model = False |
|
|
|
|
|
|
|
|
def load_stock_symbols(csv_path="symbols.csv"): |
|
|
"""Load Iranian stock market symbols from CSV file""" |
|
|
try: |
|
|
df = pd.read_csv(csv_path, encoding='utf-8') |
|
|
symbols_dict = {} |
|
|
for _, row in df.iterrows(): |
|
|
symbol = row['symbol'] |
|
|
symbols_dict[symbol] = { |
|
|
'company': row['company_name'], |
|
|
'bazaar': row['bazaar'], |
|
|
'bazaar_group': row['bazaar_group'] |
|
|
} |
|
|
return symbols_dict |
|
|
except Exception as e: |
|
|
print(f"Error loading symbols CSV: {e}") |
|
|
|
|
|
return { |
|
|
'وبصادر': {'company': 'بانک صادرات ایران', 'bazaar': 'بورس - بازار دوم', 'bazaar_group': 'بانکها و موسسات اعتباری'}, |
|
|
'فولاد': {'company': 'فولاد مبارکه اصفهان', 'bazaar': 'بورس - بازار اول', 'bazaar_group': 'فلزات اساسی'}, |
|
|
'فارس': {'company': 'پتروشیمی فارس', 'bazaar': 'بورس - بازار اول', 'bazaar_group': 'محصولات شیمیایی'}, |
|
|
'شپنا': {'company': 'پالایش نفت اصفهان', 'bazaar': 'بورس - بازار اول', 'bazaar_group': 'فرآوردههای نفتی'}, |
|
|
'خودرو': {'company': 'ایران خودرو', 'bazaar': 'بورس - بازار اول', 'bazaar_group': 'خودرو'}, |
|
|
'وبملت': {'company': 'بانک ملت', 'bazaar': 'بورس - بازار اول', 'bazaar_group': 'بانکها'}, |
|
|
'وتوسکا': {'company': 'سرمایه گذاری توسعه توکا', 'bazaar': 'بورس', 'bazaar_group': 'سرمایه گذاریها'}, |
|
|
'پی پاد': {'company': 'پرداخت الکترونیک پاسارگاد', 'bazaar': 'بورس', 'bazaar_group': 'رایانه و فعالیت های وابسته'}, |
|
|
} |
|
|
|
|
|
|
|
|
STOCK_SYMBOLS = load_stock_symbols() |
|
|
SYMBOL_NAMES = set(STOCK_SYMBOLS.keys()) |
|
|
|
|
|
|
|
|
MARKET_KEYWORDS = { |
|
|
'سهام', 'سهم', 'بورس', 'فرابورس', 'معامله', 'معاملات', 'خرید', 'فروش', |
|
|
'قیمت', 'ارزش', 'بازار', 'سرمایه', 'سرمایهگذاری', 'پرتفوی', 'نماد', |
|
|
'شاخص', 'حجم', 'عرضه', 'تقاضا', 'صف', 'نوسان', 'بازدهی', 'سود', |
|
|
'زیان', 'ریال', 'تومان', 'میلیارد', 'میلیون', 'درصد', 'رشد', 'افت' |
|
|
} |
|
|
|
|
|
def use_phi_for_disambiguation(text: str, potential_symbol: str, symbol_info: Dict) -> float: |
|
|
""" |
|
|
Use Phi-2 model to determine if a word is used as a stock symbol |
|
|
Returns confidence score (0-1) |
|
|
""" |
|
|
if not use_llm_model: |
|
|
|
|
|
return use_zero_shot_classification(text, potential_symbol, symbol_info) |
|
|
|
|
|
try: |
|
|
|
|
|
prompt = f"""Analyze this Persian text and determine if "{potential_symbol}" is used as a stock market symbol. |
|
|
|
|
|
Context: "{potential_symbol}" could be a stock symbol for {symbol_info['company']} company. |
|
|
|
|
|
Text: {text} |
|
|
|
|
|
Answer with only "STOCK" if it's a stock symbol, or "WORD" if it's a regular word: |
|
|
Answer: """ |
|
|
|
|
|
|
|
|
inputs = context_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) |
|
|
inputs = {k: v.to(device) for k, v in inputs.items()} |
|
|
|
|
|
with torch.no_grad(): |
|
|
outputs = context_model.generate( |
|
|
**inputs, |
|
|
max_new_tokens=10, |
|
|
temperature=0.1, |
|
|
do_sample=False, |
|
|
pad_token_id=context_tokenizer.eos_token_id |
|
|
) |
|
|
|
|
|
|
|
|
response = context_tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True) |
|
|
response = response.strip().upper() |
|
|
|
|
|
|
|
|
if "STOCK" in response: |
|
|
return 0.9 |
|
|
elif "WORD" in response: |
|
|
return 0.1 |
|
|
else: |
|
|
|
|
|
return 0.5 |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Phi-2 inference error: {e}") |
|
|
return 0.5 |
|
|
|
|
|
def use_zero_shot_classification(text: str, potential_symbol: str, symbol_info: Dict) -> float: |
|
|
""" |
|
|
Fallback: Use zero-shot classification to determine if a word is a stock symbol |
|
|
""" |
|
|
try: |
|
|
|
|
|
symbol_pos = text.find(potential_symbol) |
|
|
if symbol_pos == -1: |
|
|
return 0.5 |
|
|
|
|
|
start = max(0, symbol_pos - 100) |
|
|
end = min(len(text), symbol_pos + len(potential_symbol) + 100) |
|
|
context_text = text[start:end] |
|
|
|
|
|
|
|
|
candidate_labels = [ |
|
|
f"نماد بورسی {symbol_info['company']}", |
|
|
f"کلمه عادی {potential_symbol}", |
|
|
"stock market symbol", |
|
|
"regular word" |
|
|
] |
|
|
|
|
|
|
|
|
result = classifier( |
|
|
context_text, |
|
|
candidate_labels=candidate_labels, |
|
|
multi_label=False |
|
|
) |
|
|
|
|
|
|
|
|
top_label = result['labels'][0] |
|
|
top_score = result['scores'][0] |
|
|
|
|
|
if 'نماد بورسی' in top_label or 'stock' in top_label.lower(): |
|
|
return top_score |
|
|
else: |
|
|
return 1 - top_score |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Classification error: {e}") |
|
|
return 0.5 |
|
|
|
|
|
def check_stock_symbol_context(text: str, potential_symbol: str, symbol_info: Dict) -> Tuple[bool, float]: |
|
|
""" |
|
|
Check if a potential symbol is actually used as a stock symbol in context |
|
|
""" |
|
|
|
|
|
symbol_pos = text.find(potential_symbol) |
|
|
if symbol_pos == -1: |
|
|
return False, 0.0 |
|
|
|
|
|
start_context = max(0, symbol_pos - 100) |
|
|
end_context = min(len(text), symbol_pos + len(potential_symbol) + 100) |
|
|
context_window = text[start_context:end_context] |
|
|
|
|
|
|
|
|
words_in_context = context_window.split() |
|
|
market_keyword_count = sum(1 for word in words_in_context if word in MARKET_KEYWORDS) |
|
|
|
|
|
|
|
|
heuristic_score = min(market_keyword_count * 0.2, 1.0) |
|
|
|
|
|
|
|
|
if market_keyword_count >= 5: |
|
|
return True, 0.95 |
|
|
elif market_keyword_count == 0 and len(words_in_context) > 10: |
|
|
return False, 0.05 |
|
|
|
|
|
|
|
|
if use_llm_model: |
|
|
ai_score = use_phi_for_disambiguation(context_window, potential_symbol, symbol_info) |
|
|
else: |
|
|
ai_score = use_zero_shot_classification(context_window, potential_symbol, symbol_info) |
|
|
|
|
|
|
|
|
final_score = (heuristic_score * 0.3 + ai_score * 0.7) |
|
|
|
|
|
|
|
|
is_stock = final_score > 0.5 |
|
|
|
|
|
return is_stock, final_score |
|
|
|
|
|
def find_stock_symbols_in_text(text: str) -> List[Dict]: |
|
|
"""Find and validate stock symbols in text""" |
|
|
found_symbols = [] |
|
|
processed_positions = set() |
|
|
|
|
|
|
|
|
pattern = r'\b[\u0600-\u06FF]+\b' |
|
|
|
|
|
for match in re.finditer(pattern, text): |
|
|
word = match.group() |
|
|
|
|
|
if word in SYMBOL_NAMES and match.start() not in processed_positions: |
|
|
symbol_info = STOCK_SYMBOLS[word] |
|
|
|
|
|
|
|
|
is_stock, confidence = check_stock_symbol_context(text, word, symbol_info) |
|
|
|
|
|
if is_stock: |
|
|
found_symbols.append({ |
|
|
'word': word, |
|
|
'start': match.start(), |
|
|
'end': match.end(), |
|
|
'entity_group': 'STOCK', |
|
|
'score': confidence, |
|
|
'company': symbol_info['company'], |
|
|
'bazaar': symbol_info['bazaar'], |
|
|
'bazaar_group': symbol_info['bazaar_group'] |
|
|
}) |
|
|
processed_positions.add(match.start()) |
|
|
|
|
|
return found_symbols |
|
|
|
|
|
|
|
|
label_colors = { |
|
|
"B-PER": "#FF6B6B", |
|
|
"I-PER": "#FFB3B3", |
|
|
"B-ORG": "#4ECDC4", |
|
|
"I-ORG": "#A7E9E4", |
|
|
"B-LOC": "#95E1D3", |
|
|
"I-LOC": "#C7F0E8", |
|
|
"B-DAT": "#FFA07A", |
|
|
"I-DAT": "#FFDAB9", |
|
|
"B-TIM": "#DDA0DD", |
|
|
"I-TIM": "#E6D0E6", |
|
|
"B-MON": "#FFD700", |
|
|
"I-MON": "#FFEB99", |
|
|
"B-PCT": "#87CEEB", |
|
|
"I-PCT": "#B3DFEF", |
|
|
"STOCK": "#00FA9A", |
|
|
} |
|
|
|
|
|
label_names = { |
|
|
"PER": "شخص (Person)", |
|
|
"ORG": "سازمان (Organization)", |
|
|
"LOC": "مکان (Location)", |
|
|
"DAT": "تاریخ (Date)", |
|
|
"TIM": "زمان (Time)", |
|
|
"MON": "پول (Money)", |
|
|
"PCT": "درصد (Percent)", |
|
|
"STOCK": "نماد بورسی (Stock Symbol)", |
|
|
} |
|
|
|
|
|
def merge_overlapping_entities(entities: List[Dict], stock_entities: List[Dict]) -> List[Dict]: |
|
|
"""Merge entities, removing overlaps""" |
|
|
all_entities = [] |
|
|
all_entities.extend(stock_entities) |
|
|
|
|
|
for ner_ent in entities: |
|
|
overlap = False |
|
|
for stock_ent in stock_entities: |
|
|
if not (ner_ent['end'] <= stock_ent['start'] or ner_ent['start'] >= stock_ent['end']): |
|
|
overlap = True |
|
|
break |
|
|
if not overlap: |
|
|
all_entities.append(ner_ent) |
|
|
|
|
|
return all_entities |
|
|
|
|
|
def highlight_entities(text, all_entities): |
|
|
"""Create HTML with highlighted entities""" |
|
|
if not all_entities: |
|
|
return text |
|
|
|
|
|
entities_sorted = sorted(all_entities, key=lambda x: x['start'], reverse=True) |
|
|
|
|
|
result = text |
|
|
for entity in entities_sorted: |
|
|
start = entity['start'] |
|
|
end = entity['end'] |
|
|
label = entity['entity_group'] |
|
|
word = text[start:end] |
|
|
score = entity['score'] |
|
|
|
|
|
color = label_colors.get(label if label == 'STOCK' else f"B-{label}", "#CCCCCC") |
|
|
|
|
|
tooltip_info = f"{label} (confidence: {score:.2f})" |
|
|
if label == 'STOCK': |
|
|
company = entity.get('company', '') |
|
|
bazaar = entity.get('bazaar', '') |
|
|
if company: |
|
|
tooltip_info = f"{company} - {bazaar} (confidence: {score:.2f})" |
|
|
|
|
|
highlighted = f'<span style="background-color: {color}; padding: 2px 6px; border-radius: 3px; margin: 0 2px; display: inline-block;" title="{tooltip_info}">{word} <sup style="font-size: 0.7em; font-weight: bold;">[{label}]</sup></span>' |
|
|
|
|
|
result = result[:start] + highlighted + result[end:] |
|
|
|
|
|
return result |
|
|
|
|
|
def perform_ner(text): |
|
|
"""Perform integrated NER and stock symbol detection""" |
|
|
if not text.strip(): |
|
|
return "<p style='color: red;'>لطفا متن فارسی وارد کنید (Please enter Persian text)</p>", "" |
|
|
|
|
|
try: |
|
|
|
|
|
entities = ner_pipeline(text) |
|
|
|
|
|
|
|
|
stock_entities = find_stock_symbols_in_text(text) |
|
|
|
|
|
|
|
|
all_entities = merge_overlapping_entities(entities, stock_entities) |
|
|
|
|
|
|
|
|
highlighted_html = f""" |
|
|
<div style='direction: rtl; text-align: right; font-size: 18px; line-height: 2.5; |
|
|
padding: 20px; border: 1px solid #ddd; border-radius: 5px; |
|
|
background-color: #f9f9f9; font-family: Tahoma, Arial;'> |
|
|
{highlight_entities(text, all_entities)} |
|
|
</div> |
|
|
""" |
|
|
|
|
|
|
|
|
if all_entities: |
|
|
entity_info = "### موجودیتهای شناسایی شده (Detected Entities):\n\n" |
|
|
entity_info += "| کلمه (Word) | نوع (Type) | جزئیات (Details) | اطمینان (Confidence) |\n" |
|
|
entity_info += "|:------------|:-----------|:------------------|:---------------------|\n" |
|
|
|
|
|
all_entities.sort(key=lambda x: x['start']) |
|
|
|
|
|
for ent in all_entities: |
|
|
label_fa = label_names.get(ent['entity_group'], ent['entity_group']) |
|
|
details = "" |
|
|
if ent['entity_group'] == 'STOCK': |
|
|
company = ent.get('company', '') |
|
|
bazaar = ent.get('bazaar', '') |
|
|
group = ent.get('bazaar_group', '') |
|
|
details = f"{company}<br>{bazaar}<br>{group}" |
|
|
entity_info += f"| **{ent['word']}** | {label_fa} | {details} | {ent['score']:.2%} |\n" |
|
|
else: |
|
|
entity_info = "هیچ موجودیتی شناسایی نشد (No entities detected)" |
|
|
|
|
|
|
|
|
stats = f"\n\n### آمار (Statistics):\n" |
|
|
stats += f"- تعداد کل موجودیتها: {len(all_entities)}\n" |
|
|
stats += f"- نمادهای بورسی: {len([e for e in all_entities if e['entity_group'] == 'STOCK'])}\n" |
|
|
stats += f"- اشخاص: {len([e for e in all_entities if e['entity_group'] == 'PER'])}\n" |
|
|
stats += f"- سازمانها: {len([e for e in all_entities if e['entity_group'] == 'ORG'])}\n" |
|
|
stats += f"- مکانها: {len([e for e in all_entities if e['entity_group'] == 'LOC'])}\n" |
|
|
|
|
|
return highlighted_html, entity_info + stats |
|
|
|
|
|
except Exception as e: |
|
|
return f"<p style='color: red;'>خطا (Error): {str(e)}</p>", str(e) |
|
|
|
|
|
|
|
|
examples = [ |
|
|
["علی احمدی دیروز در تهران با مدیر شرکت ملی نفت ایران دیدار کرد."], |
|
|
["سهام وبصادر و فولاد در بورس امروز با افزایش قیمت مواجه شدند."], |
|
|
["صنعت فولاد در اصفهان یکی از مهمترین صنایع کشور است."], |
|
|
["قیمت سهام شپنا در معاملات امروز ۵ درصد رشد داشت و به ۱۲۰۰۰ ریال رسید."], |
|
|
["بانک ملت اعلام کرد که سود سهام وبملت را در تاریخ ۱۵ خرداد ۱۴۰۳ پرداخت خواهد کرد."], |
|
|
["شرکت فولاد مبارکه با نماد فولاد در بورس تهران فعال است و محصولات فولادی تولید میکند."], |
|
|
["من دیروز ۱۰۰۰ سهم از وتوسکا خریدم و امیدوارم تا پایان هفته ۲۰ درصد سود کنم."], |
|
|
] |
|
|
|
|
|
|
|
|
with gr.Blocks( |
|
|
title="Persian NER + Stock Symbols | شناسایی موجودیتها و نمادهای بورسی", |
|
|
theme=gr.themes.Soft(), |
|
|
css=""" |
|
|
.rtl-text { direction: rtl; text-align: right; font-family: 'B Nazanin', Tahoma, Arial; } |
|
|
""" |
|
|
) as demo: |
|
|
gr.Markdown(f""" |
|
|
# 🏦 شناسایی هوشمند موجودیتها و نمادهای بورس ایران |
|
|
## Persian Named Entity Recognition with Stock Symbol Detection |
|
|
### Using {context_model_name.split('/')[-1]} for Context Understanding |
|
|
|
|
|
<div class="rtl-text"> |
|
|
این برنامه متنهای فارسی را تحلیل کرده و موجودیتهای مختلف را شناسایی میکند. |
|
|
</div> |
|
|
""") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=6): |
|
|
input_text = gr.Textbox( |
|
|
label="متن فارسی را وارد کنید (Enter Persian Text)", |
|
|
placeholder="مثال: سهام فولاد در بورس تهران معامله میشود...", |
|
|
lines=6, |
|
|
rtl=True, |
|
|
elem_classes=["rtl-text"] |
|
|
) |
|
|
with gr.Row(): |
|
|
submit_btn = gr.Button("🔍 تحلیل متن", variant="primary", scale=2) |
|
|
clear_btn = gr.Button("🗑️ پاک کردن", scale=1) |
|
|
|
|
|
with gr.Column(scale=6): |
|
|
output_html = gr.HTML( |
|
|
label="نتیجه تحلیل (Analysis Result)", |
|
|
elem_classes=["rtl-text"] |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
output_entities = gr.Markdown( |
|
|
label="جدول موجودیتها (Entity Table)", |
|
|
elem_classes=["rtl-text"] |
|
|
) |
|
|
|
|
|
gr.Examples( |
|
|
examples=examples, |
|
|
inputs=input_text, |
|
|
label="نمونههای آماده (Ready Examples)", |
|
|
examples_per_page=4 |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Accordion("📖 راهنمای استفاده (User Guide)", open=True): |
|
|
gr.Markdown(""" |
|
|
<div class="rtl-text"> |
|
|
|
|
|
## چگونه از این برنامه استفاده کنیم؟ |
|
|
|
|
|
1. **متن فارسی خود را در کادر بالا وارد کنید** |
|
|
2. **دکمه «تحلیل متن» را بزنید** |
|
|
3. **نتایج را در دو بخش مشاهده کنید:** |
|
|
- متن با موجودیتهای رنگی شده |
|
|
- جدول کامل موجودیتها با جزئیات |
|
|
|
|
|
## انواع موجودیتهایی که شناسایی میشوند: |
|
|
|
|
|
| رنگ | نوع | مثال | |
|
|
|:---:|:----|:-----| |
|
|
| 🔴 قرمز | **اشخاص** | علی احمدی، مریم رضایی | |
|
|
| 🔵 آبی | **سازمانها** | شرکت ملی نفت، بانک ملت | |
|
|
| 🟢 سبز | **مکانها** | تهران، اصفهان، ایران | |
|
|
| 🟠 نارنجی | **تاریخها** | ۱۵ خرداد ۱۴۰۳ | |
|
|
| 🟣 بنفش | **زمانها** | ساعت ۱۰ صبح | |
|
|
| 🟡 زرد | **مبالغ پولی** | ۱۰۰۰ ریال، ۵ میلیارد تومان | |
|
|
| 🔷 آبی آسمانی | **درصدها** | ۲۰ درصد، ۵٪ | |
|
|
| 💚 سبز روشن | **نمادهای بورسی** | فولاد، وبملت، شپنا | |
|
|
|
|
|
## ویژگی خاص: تشخیص هوشمند نمادهای بورسی |
|
|
|
|
|
برنامه با استفاده از **هوش مصنوعی** تشخیص میدهد که آیا یک کلمه نماد بورسی است یا خیر. |
|
|
|
|
|
**مثال:** |
|
|
- «سهام **فولاد** در بورس معامله شد» ← فولاد = نماد بورسی ✅ |
|
|
- «صنعت **فولاد** در کشور مهم است» ← فولاد = کلمه عادی ❌ |
|
|
|
|
|
## نحوه تفسیر نتایج: |
|
|
|
|
|
- **رنگها**: نوع موجودیت را نشان میدهند |
|
|
- **برچسبها**: نوع موجودیت به صورت مختصر |
|
|
- **درصد اطمینان**: میزان اطمینان سیستم (۰-۱۰۰٪) |
|
|
- **جزئیات نمادها**: نام شرکت، بازار و گروه صنعت |
|
|
|
|
|
## مدلهای استفاده شده: |
|
|
|
|
|
- **ParsBERT NER**: شناسایی موجودیتهای عمومی |
|
|
- **Microsoft Phi-2 / mDeBERTa**: تحلیل هوشمند متن برای تشخیص نمادهای بورسی |
|
|
|
|
|
</div> |
|
|
""") |
|
|
|
|
|
|
|
|
submit_btn.click( |
|
|
fn=perform_ner, |
|
|
inputs=input_text, |
|
|
outputs=[output_html, output_entities] |
|
|
) |
|
|
|
|
|
clear_btn.click( |
|
|
lambda: ("", "", ""), |
|
|
outputs=[input_text, output_html, output_entities] |
|
|
) |
|
|
|
|
|
input_text.submit( |
|
|
fn=perform_ner, |
|
|
inputs=input_text, |
|
|
outputs=[output_html, output_entities] |
|
|
) |
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
print("Starting Persian NER + Stock Symbol Detection System...") |
|
|
print(f"Using device: {device}") |
|
|
print(f"Loaded {len(STOCK_SYMBOLS)} stock symbols") |
|
|
print("Models loaded:") |
|
|
print(" - NER: HooshvareLab/bert-base-parsbert-ner-uncased") |
|
|
print(f" - Context: {context_model_name}") |
|
|
demo.launch( |
|
|
share=False, |
|
|
debug=True |
|
|
) |