|
|
""" |
|
|
Fixed NER Anonymization Evaluator

A correct and accurate evaluator that avoids tokenization problems.

This version compares entities directly, without IOB2 tagging.
|
|
""" |
|
|
|
|
|
import os
import re
import tempfile
from datetime import datetime
from typing import Dict, List, Optional, Set, Tuple

import gradio as gr
import pandas as pd
|
|
|
|
|
|
|
|
class FixedNEREvaluator:
    """Score anonymized text by comparing entity placeholders directly.

    Placeholders such as ``COMPANY-01``, ``PERSON_02`` or ``STOCK_SYMBOL_03``
    are pulled out of the reference and predicted texts with regular
    expressions and compared as sets of ``(entity_type, entity_id)`` tuples,
    so no tokenization or IOB2 tagging is involved.
    """

    # Column groups of the per-row result table, in output order.
    _METRIC_COLS = ['precision', 'recall', 'f1', 'tp', 'fp', 'fn']
    _DEBUG_COLS = ['ref_entities', 'pred_entities', 'matched', 'missed', 'extra']

    def __init__(self):
        """Create an evaluator with no stored results and the default patterns."""
        # Filled by evaluate_dataset(); read by create_downloadable_csv().
        self.results_df = None

        # Each pattern must expose two capture groups: (entity type, numeric id).
        # Matching is case-insensitive (see extract_entities).
        self.entity_patterns = [
            r'\b(COMPANY|company|PERSON|person|AMOUNT|amount|PERCENT|percent|GROUP|group|STOCK|stock)-(\d+)\b',
            r'\b(COMPANY|PERSON|AMOUNT|PERCENT|GROUP|STOCK)_(\d+)\b',
            r'\b(COMPANY|PERSON|AMOUNT|PERCENT|GROUP|STOCK)_(\d+)_[A-Z]+\b',
            # The type is captured here as well; with only the id captured the
            # match was discarded and STOCK_SYMBOL placeholders were never counted.
            r'\b(STOCK_SYMBOL)_(\d+)(?:_[A-Z]+)?\b',
        ]

    def extract_entities(self, text: str) -> Set[Tuple[str, str]]:
        """Extract the entity placeholders found in ``text``.

        Args:
            text: Text to scan. Any non-string value (``None``, NaN, numbers)
                yields an empty set.

        Returns:
            Set of ``(entity_type, entity_id)`` tuples with the type
            upper-cased, e.g. ``{('COMPANY', '01'), ('PERSON', '02')}``.
        """
        # NaN / None cells are not strings, so this single check covers them.
        if not isinstance(text, str):
            return set()

        entities = set()
        for pattern in self.entity_patterns:
            for match in re.finditer(pattern, text, re.IGNORECASE):
                groups = match.groups()
                # A pattern without both (type, id) groups cannot name an entity.
                if len(groups) < 2:
                    continue
                entities.add((groups[0].upper(), groups[1]))
        return entities

    def calculate_metrics(self, reference_entities: Set, predicted_entities: Set) -> Dict:
        """Compute set-based precision, recall and F1.

        Args:
            reference_entities: Entities found in the reference text.
            predicted_entities: Entities found in the predicted text.

        Returns:
            Dict with integer ``tp``, ``fp``, ``fn`` and ``precision``,
            ``recall``, ``f1`` rounded to four decimals.
        """
        tp = len(reference_entities & predicted_entities)
        fp = len(predicted_entities - reference_entities)
        fn = len(reference_entities - predicted_entities)

        if not reference_entities and not predicted_entities:
            # Nothing to find and nothing predicted counts as a perfect row.
            precision = recall = f1 = 1.0
        else:
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            denominator = precision + recall
            f1 = 2 * precision * recall / denominator if denominator > 0 else 0.0

        return {
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'precision': round(precision, 4),
            'recall': round(recall, 4),
            'f1': round(f1, 4),
        }

    def evaluate_single_row(self, reference_text: str, predicted_text: str) -> Dict:
        """Evaluate one reference/prediction pair.

        Returns:
            The dict from :meth:`calculate_metrics` extended with sorted entity
            lists for debugging: ``ref_entities``, ``pred_entities``,
            ``matched``, ``missed`` and ``extra``.
        """
        ref_entities = self.extract_entities(reference_text)
        pred_entities = self.extract_entities(predicted_text)

        metrics = self.calculate_metrics(ref_entities, pred_entities)
        metrics['ref_entities'] = sorted(ref_entities)
        metrics['pred_entities'] = sorted(pred_entities)
        metrics['matched'] = sorted(ref_entities & pred_entities)
        metrics['missed'] = sorted(ref_entities - pred_entities)
        metrics['extra'] = sorted(pred_entities - ref_entities)
        return metrics

    @staticmethod
    def _pick_columns(df: pd.DataFrame) -> Optional[Tuple[str, str]]:
        """Return ``(reference_col, predicted_col)`` or ``None`` if unavailable."""
        if 'anonymized_text' not in df.columns:
            return None
        # 'Reference_text' takes priority over 'original_text' as the reference.
        for candidate in ('Reference_text', 'original_text'):
            if candidate in df.columns:
                return candidate, 'anonymized_text'
        return None

    @staticmethod
    def _build_status(results_df: pd.DataFrame, reference_col: str, predicted_col: str) -> str:
        """Render the summary report shown to the user after a successful run."""
        row_count = len(results_df)

        avg_precision = results_df['precision'].mean()
        avg_recall = results_df['recall'].mean()
        # Macro F1 is the plain mean of the per-row F1 scores.
        macro_f1 = results_df['f1'].mean()

        total_tp = results_df['tp'].sum()
        total_fp = results_df['fp'].sum()
        total_fn = results_df['fn'].sum()

        # Micro scores are computed from the counts pooled over all rows.
        micro_precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0
        micro_recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0
        micro_sum = micro_precision + micro_recall
        micro_f1 = 2 * micro_precision * micro_recall / micro_sum if micro_sum > 0 else 0

        # The ">= 0.7" bucket intentionally also contains the ">= 0.9" rows,
        # matching the label printed in the report.
        high_f1 = int((results_df['f1'] >= 0.9).sum())
        mid_f1 = int((results_df['f1'] >= 0.7).sum())
        low_f1 = int((results_df['f1'] < 0.5).sum())

        lines = [
            "✅ ارزیابی با موفقیت انجام شد!",
            "",
            "📊 **نتایج کلی (Direct Entity Matching):**",
            f"• Macro-Average F1: {macro_f1:.4f}",
            f"• Micro-Average F1: {micro_f1:.4f}",
            f"• میانگین Precision: {avg_precision:.4f}",
            f"• میانگین Recall: {avg_recall:.4f}",
            "",
            "📈 **آمار کلی:**",
            f"• کل True Positives: {total_tp}",
            f"• کل False Positives: {total_fp}",
            f"• کل False Negatives: {total_fn}",
            f"• تعداد سطرها: {row_count}",
            "",
            "📊 **توزیع عملکرد:**",
            f"• F1 ≥ 0.9 (عالی): {high_f1} سطر ({high_f1 / row_count * 100:.1f}%)",
            f"• F1 ≥ 0.7 (خوب): {mid_f1} سطر ({mid_f1 / row_count * 100:.1f}%)",
            f"• F1 < 0.5 (ضعیف): {low_f1} سطر ({low_f1 / row_count * 100:.1f}%)",
            "",
            "🔬 **مقایسه:**",
            f"• مرجع (انسانی): {reference_col}",
            f"• پیشبینی (LLM): {predicted_col}",
            "",
            "💡 **تفاوت با seqeval:**",
            "این نسخه مستقیماً entities را مقایسه میکند بدون مشکلات tokenization",
        ]
        return "\n".join(lines) + "\n"

    def evaluate_dataset(self, file_path: str) -> Tuple[bool, str, pd.DataFrame]:
        """Evaluate every row of a CSV file.

        The CSV must contain ``anonymized_text`` plus either ``Reference_text``
        or ``original_text``. On success the per-row table is also stored in
        ``self.results_df``.

        Args:
            file_path: Path of the CSV file, read as ``utf-8-sig``.

        Returns:
            ``(success, message, results)``. On failure ``success`` is False,
            ``message`` describes the problem and ``results`` is an empty
            DataFrame; no exception propagates to the caller.
        """
        try:
            print(f"📂 در حال خواندن فایل: {file_path}")
            df = pd.read_csv(file_path, encoding='utf-8-sig')
            print(f"✅ فایل خوانده شد: {len(df)} سطر")
            print(f"📋 ستونها: {list(df.columns)}")

            columns = self._pick_columns(df)
            if columns is None:
                return (
                    False,
                    f"❌ ستونهای مورد نیاز یافت نشد!\n\nستونهای موجود: {list(df.columns)}",
                    pd.DataFrame()
                )
            reference_col, predicted_col = columns

            # Without rows there is nothing to average; report it explicitly
            # instead of failing later on missing metric columns.
            if df.empty:
                return False, "❌ فایل هیچ سطری برای ارزیابی ندارد!", pd.DataFrame()

            print(f"🔍 شروع ارزیابی...")

            results = []
            row_pairs = zip(df[reference_col], df[predicted_col])
            for position, (reference_text, predicted_text) in enumerate(row_pairs, start=1):
                if position % 10 == 0:
                    print(f" پردازش سطر {position}/{len(df)}...")
                # Missing cells (NaN) are handled by extract_entities.
                results.append(self.evaluate_single_row(reference_text, predicted_text))

            print(f"✅ ارزیابی کامل شد!")

            results_df = pd.DataFrame(results)
            # Attach the input columns positionally so the source index is irrelevant.
            for col in df.columns:
                results_df[col] = df[col].values
            results_df = results_df[self._METRIC_COLS + self._DEBUG_COLS + list(df.columns)]

            self.results_df = results_df

            status = self._build_status(results_df, reference_col, predicted_col)
            return True, status, results_df

        except Exception as e:
            # UI boundary: convert any failure into a message for the user.
            import traceback
            error_details = traceback.format_exc()
            return False, f"❌ خطا در پردازش:\n\n{str(e)}\n\n{error_details[:500]}", pd.DataFrame()

    def create_downloadable_csv(self) -> Optional[str]:
        """Write the stored results to a timestamped CSV in the temp directory.

        Returns:
            Path of the written file, or ``None`` when there are no results or
            writing fails.
        """
        if self.results_df is None or self.results_df.empty:
            return None

        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            temp_filename = f"fixed_evaluation_results_{timestamp}.csv"
            temp_path = os.path.join(tempfile.gettempdir(), temp_filename)

            df_to_save = self.results_df.copy()
            # The debug columns hold lists of tuples; store them as text.
            for col in self._DEBUG_COLS:
                if col in df_to_save.columns:
                    df_to_save[col] = df_to_save[col].apply(str)

            df_to_save.to_csv(temp_path, index=False, encoding='utf-8-sig')
            return temp_path
        except Exception as e:
            print(f"❌ خطا در ایجاد CSV: {str(e)}")
            return None
|
|
|
|
|
|
|
|
def create_interface():
    """Build the Gradio UI for the evaluator.

    The page offers a CSV upload, an evaluate button, a status panel, a
    preview of the first ten result rows and a download button that appears
    once an evaluation has succeeded.

    Returns:
        The ``gr.Blocks`` application; the caller is responsible for launching it.
    """
    # One evaluator is shared by both callbacks so the download handler can
    # read the results stored by the last evaluation.
    evaluator = FixedNEREvaluator()

    with gr.Blocks(title="Fixed NER Evaluator", theme=gr.themes.Soft()) as demo:

        gr.Markdown("""
        # 🎯 ارزیاب درست و دقیق NER
        ## Fixed NER Anonymization Evaluator

        ### ✅ این نسخه بدون مشکلات tokenization کار میکند
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📂 بارگذاری فایل")

                file_input = gr.File(
                    label="فایل CSV (با ستونهای Reference_text و anonymized_text)",
                    file_types=[".csv"]
                )

                evaluate_btn = gr.Button("🚀 شروع ارزیابی", variant="primary", size="lg")
                download_btn = gr.Button("💾 دانلود نتایج CSV", visible=False, variant="secondary")

            with gr.Column(scale=2):
                status_output = gr.Markdown("آماده دریافت فایل...")

        results_table = gr.Dataframe(
            label="نتایج تفصیلی (10 سطر اول)",
            visible=False,
            wrap=True
        )

        download_file = gr.File(visible=False)

        with gr.Accordion("📖 راهنمای استفاده", open=False):
            gr.Markdown("""
            ## نحوه استفاده:

            1. فایل CSV خود را آپلود کنید
            2. فایل باید شامل این ستونها باشد:
               - `Reference_text` (مرجع انسانی)
               - `anonymized_text` (پیشبینی LLM)
            3. روی دکمه "شروع ارزیابی" کلیک کنید
            4. نتایج را مشاهده و دانلود کنید

            ## تفاوت با نسخه قبلی:

            - ✅ مستقیماً entities را مقایسه میکند
            - ✅ بدون مشکلات tokenization
            - ✅ برای فارسی کاملاً دقیق
            - ✅ شامل اطلاعات debug (matched, missed, extra entities)
            """)

        def _failure_outputs(message):
            """Return outputs that show ``message`` and hide table, button and file."""
            return (
                message,
                gr.Dataframe(visible=False),
                gr.Button(visible=False),
                gr.File(visible=False)
            )

        def evaluate_file(file):
            """Run the evaluation for the uploaded file and update the four outputs."""
            if file is None:
                return _failure_outputs("❌ لطفاً فایل CSV را بارگذاری کنید")

            # The upload may arrive as a path string or as a tempfile-like
            # object exposing the path through ``.name``; accept both.
            file_path = getattr(file, "name", file)
            success, message, df = evaluator.evaluate_dataset(file_path)

            if not success:
                # evaluate_dataset already prefixes its error messages with "❌",
                # so the message is passed through without a second marker.
                return _failure_outputs(message)

            return (
                message,
                gr.Dataframe(value=df.head(10), visible=True),
                gr.Button(visible=True),
                gr.File(visible=False)
            )

        def download_results():
            """Export the last results to CSV and expose the file for download."""
            csv_path = evaluator.create_downloadable_csv()
            if csv_path and os.path.exists(csv_path):
                return "✅ فایل نتایج آماده دانلود است", gr.File(value=csv_path, visible=True)
            return "❌ خطا در ایجاد فایل", gr.File(visible=False)

        evaluate_btn.click(
            fn=evaluate_file,
            inputs=[file_input],
            outputs=[status_output, results_table, download_btn, download_file]
        )

        download_btn.click(
            fn=download_results,
            outputs=[status_output, download_file]
        )

    return demo
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all network
    # interfaces at port 7860, without requesting a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)
|
|
|