leilaghomashchi commited on
Commit
31cfccf
·
verified ·
1 Parent(s): 83f0287

Delete app1.py

Browse files
Files changed (1) hide show
  1. app1.py +0 -1801
app1.py DELETED
@@ -1,1801 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- 🚀 Enhanced Bilingual Data Anonymization Benchmark System
5
- ====================================================================
6
-
7
- نسخه ساده‌شده فقط با قابلیت بنچمارک پیشرفته
8
- """
9
-
10
- import gradio as gr
11
- import pandas as pd
12
- import numpy as np
13
- import json
14
- import time
15
- import os
16
- import re
17
- import logging
18
- import requests
19
- from datetime import datetime
20
- from functools import lru_cache
21
- from packaging import version
22
- from typing import Dict, List, Tuple, Any
23
- import matplotlib.pyplot as plt
24
- import plotly.express as px
25
- import plotly.graph_objects as go
26
- from plotly.subplots import make_subplots
27
- import warnings
28
- import gc
29
- import threading
30
- from collections import defaultdict
31
-
32
- # Enhanced metrics imports
33
# Optional-dependency probes: features degrade gracefully when a package is absent.
try:
    import psutil
except ImportError:
    PSUTIL_AVAILABLE = False
else:
    PSUTIL_AVAILABLE = True

try:
    from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
except ImportError:
    SKLEARN_AVAILABLE = False
else:
    SKLEARN_AVAILABLE = True

warnings.filterwarnings('ignore')

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Font stack capable of rendering Persian text in matplotlib figures
plt.rcParams['font.family'] = ['Arial Unicode MS', 'Tahoma', 'sans-serif']
-
54
- # =============================================================================
55
- # بخش 1: سیستم اصلی نام‌نشان‌سازی (برای بنچمارک)
56
- # =============================================================================
57
-
58
def auto_setup_models():
    """Download any required local NER models that are missing.

    Returns True when setup ran to completion (individual download
    failures are logged and their partial directories removed), False
    when transformers is unavailable or a fatal error occurs.
    """
    models_dir = "./models"
    required_models = {
        'bert-fa-ner': 'HooshvareLab/bert-fa-zwnj-base-ner',
        'bert-base-NER': 'dslim/bert-base-NER',
    }

    # A model counts as missing when its directory is absent or empty.
    missing_models = [
        name for name in required_models
        if not os.path.exists(os.path.join(models_dir, name))
        or not os.listdir(os.path.join(models_dir, name))
    ]

    if not missing_models:
        logger.info("✅ All models are already available")
        return True

    logger.info(f"📥 Auto-downloading missing models: {missing_models}")

    try:
        from transformers import AutoTokenizer, AutoModelForTokenClassification
        os.makedirs(models_dir, exist_ok=True)

        for model_name in missing_models:
            hf_repo = required_models[model_name]
            model_path = os.path.join(models_dir, model_name)
            logger.info(f"📥 Downloading {model_name} from {hf_repo}...")
            try:
                tokenizer = AutoTokenizer.from_pretrained(hf_repo)
                model = AutoModelForTokenClassification.from_pretrained(hf_repo)
                tokenizer.save_pretrained(model_path)
                model.save_pretrained(model_path)
                logger.info(f"✅ {model_name} downloaded successfully")
                # Free model weights immediately; they are only needed on disk.
                del tokenizer, model
            except Exception as e:
                logger.error(f"❌ Failed to download {model_name}: {e}")
                # Remove a half-written model directory so the next run retries.
                if os.path.exists(model_path):
                    import shutil
                    shutil.rmtree(model_path)

        logger.info("🎉 Auto-setup completed!")
        return True

    except ImportError:
        logger.error("❌ transformers library not available for auto-download")
        return False
    except Exception as e:
        logger.error(f"❌ Auto-setup failed: {e}")
        return False
108
-
109
# Run model auto-setup once at import time; any failure is non-fatal.
try:
    auto_setup_models()
except Exception as exc:
    logger.warning(f"⚠️ Auto-setup encountered an issue: {exc}")
    logger.info("ℹ️ Continuing with manual setup...")
115
-
116
class BilingualDataAnonymizer:
    """Core bilingual (Persian/English) anonymization system, used by the benchmark.

    Entities are detected with local HuggingFace NER pipelines (when
    available) plus a large regex pattern set, then replaced in the text
    by numbered placeholder codes recorded in ``mapping_table``.
    """

    def __init__(self):
        # original-text -> placeholder-code mapping, rebuilt per anonymize_text() call
        self.mapping_table = {}
        # per-category counters used to number placeholder codes (e.g. PERSON_001)
        self.counters = {
            'COMPANY': 0, 'PERSON': 0, 'AMOUNT': 0, 'ACCOUNT': 0,
            'DATE': 0, 'STOCK_SYMBOL': 0, 'PETROCHEMICAL': 0,
            'PRODUCT': 0, 'PERCENTAGE': 0, 'LOCATION': 0,
            'VOLUME': 0, 'PHONE': 0, 'EMAIL': 0, 'ID_NUMBER': 0,
            'FINANCIAL_TERMS': 0, 'BUSINESS_TERMS': 0, 'RATIOS': 0
        }

        # NOTE(review): read but never used in this class — presumably left over
        # from an LLM integration; confirm before removing.
        self.api_key = os.getenv("OPENAI_API_KEY", "")
        self.models_base_path = "./models"
        self.models_loaded = False
        # human-readable per-model load status strings keyed by model name
        self.model_status = {}
        self.load_local_ner_models()

    def ensure_models_directory(self):
        """Create the local models directory if needed; return False on failure."""
        if not os.path.exists(self.models_base_path):
            try:
                os.makedirs(self.models_base_path, exist_ok=True)
                logger.info(f"📁 Created models directory: {self.models_base_path}")
            except Exception as e:
                logger.error(f"❌ Failed to create models directory: {e}")
                return False
        return True

    def download_model_if_missing(self, local_name, hf_repo):
        """Fetch *hf_repo* into the local models dir unless already present.

        Returns a (success: bool, message: str) tuple.
        """
        model_path = os.path.join(self.models_base_path, local_name)
        if os.path.exists(model_path) and os.listdir(model_path):
            return True, f"Model {local_name} already exists"
        try:
            logger.info(f"📥 Auto-downloading {local_name} from {hf_repo}...")
            from transformers import AutoTokenizer, AutoModelForTokenClassification
            tokenizer = AutoTokenizer.from_pretrained(hf_repo)
            model = AutoModelForTokenClassification.from_pretrained(hf_repo)
            tokenizer.save_pretrained(model_path)
            model.save_pretrained(model_path)
            logger.info(f"✅ {local_name} auto-downloaded successfully")
            return True, f"Downloaded {local_name}"
        except Exception as e:
            logger.error(f"❌ Auto-download failed for {local_name}: {e}")
            return False, str(e)

    def _load_pipeline(self, task, model_path, tokenizer_path=None):
        """Load a transformers pipeline from local files, handling version differences.

        ``aggregation_strategy`` is only passed on transformers >= 4.11.0,
        where the parameter was introduced. Returns None on any failure.
        """
        try:
            from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, __version__ as tr_version

            supports_agg = version.parse(tr_version) >= version.parse("4.11.0")

            if tokenizer_path:
                tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
            else:
                tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

            model = AutoModelForTokenClassification.from_pretrained(model_path, local_files_only=True)

            pipeline_kwargs = {
                "model": model,
                "tokenizer": tokenizer,
                "device": -1  # CPU only
            }

            if supports_agg:
                pipeline_kwargs["aggregation_strategy"] = "simple"

            return pipeline(task, **pipeline_kwargs)

        except Exception as e:
            logger.error(f"❌ Failed to load pipeline for {model_path}: {e}")
            return None

    def load_local_ner_models(self):
        """Load Persian and English NER pipelines, auto-downloading when missing.

        Sets ``self.persian_ner`` / ``self.english_ner`` (None on failure),
        populates ``self.model_status``, and sets ``self.models_loaded`` when
        at least one model loaded.
        """
        logger.info("📄 Loading local NER models with auto-download...")
        if not self.ensure_models_directory():
            self.models_loaded = False
            self.model_status['directory'] = "❌ Cannot create models directory"
            return

        try:
            # Verify the heavy dependencies exist before touching model files.
            try:
                import torch
                from transformers import AutoTokenizer, AutoModelForTokenClassification
                transformers_available = True
                logger.info("✅ Transformers library available")
            except ImportError as e:
                transformers_available = False
                self.model_status['transformers'] = f"❌ Transformers library not installed: {str(e)}"
                self.models_loaded = False
                return

            # Persian model
            persian_model_path = os.path.join(self.models_base_path, "bert-fa-ner")
            self.download_model_if_missing("bert-fa-ner", "HooshvareLab/bert-fa-zwnj-base-ner")
            if os.path.exists(persian_model_path) and os.listdir(persian_model_path):
                try:
                    self.persian_ner = self._load_pipeline("ner", persian_model_path)
                    if self.persian_ner:
                        self.model_status['persian'] = f"✅ Local Persian NER: {persian_model_path}"
                    else:
                        self.model_status['persian'] = f"❌ Failed to load Persian model: {persian_model_path}"
                except Exception as e:
                    self.persian_ner = None
                    self.model_status['persian'] = f"❌ Persian model loading error: {str(e)[:100]}"
            else:
                self.persian_ner = None
                self.model_status['persian'] = f"❌ Persian model not found: {persian_model_path}"

            # English model
            english_model_path = os.path.join(self.models_base_path, "bert-base-NER")
            self.download_model_if_missing("bert-base-NER", "dslim/bert-base-NER")
            if os.path.exists(english_model_path) and os.listdir(english_model_path):
                try:
                    self.english_ner = self._load_pipeline("ner", english_model_path)
                    if self.english_ner:
                        self.model_status['english'] = f"✅ Local English NER: {english_model_path}"
                    else:
                        self.model_status['english'] = f"❌ Failed to load English model: {english_model_path}"
                except Exception as e:
                    self.english_ner = None
                    self.model_status['english'] = f"❌ English model loading error: {str(e)[:100]}"
            else:
                self.english_ner = None
                self.model_status['english'] = f"❌ English model not found: {english_model_path}"

            # "loaded" means at least one pipeline came up; otherwise fall back to regex.
            loaded_models = sum(1 for status in self.model_status.values() if status.startswith("✅"))
            self.models_loaded = loaded_models > 0
            if loaded_models == 0:
                self.model_status['fallback'] = "⚠️ Using regex-only mode (no local models found)"

        except Exception as e:
            self.models_loaded = False
            self.model_status['critical'] = f"❌ Critical error: {str(e)[:100]}..."

    def detect_language(self, text):
        """Classify *text* as 'fa', 'en', or 'mixed' by script-character ratio."""
        if not text:
            return 'fa'

        persian_chars = len(re.findall(r'[\u0600-\u06FF]', text))
        english_chars = len(re.findall(r'[a-zA-Z]', text))
        total = persian_chars + english_chars

        if total == 0:
            # no letters of either script: default to Persian
            return 'fa'

        if persian_chars / total > 0.6:
            return 'fa'
        elif english_chars / total > 0.6:
            return 'en'
        else:
            return 'mixed'

    def extract_entities_with_ner(self, text, lang='fa'):
        """Extract entities using the local NER pipelines.

        Runs the Persian and/or English pipeline depending on *lang*,
        normalizes both aggregated ('entity_group') and per-token
        ('entity') pipeline output shapes, and de-duplicates by
        (lowercased text, start, end). Returns a list of entity dicts.
        """
        entities = []

        if not self.models_loaded:
            logger.info("ℹ️ Local NER models not available - using regex only")
            return entities

        try:
            # Local Persian model
            if lang in ['fa', 'mixed'] and hasattr(self, 'persian_ner') and self.persian_ner:
                try:
                    persian_results = self.persian_ner(text)
                    for entity in persian_results:
                        if isinstance(entity, dict):
                            # aggregated pipelines emit 'entity_group'; raw ones emit 'entity'
                            if 'entity_group' in entity:
                                entities.append({
                                    'text': entity['word'].strip(),
                                    'label': entity['entity_group'],
                                    'start': entity['start'],
                                    'end': entity['end'],
                                    'confidence': entity['score'],
                                    'source': 'local_persian_ner'
                                })
                            else:
                                entities.append({
                                    'text': entity['word'].strip(),
                                    'label': entity['entity'],
                                    'start': entity['start'],
                                    'end': entity['end'],
                                    'confidence': entity['score'],
                                    'source': 'local_persian_ner'
                                })
                    logger.info(f"Local Persian NER found {len(persian_results)} entities")
                except Exception as e:
                    logger.error(f"Local Persian NER extraction error: {e}")

            # Local English model
            if lang in ['en', 'mixed'] and hasattr(self, 'english_ner') and self.english_ner:
                try:
                    english_results = self.english_ner(text)
                    for entity in english_results:
                        if isinstance(entity, dict):
                            if 'entity_group' in entity:
                                entities.append({
                                    'text': entity['word'].strip(),
                                    'label': entity['entity_group'],
                                    'start': entity['start'],
                                    'end': entity['end'],
                                    'confidence': entity['score'],
                                    'source': 'local_english_ner'
                                })
                            else:
                                entities.append({
                                    'text': entity['word'].strip(),
                                    'label': entity['entity'],
                                    'start': entity['start'],
                                    'end': entity['end'],
                                    'confidence': entity['score'],
                                    'source': 'local_english_ner'
                                })
                    logger.info(f"Local English NER found {len(english_results)} entities")
                except Exception as e:
                    logger.error(f"Local English NER extraction error: {e}")

        except Exception as e:
            logger.error(f"Local NER extraction general error: {e}")

        # Remove duplicates (same text span reported by both models/shapes)
        unique_entities = []
        seen = set()
        for entity in entities:
            key = (entity['text'].lower(), entity['start'], entity['end'])
            if key not in seen:
                seen.add(key)
                unique_entities.append(entity)

        logger.info(f"Total unique entities found by local models: {len(unique_entities)}")
        return unique_entities

    def map_ner_to_categories(self, ner_label, source=''):
        """Map an NER tag (with or without B-/I- prefix) to a system category.

        Unknown labels fall back to 'BUSINESS_TERMS'.
        """
        mapping = {
            'PER': 'PERSON', 'PERSON': 'PERSON',
            'ORG': 'COMPANY', 'ORGANIZATION': 'COMPANY',
            'LOC': 'LOCATION', 'LOCATION': 'LOCATION',
            'MISC': 'BUSINESS_TERMS', 'MISCELLANEOUS': 'BUSINESS_TERMS',
            'B-PER': 'PERSON', 'I-PER': 'PERSON',
            'B-ORG': 'COMPANY', 'I-ORG': 'COMPANY',
            'B-LOC': 'LOCATION', 'I-LOC': 'LOCATION',
            'B-MISC': 'BUSINESS_TERMS', 'I-MISC': 'BUSINESS_TERMS',
            'MONEY': 'AMOUNT', 'PERCENT': 'PERCENTAGE',
            'DATE': 'DATE', 'TIME': 'DATE'
        }
        return mapping.get(ner_label.upper(), 'BUSINESS_TERMS')

    def anonymize_text(self, original_text, lang='fa'):
        """Step 1: anonymize *original_text* — benchmark entry point.

        Pipeline: detect language -> NER extraction (if models loaded) ->
        prioritized regex extraction -> longest-first placeholder
        substitution. Returns the anonymized text, or an error string
        (localized by *lang*) prefixed with '❌' on failure.
        """
        try:
            if not original_text or not original_text.strip():
                return "❌ Please enter input text!" if lang == 'en' else "❌ لطفاً متن ورودی را وارد کنید!"

            # Reset per-call state
            self.mapping_table = {}
            self.counters = {key: 0 for key in self.counters.keys()}

            anonymized = original_text
            found_entities = set()

            # Language detection
            detected_lang = self.detect_language(original_text)
            logger.info(f"Detected language: {detected_lang}")

            # Stage 1: local NER extraction
            if self.models_loaded:
                logger.info("🤖 Running local NER extraction...")
                ner_entities = self.extract_entities_with_ner(original_text, detected_lang)

                for entity in ner_entities:
                    # Keep only confident, multi-character, not-yet-seen entities
                    if (entity['text'] not in found_entities and
                        len(entity['text'].strip()) > 1 and
                        entity['confidence'] > 0.5):

                        category = self.map_ner_to_categories(entity['label'], entity['source'])

                        if entity['text'] not in self.mapping_table:
                            self.counters[category] += 1
                            code = f"{category}_{self.counters[category]:03d}_LOCAL_NER"
                            self.mapping_table[entity['text']] = code
                            found_entities.add(entity['text'])
                            logger.info(f"Local NER: {entity['text']} -> {code}")
            else:
                logger.info("ℹ️ Using regex-only mode")

            # Stage 2: regex patterns.
            # NOTE(review): `\a` inside the Persian character classes below is a
            # BEL escape, most likely intended as a literal 'a' — preserved as-is.
            patterns = {
                'STOCK_SYMBOL': [
                    r'نماد\s+([آ-ی\a-zA-Z0-9]+)',
                    r'(سبهان|غدیر|شتران|شپنا|پترول|فارس|خارک|پلاسکو|جم|کرمان|مارون|اراک|رازی|شازند|کاوه|بندر|پارس|خوزستان|ماهشهر|عسلویه)(?=\s|$|،|\.|\s+—)',
                    r'شرکت\s+([آ-ی\a-zA-Z\s]+?)(?=\s+در|\s+که|\s+با|،|\.|\s+$|\s+را|\s+به)',
                    r'پتروشیمی\s+([آ-ی\a-zA-Z\s]+?)(?=\s+در|\s+که|\s+با|،|\.|\s+$|\s+توان)',
                    r'(AAPL|GOOGL|MSFT|AMZN|TSLA|META|NVDA|SABIC)(?=\s|$|,|\.)'
                ],
                'COMPANY': [
                    r'شرکت(?=\s+در|\s+که|\s+با|\s+را|\s+به|\s+طی)',
                    r'([آ-ی\a-zA-Z\s]+)\s+شرکت',
                    r'این\s+شرکت(?=\s|$|،|\.)',
                    r'(بانک\s+[آ-ی\a-zA-Z\s]+)',
                    r'([A-Z][a-zA-Z\s]+(?:Inc|Corp|Corporation|Company|Ltd|Limited|LLC))'
                ],
                'PERSON': [
                    r'آقای\s+([آ-ی\a-zA-Z]+(?:\s+[آ-ی\a-zA-Z]+)*)',
                    r'خانم\s+([آ-ی\a-zA-Z]+(?:\s+[آ-ی\a-zA-Z]+)*)',
                    r'مهندس\s+([آ-ی\a-zA-Z]+(?:\s+[آ-ی\a-zA-Z]+)*)',
                    r'دکتر\s+([آ-ی\a-zA-Z]+(?:\s+[آ-ی\a-zA-Z]+)*)',
                    r'([آ-ی\a-zA-Z]+\s+[آ-ی\a-zA-Z]+)(?=،\s+مدیرعامل|\s+مدیرعامل|\s+رئیس)',
                    r'مدیرعامل(?=\s|$|،|\.)',
                    r'سرپرست(?=\s+و|\s|$|،|\.)',
                    r'رئیس\s+هیأت‌مدیره',
                    r'وی(?=\s+ادامه|\s+اظهار|\s+گفت|\s+اعلام|\s+همچنین)'
                ],
                'AMOUNT': [
                    r'\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)\s*تومان',
                    r'مبلغ\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)?\s*تومان',
                    r'\d+\s*تومان(?=\s+به\s+ازای|\s+فروش|\s+،)',
                    r'رقم\s+فعلی\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد)\s*تومان',
                    r'رقم\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد)\s*تومان',
                    r'به\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)\s*تومان',
                    r'از\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)\s*تومان',
                    r'برابر\s+با\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)\s*تومان',
                    r'\d+(?:میلیارد|میلیون)\s*تومان(?=\s+رسیده|\s+ثبت|\s+بوده|\s+،)',
                    r'\$\d+(?:,\d{3})*(?:\.\d+)?\s*(?:million|billion|thousand|M|B|K)?',
                    r'\d+(?:,\d{3})*\s*ریال',
                    r'€\d+(?:,\d{3})*(?:\.\d+)?'
                ],
                'PERCENTAGE': [
                    r'\d+(?:\.\d+)?\s*درصد(?:\s+افزایش|\s+رشد|\s+کاهش|\s+بالاتر|\s+پایین‌تر)?',
                    r'\d+(?:\.\d+)?\s*%',
                    r'معادل\s+\d+(?:\.\d+)?\s*درصد',
                    r'حدود\s+\d+(?:\.\d+)?\s*درصد',
                    r'با\s+\d+(?:\.\d+)?\s*درصد\s+افزایش',
                    r'رشد\s+\d+(?:\.\d+)?\s*درصدی',
                    r'\d+(?:\.\d+)?\s*درصدی(?=\s+همراه|\s+بوده)',
                    r'میزان\s+رشد(?=\s+نسبت|\s+معادل)',
                    r'افزایش\s+قابل‌توجهی',
                    r'بهبود\s+نسبی'
                ],
                'PHONE': [
                    r'(?:تلفن[\s:]*)?(?:شماره[\s:]*)?(?:با[\s]*)?(?:0)?(?:[۰-۹0-9]{2,3}[-\s]?)?[۰-۹0-9]{7,8}',
                    r'(?:تماس[\s:]*)?(?:شماره[\s:]*)?(?:با[\s]*)?(?:0)?(?:[۰-۹0-9]{2,3}[-\s]?)?[۰-۹0-9]{7,8}',
                    r'(?:موبایل[\s:]*)?(?:شماره[\s:]*)?(?:0)?9[۰-۹0-9]{9}',
                    r'[۰-۹0-9]{3,4}[-\s][۰-۹0-9]{7,8}',
                    r'[۰-۹0-9]{11}(?!\d)',
                    r'(?:\+98|0098)?[۰-۹0-9]{10}',
                    r'[۰-۹0-9]{3,4}[-\s]?[۰-۹0-9]{3,4}[-\s]?[۰-۹0-9]{3,4}'
                ],
                'EMAIL': [
                    r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
                    r'ایمیل[\s:]*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
                    r'email[\s:]*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
                    r'نشانی[\s]*الکترونیک[\s:]*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
                    r'آدرس[\s]*ایمیل[\s:]*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
                ],
                'ACCOUNT': [
                    r'(?:شماره[\s]*)?(?:حساب[\s]*)?(?:بانکی[\s:]*)?(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
                    r'حساب[\s]*(?:شماره[\s:]*)?(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
                    r'شماره[\s]*حساب[\s:]*(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
                    r'Account[\s]*(?:Number[\s:]*)?(?:[0-9]{1,3}[-\s]?)*[0-9]{8,20}',
                    r'[۰-۹0-9]{3}[-\s]?[۰-۹0-9]{3}[-\s]?[۰-۹0-9]{6,12}',
                    r'[۰-۹0-9]{2,4}[-\s]?[۰-۹0-9]{6,12}[-\s]?[۰-۹0-9]{2,4}',
                    r'واریز[\s]*(?:سود[\s:]*)?(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
                    r'سود[\s:]*(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}'
                ],
                'ID_NUMBER': [
                    r'IR[۰-۹0-9]{24}',
                    r'شبا[\s:]*IR[۰-۹0-9]{24}',
                    r'IBAN[\s:]*IR[۰-۹0-9]{24}',
                    r'شماره[\s]*شبا[\s:]*IR[۰-۹0-9]{24}',
                    r'(?:کد[\s]*)?(?:ملی[\s:]*)?[۰-۹0-9]{10}',
                    r'(?:شناسه[\s]*)?(?:ملی[\s:]*)?[۰-۹0-9]{10}',
                    r'National[\s]*(?:ID[\s:]*)?[0-9]{10}',
                    r'(?:پاسپورت[\s:]*)?[A-Z][0-9]{8}',
                    r'(?:Passport[\s:]*)?[A-Z][0-9]{8}',
                    r'(?:کارت[\s:]*)?(?:[۰-۹0-9]{4}[-\s]?){3}[۰-۹0-9]{4}',
                    r'(?:Card[\s:]*)?(?:[0-9]{4}[-\s]?){3}[0-9]{4}'
                ],
                'DATE': [
                    r'[۰-۹0-9]{4}[/-][۰-۹0-9]{1,2}[/-][۰-۹0-9]{1,2}',
                    r'[۰-۹0-9]{1,2}[/-][۰-۹0-9]{1,2}[/-][۰-۹0-9]{4}',
                    r'(?:[۰-۹0-9]{1,2})\s*(?:فروردین|اردیبهشت|خرداد|تیر|مرداد|شهریور|مهر|آبان|آذر|دی|بهمن|اسفند)\s*(?:[۰-۹0-9]{4})',
                    r'(?:[0-9]{1,2})\s*(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*(?:[0-9]{4})',
                    r'(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*[0-9]{1,2},?\s*[0-9]{4}'
                ]
            }

            # Process patterns with prioritization — specific categories first
            # (dict order above), rejecting matches that overlap accepted spans.
            logger.info("🔍 Running prioritized regex extraction...")

            processed_entities = set()

            for category, pattern_list in patterns.items():
                for pattern in pattern_list:
                    matches = re.finditer(pattern, original_text, re.IGNORECASE | re.MULTILINE)
                    for match in matches:
                        # NOTE(review): `item` (group 1) is computed but only
                        # `full_match` is ever used below.
                        if match.groups():
                            item = match.group(1).strip()
                            full_match = match.group(0).strip()
                        else:
                            item = match.group(0).strip()
                            full_match = item

                        # Overlap check against previously accepted spans
                        overlaps = False
                        match_start, match_end = match.span()

                        for proc_start, proc_end in processed_entities:
                            if not (match_end <= proc_start or match_start >= proc_end):
                                overlaps = True
                                break

                        if (not overlaps and
                            full_match not in found_entities and
                            full_match not in self.mapping_table and
                            len(full_match) >= 2):

                            self.counters[category] += 1
                            code = f"{category}_{self.counters[category]:03d}_REGEX"
                            self.mapping_table[full_match] = code
                            found_entities.add(full_match)
                            processed_entities.add((match_start, match_end))
                            logger.info(f"Regex ({category}): {full_match} -> {code}")

            # Substitute longest entities first so shorter substrings of an
            # already-replaced entity cannot corrupt its placeholder.
            sorted_items = sorted(self.mapping_table.items(), key=lambda x: len(x[0]), reverse=True)
            for original_item, code in sorted_items:
                anonymized = anonymized.replace(original_item, code)

            logger.info(f"✅ Anonymization completed. Found {len(self.mapping_table)} entities.")
            return anonymized

        except Exception as e:
            return f"❌ Error in anonymization: {str(e)}" if lang == 'en' else f"❌ خطا در نام‌نشان‌سازی: {str(e)}"
554
-
555
- # =============================================================================
556
- # بخش 2: رابط کاربری Enhanced Benchmark
557
- # =============================================================================
558
-
559
# UI string table: per-language labels and messages for the Gradio interface.
# Keys are looked up via EnhancedGradioBenchmarkInterface.get_text().
TEXTS = {
    'en': {
        'title': '🚀 Enhanced Bilingual Data Anonymization Benchmark',
        'subtitle': 'Comprehensive Performance Analysis for Privacy Protection Systems with Advanced Metrics',
        'upload_label': 'Upload Your Dataset',
        'upload_info': 'Supported formats: CSV, TXT, JSON (Max 10MB)',
        'language_label': 'Interface Language',
        'sample_size_label': 'Sample Size for Analysis',
        'sample_size_info': 'Larger samples give more accurate results but take longer',
        'run_button': '🚀 Run Enhanced Benchmark Analysis',
        'download_button': '📥 Download Results',
        'processing': '⏳ Processing your dataset... Please wait.',
        'error_no_file': '❌ Please upload a dataset file first.',
        'error_processing': '❌ Error processing file: {}',
        'success_message': '✅ Enhanced benchmark completed successfully!',
        'results_tab': 'Results Overview',
        'charts_tab': 'Performance Charts',
        'entities_tab': 'Entity Analysis',
        'details_tab': 'Detailed Report',
        'no_results': 'No results yet. Please run the benchmark first.',
    },
    'fa': {
        'title': '🚀 بنچمارک سیستم نام‌نشان‌سازی دوزبانه پیشرفته',
        'subtitle': 'تحلیل جامع عملکرد سیستم‌های حفاظت از حریم خصوصی با متریک‌های پیشرفته',
        'upload_label': 'آپلود دیتاست شما',
        'upload_info': 'فرمت‌های پشتیبانی شده: CSV، TXT، JSON (حداکثر ۱۰ مگابایت)',
        'language_label': 'زبان رابط کاربری',
        'sample_size_label': 'اندازه نمونه برای تحلیل',
        'sample_size_info': 'نمونه‌های بزرگ‌تر نتایج دقیق‌تری می‌دهند اما بیشتر طول می‌کشند',
        'run_button': '🚀 اجرای تحلیل بنچمارک پیشرفته',
        'download_button': '📥 دانلود نتایج',
        'processing': '⏳ در حال پردازش دیتاست شما... لطفاً صبر کنید.',
        'error_no_file': '❌ لطفاً ابتدا فایل دیتاست را آپلود کنید.',
        'error_processing': '❌ خطا در پردازش فایل: {}',
        'success_message': '✅ بنچمارک پیشرفته با موفقیت تکمیل شد!',
        'results_tab': 'خلاصه نتایج',
        'charts_tab': 'نمودارهای عملکرد',
        'entities_tab': 'تحلیل موجودیت‌ها',
        'details_tab': 'گزارش تفصیلی',
        'no_results': 'هنوز نتیجه‌ای وجود ندارد. لطفاً ابتدا بنچمارک را اجرا کنید.',
    }
}
601
-
602
- class EnhancedGradioBenchmarkInterface:
603
- """رابط کاربری Gradio برای بنچمارک پیشرفته"""
604
-
605
- def __init__(self):
606
- self.current_results = None
607
- self.current_language = 'fa'
608
- self.memory_baseline = None
609
- self.performance_history = []
610
- self.stress_test_active = False
611
-
612
- # راه‌اندازی anonymizer
613
- try:
614
- self.anonymizer = BilingualDataAnonymizer()
615
- self.system_ready = True
616
- except Exception as e:
617
- print(f"Error initializing anonymizer: {e}")
618
- self.system_ready = False
619
-
620
- def get_text(self, key):
621
- """دریافت متن بر اساس زبان فعلی"""
622
- return TEXTS[self.current_language].get(key, key)
623
-
624
- def change_language(self, language):
625
- """تغییر زبان رابط کاربری"""
626
- self.current_language = 'en' if language == 'English' else 'fa'
627
- return self.update_interface_texts()
628
-
629
- def update_interface_texts(self):
630
- """به‌روزرسانی متن‌های رابط کاربری"""
631
- return [
632
- gr.update(label=f"{self.get_text('upload_label')} - {self.get_text('upload_info')}"),
633
- gr.update(label=f"{self.get_text('sample_size_label')} - {self.get_text('sample_size_info')}"),
634
- gr.update(value=self.get_text('run_button')),
635
- gr.update(value=self.get_text('download_button')),
636
- ]
637
-
638
- def start_memory_monitoring(self):
639
- """شروع مانیتورینگ حافظه"""
640
- if PSUTIL_AVAILABLE:
641
- try:
642
- process = psutil.Process()
643
- self.memory_baseline = process.memory_info().rss / 1024 / 1024 # MB
644
- except:
645
- self.memory_baseline = 0
646
- else:
647
- self.memory_baseline = 0
648
-
649
- def get_memory_usage(self):
650
- """دریافت مصرف حافظه فعلی"""
651
- if not PSUTIL_AVAILABLE:
652
- return 0
653
- try:
654
- process = psutil.Process()
655
- current_memory = process.memory_info().rss / 1024 / 1024 # MB
656
- return current_memory - (self.memory_baseline or 0)
657
- except:
658
- return 0
659
-
660
- def calculate_classification_metrics(self, results):
661
- """محاسبه متریک‌های دقت کلاسیفیکیشن"""
662
- # ساخت متریک‌های ساده بدون sklearn
663
- total_entities = 0
664
- detected_entities = 0
665
- correct_detections = 0
666
- total_sentences = len(results)
667
- successful_sentences = 0
668
-
669
- for result in results:
670
- if not result.get('success', False):
671
- continue
672
-
673
- successful_sentences += 1
674
- original_text = result.get('original_preview', '')
675
- entities_found = result.get('entity_categories', {})
676
-
677
- # محاسبه ground truth
678
- ground_truth_categories = self.generate_ground_truth(original_text)
679
- predicted_categories = list(entities_found.keys())
680
-
681
- # شمارش entities
682
- total_entities += len(ground_truth_categories)
683
- detected_entities += len(predicted_categories)
684
-
685
- # شمارش تشخیص‌های صحیح
686
- for category in predicted_categories:
687
- if category in ground_truth_categories:
688
- correct_detections += 1
689
-
690
- # محاسبه متریک‌ها
691
- if detected_entities == 0:
692
- precision = 0.0
693
- else:
694
- precision = (correct_detections / detected_entities) * 100
695
-
696
- if total_entities == 0:
697
- recall = 0.0
698
- else:
699
- recall = (correct_detections / total_entities) * 100
700
-
701
- if precision + recall == 0:
702
- f1_score = 0.0
703
- else:
704
- f1_score = 2 * (precision * recall) / (precision + recall)
705
-
706
- if total_sentences == 0:
707
- accuracy = 0.0
708
- else:
709
- accuracy = (successful_sentences / total_sentences) * 100
710
-
711
- return {
712
- 'precision': round(precision, 1),
713
- 'recall': round(recall, 1),
714
- 'f1_score': round(f1_score, 1),
715
- 'accuracy': round(accuracy, 1)
716
- }
717
-
718
- def generate_ground_truth(self, text):
719
- """تولید ground truth بر اساس patterns موجود در متن"""
720
- ground_truth = []
721
-
722
- # الگوهای به��ودیافته برای تشخیص دقیق‌تر
723
- patterns = {
724
- 'EMAIL': [
725
- r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
726
- r'ایمیل[\s:]*[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
727
- ],
728
- 'PHONE': [
729
- r'(?:0)?(?:[۰-۹0-9]{2,3}[-\s]?)?[۰-۹0-9]{7,8}',
730
- r'تلفن[\s:]*(?:0)?(?:[۰-۹0-9]{2,3}[-\s]?)?[۰-۹0-9]{7,8}',
731
- r'موبایل[\s:]*(?:0)?9[۰-۹0-9]{9}',
732
- ],
733
- 'ID_NUMBER': [
734
- r'IR[۰-۹0-9]{24}',
735
- r'شبا[\s:]*IR[۰-۹0-9]{24}',
736
- r'(?:کد[\s]*)?(?:ملی[\s:]*)?[۰-۹0-9]{10}',
737
- r'(?:کارت[\s:]*)?(?:[۰-۹0-9]{4}[-\s]?){3}[۰-۹0-9]{4}',
738
- ],
739
- 'AMOUNT': [
740
- r'\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)?\s*تومان',
741
- r'مبلغ\s+\d+(?:,\d{3})*\s*(?:میلیون|میلیارد|هزار)?\s*تومان',
742
- r'\$\d+(?:,\d{3})*(?:\.\d+)?',
743
- r'\d+(?:,\d{3})*\s*ریال',
744
- ],
745
- 'PERCENTAGE': [
746
- r'\d+(?:\.\d+)?\s*درصد',
747
- r'\d+(?:\.\d+)?\s*%',
748
- r'رشد\s+\d+(?:\.\d+)?\s*درصدی',
749
- ],
750
- 'PERSON': [
751
- r'آقای\s+[آ-ی\a-zA-Z]+',
752
- r'خانم\s+[آ-ی\a-zA-Z]+',
753
- r'مهندس\s+[آ-ی\a-zA-Z]+',
754
- r'دکتر\s+[آ-ی\a-zA-Z]+',
755
- r'مدیرعامل',
756
- r'سرپرست',
757
- ],
758
- 'COMPANY': [
759
- r'شرکت\s+[آ-ی\a-zA-Z\s]+',
760
- r'بانک\s+[آ-ی\a-zA-Z\s]+',
761
- r'[A-Z][a-zA-Z\s]+(?:Inc|Corp|Company|Ltd)',
762
- ],
763
- 'ACCOUNT': [
764
- r'(?:شماره[\s]*)?(?:حساب[\s]*)?(?:بانکی[\s:]*)?(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
765
- r'حساب[\s]*(?:شماره[\s:]*)?(?:[۰-۹0-9]{1,3}[-\s]?)*[۰-۹0-9]{8,20}',
766
- ],
767
- 'DATE': [
768
- r'[۰-۹0-9]{4}[/-][۰-۹0-9]{1,2}[/-][۰-۹0-9]{1,2}',
769
- r'[۰-۹0-9]{1,2}[/-][۰-۹0-9]{1,2}[/-][۰-۹0-9]{4}',
770
- r'(?:[۰-۹0-9]{1,2})\s*(?:فروردین|اردیبهشت|خرداد|تیر|مرداد|شهریور|مهر|آبان|آذر|دی|بهمن|اسفند)',
771
- ]
772
- }
773
-
774
- import re
775
- for category, pattern_list in patterns.items():
776
- found = False
777
- for pattern in pattern_list:
778
- if re.search(pattern, text, re.IGNORECASE):
779
- found = True
780
- break
781
- if found:
782
- ground_truth.append(category)
783
-
784
- return ground_truth
785
-
786
- def calculate_scalability_score(self, results):
787
- """محاسبه امتیاز مقیاس‌پذیری"""
788
- if len(results) < 10:
789
- return 50.0
790
-
791
- processing_times = [r['processing_time_ms'] for r in results if r.get('success', False)]
792
-
793
- if len(processing_times) < 2:
794
- return 50.0
795
-
796
- x = np.arange(len(processing_times))
797
- slope = np.polyfit(x, processing_times, 1)[0]
798
-
799
- if slope <= 0:
800
- return 100.0
801
- elif slope < 1:
802
- return 90.0
803
- elif slope < 5:
804
- return 70.0
805
- elif slope < 10:
806
- return 50.0
807
- else:
808
- return 30.0
809
-
810
- def calculate_performance_degradation(self, results):
811
- """محاسبه کاهش عملکرد در طول زمان"""
812
- processing_times = [r['processing_time_ms'] for r in results if r.get('success', False)]
813
-
814
- if len(processing_times) < 10:
815
- return 0.0
816
-
817
- first_10_percent = int(len(processing_times) * 0.1)
818
- last_10_percent = int(len(processing_times) * 0.1)
819
-
820
- if first_10_percent == 0:
821
- return 0.0
822
-
823
- avg_first = np.mean(processing_times[:first_10_percent])
824
- avg_last = np.mean(processing_times[-last_10_percent:])
825
-
826
- degradation = ((avg_last - avg_first) / avg_first) * 100 if avg_first > 0 else 0
827
- return max(0, degradation)
828
-
829
def run_stress_test(self, sample_text, iterations=50):
    """Run a repeated-anonymization stress test on a single sample text.

    Calls ``self.anonymizer.anonymize_text`` *iterations* times, recording
    per-call wall-clock latency and post-call memory readings, and counts
    successes vs. failures (a result starting with "❌" counts as a failure).

    Args:
        sample_text: text fed to the anonymizer on every iteration.
        iterations: number of repetitions (default 50).

    Returns:
        dict with iteration counts, avg/max/min response time (ms),
        peak/average memory, and up to one truncated error string per
        failed iteration.
    """
    # Flag consumed elsewhere on the instance; set for the duration of the run.
    self.stress_test_active = True
    stress_results = {
        'total_iterations': iterations,
        'successful_iterations': 0,
        'failed_iterations': 0,
        'avg_response_time': 0,
        'max_response_time': 0,
        'min_response_time': float('inf'),
        'memory_peak': 0,
        'memory_average': 0,
        'errors': []
    }

    memory_readings = []
    response_times = []

    for i in range(iterations):
        try:
            start_time = time.time()
            # NOTE(review): start_memory is captured but never used below —
            # presumably intended for a per-call delta; confirm before removing.
            start_memory = self.get_memory_usage()

            result = self.anonymizer.anonymize_text(sample_text)

            end_time = time.time()
            end_memory = self.get_memory_usage()

            response_time = (end_time - start_time) * 1000  # ms
            response_times.append(response_time)
            memory_readings.append(end_memory)

            # The anonymizer signals errors in-band with a "❌" prefix.
            if not result.startswith("❌"):
                stress_results['successful_iterations'] += 1
            else:
                stress_results['failed_iterations'] += 1
                stress_results['errors'].append(f"Iteration {i+1}: {result[:100]}")

        except Exception as e:
            stress_results['failed_iterations'] += 1
            stress_results['errors'].append(f"Iteration {i+1}: {str(e)}")

        # Periodic GC to keep memory readings comparable across iterations.
        if i % 10 == 0:
            gc.collect()

    # Aggregate latency stats; defaults above remain if every call raised.
    if response_times:
        stress_results['avg_response_time'] = np.mean(response_times)
        stress_results['max_response_time'] = max(response_times)
        stress_results['min_response_time'] = min(response_times)

    if memory_readings:
        stress_results['memory_peak'] = max(memory_readings)
        stress_results['memory_average'] = np.mean(memory_readings)

    self.stress_test_active = False
    return stress_results
885
-
886
def calculate_advanced_efficiency(self, base_summary, classification_metrics,
                                 scalability_score, performance_degradation, memory_stats):
    """Compute the weighted advanced-efficiency score (0..100).

    Combines success rate, speed, classification quality, scalability,
    memory efficiency and degradation into a single weighted score.

    Returns:
        float clamped to the [0, 100] range.
    """
    weights = {
        'success_rate': 0.25,
        'speed': 0.20,
        'accuracy': 0.15,
        'precision': 0.10,
        'scalability': 0.10,
        'memory_efficiency': 0.10,
        'degradation': 0.10
    }

    avg_ms = base_summary['avg_processing_time_ms']

    # Per-component scores, keyed identically to `weights`.
    components = {
        'success_rate': base_summary['success_rate'] * 100,
        'speed': min(100, 1000 / avg_ms) * 100 if avg_ms > 0 else 0,
        'accuracy': classification_metrics.get('accuracy', 0),
        'precision': classification_metrics.get('precision', 0),
        'scalability': min(100, scalability_score),
        # Lower memory / lower degradation score higher.
        'memory_efficiency': max(0, 100 - memory_stats['avg_memory_per_sentence']),
        'degradation': max(0, 100 - performance_degradation),
    }

    weighted_total = sum(weights[key] * components[key] for key in weights)

    # Clamp the final score into [0, 100].
    return min(100, max(0, weighted_total))
919
-
920
def load_dataset(self, file_path):
    """Load candidate sentences from a CSV, JSON or plain-text file.

    CSV: picks a well-known text column (or falls back to the first one).
    JSON: accepts a list of strings, or a dict whose values are strings
    or lists of strings. Anything else is read line-by-line.

    Args:
        file_path: path to the dataset file; falsy values yield [].

    Returns:
        list[str] of stripped sentences longer than 10 characters;
        [] when nothing could be loaded (errors are printed, not raised).
    """
    if not file_path:
        return []

    try:
        extension = os.path.splitext(file_path)[1].lower()

        if extension == '.csv':
            frame = pd.read_csv(file_path, encoding='utf-8')
            # Preferred text-column names (English + Persian variants).
            candidates = ['text', 'sentence', 'content', 'data', 'متن', 'جمله']
            column = next((name for name in candidates if name in frame.columns),
                          frame.columns[0])
            raw = frame[column].dropna().astype(str).tolist()

        elif extension == '.json':
            with open(file_path, 'r', encoding='utf-8') as handle:
                payload = json.load(handle)

            raw = []
            if isinstance(payload, list):
                raw = [str(entry) for entry in payload if isinstance(entry, str)]
            elif isinstance(payload, dict):
                for entry in payload.values():
                    if isinstance(entry, list):
                        raw.extend(str(item) for item in entry if isinstance(item, str))
                    elif isinstance(entry, str):
                        raw.append(entry)

        else:  # any other extension: treat as plain text, one sentence per line
            with open(file_path, 'r', encoding='utf-8') as handle:
                raw = [line.strip() for line in handle.read().split('\n')
                       if len(line.strip()) > 10]

        # Final normalization pass shared by all formats.
        return [item.strip() for item in raw if len(item.strip()) > 10]

    except Exception as exc:
        print(f"Error loading dataset: {exc}")
        return []
969
-
970
def run_enhanced_benchmark(self, file_obj, sample_size, progress=gr.Progress()):
    """Run the full enhanced benchmark over an uploaded dataset.

    Loads sentences, anonymizes each one while measuring latency, memory
    and detected entities, then derives classification metrics, scalability,
    degradation, a stress test and the advanced efficiency score. Results
    are stored in ``self.current_results`` and returned as Gradio outputs.

    Args:
        file_obj: uploaded file object (uses ``.name`` as path).
        sample_size: maximum number of sentences to process.
        progress: Gradio progress tracker. NOTE(review): mutable default
            evaluated at definition time — standard Gradio idiom, but confirm.

    Returns:
        11-tuple: status message, 4 result widgets (overview plot,
        performance plot, entity plot, markdown report) and 6 visibility
        updates; on failure, ``get_error_response`` output of the same shape.
    """

    if not file_obj:
        return self.get_error_response("No file uploaded")

    if not self.system_ready:
        return self.get_error_response("System not ready")

    try:
        progress(0.05, desc="Initializing enhanced benchmark...")

        self.start_memory_monitoring()

        progress(0.1, desc="Loading dataset...")
        sentences = self.load_dataset(file_obj.name)

        if not sentences:
            return self.get_error_response("Could not load sentences")

        # Cap workload at the user-selected sample size.
        if len(sentences) > sample_size:
            sentences = sentences[:sample_size]

        progress(0.15, desc=f"Processing {len(sentences)} sentences with enhanced metrics...")

        results = []
        start_time = time.time()
        memory_readings = []

        for i, sentence in enumerate(sentences):
            # Progress spans 0.15 .. 0.80 across the sentence loop.
            progress(0.15 + (0.65 * i / len(sentences)),
                    desc=f"Processing sentence {i+1}/{len(sentences)}")

            # Reset anonymizer state so each sentence is measured in isolation.
            self.anonymizer.mapping_table = {}
            self.anonymizer.counters = {key: 0 for key in self.anonymizer.counters.keys()}

            sent_start = time.time()
            memory_before = self.get_memory_usage()

            try:
                result = self.anonymizer.anonymize_text(sentence)
                processing_time = time.time() - sent_start
                memory_after = self.get_memory_usage()
                memory_used = memory_after - memory_before

                entities_found = len(self.anonymizer.mapping_table)
                # In-band error convention: "❌"-prefixed output means failure.
                success = not result.startswith("❌")

                # Tally entities by category, taken from the code prefix
                # before '_' (e.g. "NAME_1" -> "NAME").
                entity_categories = {}
                for entity, code in self.anonymizer.mapping_table.items():
                    category = code.split('_')[0] if '_' in code else 'OTHER'
                    entity_categories[category] = entity_categories.get(category, 0) + 1

                results.append({
                    'index': i + 1,
                    'success': success,
                    'processing_time_ms': processing_time * 1000,
                    'input_length': len(sentence),
                    'output_length': len(result),
                    'entities_found': entities_found,
                    'entity_categories': entity_categories,
                    'speed_chars_per_sec': len(sentence) / processing_time if processing_time > 0 else 0,
                    'memory_used_mb': memory_used,
                    'original_preview': sentence[:100] + "..." if len(sentence) > 100 else sentence,
                    'anonymized_preview': result[:100] + "..." if len(result) > 100 else result,
                })

                memory_readings.append(memory_after)

            except Exception as e:
                # Record the failure but keep processing remaining sentences.
                results.append({
                    'index': i + 1,
                    'success': False,
                    'error': str(e),
                    'processing_time_ms': (time.time() - sent_start) * 1000,
                    'input_length': len(sentence),
                    'entities_found': 0,
                    'entity_categories': {},
                    'speed_chars_per_sec': 0,
                    'memory_used_mb': 0
                })

        total_time = time.time() - start_time

        progress(0.85, desc="Calculating advanced metrics...")

        successful_results = [r for r in results if r.get('success', False)]

        if not successful_results:
            return self.get_error_response("No successful results")

        # Core throughput/latency aggregates over successful sentences only.
        base_summary = {
            'total_sentences': len(sentences),
            'successful_sentences': len(successful_results),
            'success_rate': len(successful_results) / len(sentences),
            'avg_processing_time_ms': np.mean([r['processing_time_ms'] for r in successful_results]),
            'total_entities': sum(r['entities_found'] for r in successful_results),
            'avg_entities_per_sentence': np.mean([r['entities_found'] for r in successful_results]),
            'avg_speed_chars_per_sec': np.mean([r['speed_chars_per_sec'] for r in successful_results]),
            'sentences_per_minute': len(successful_results) / (total_time / 60) if total_time > 0 else 0,
            'total_time_seconds': total_time
        }

        progress(0.90, desc="Computing classification metrics...")

        classification_metrics = self.calculate_classification_metrics(successful_results)

        progress(0.93, desc="Analyzing performance patterns...")

        scalability_score = self.calculate_scalability_score(successful_results)
        performance_degradation = self.calculate_performance_degradation(successful_results)

        memory_stats = {
            'avg_memory_per_sentence': np.mean([r.get('memory_used_mb', 0) for r in successful_results]),
            'peak_memory_usage': max(memory_readings) if memory_readings else 0,
            'total_memory_used': sum([r.get('memory_used_mb', 0) for r in successful_results])
        }

        progress(0.96, desc="Running stress test...")

        # Stress-test with the first sentence as a representative sample.
        if len(sentences) > 0:
            stress_results = self.run_stress_test(sentences[0], iterations=20)
        else:
            stress_results = {'error': 'No sample text for stress test'}

        advanced_efficiency = self.calculate_advanced_efficiency(
            base_summary, classification_metrics, scalability_score,
            performance_degradation, memory_stats
        )

        enhanced_summary = {
            **base_summary,
            **classification_metrics,
            'scalability_score': scalability_score,
            'performance_degradation': performance_degradation,
            'memory_stats': memory_stats,
            'stress_test_results': stress_results,
            'advanced_efficiency_score': advanced_efficiency,
            'efficiency_score': base_summary['success_rate'] * 100
        }

        # Persist for chart/report builders and the download handler.
        self.current_results = {
            'summary': enhanced_summary,
            'detailed_results': results,
            'timestamp': datetime.now().isoformat(),
            'benchmark_version': 'enhanced_v2.0'
        }

        progress(1.0, desc="Enhanced benchmark complete!")

        overview_plot = self.create_enhanced_overview_chart()
        performance_plot = self.create_enhanced_performance_charts()
        entity_plot = self.create_entity_analysis()
        detailed_report = self.create_enhanced_detailed_report()

        return (
            self.get_text('success_message') + f" (Enhanced v2.0 - {len(enhanced_summary)} metrics)",
            overview_plot,
            performance_plot,
            entity_plot,
            detailed_report,
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True),
            gr.update(visible=True)
        )

    except Exception as e:
        return self.get_error_response(f"Enhanced benchmark error: {str(e)}")
1141
-
1142
def create_enhanced_overview_chart(self):
    """Build the 3x3 Plotly indicator dashboard from the latest summary.

    Rows 1-2 are gauges (efficiency, accuracy, speed, memory, scalability,
    degradation); row 3 shows plain numbers (precision, recall, F1).

    Returns:
        plotly Figure, or None when no benchmark results exist yet.
    """
    if not self.current_results:
        return None

    summary = self.current_results['summary']

    fig = make_subplots(
        rows=3, cols=3,
        subplot_titles=[
            'Advanced Efficiency Score',
            'Classification Accuracy',
            'Processing Speed',
            'Memory Usage',
            'Scalability Score',
            'Performance Degradation',
            'Precision Score',
            'Recall Score',
            'F1 Score'
        ],
        specs=[[{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}],
               [{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}],
               [{"type": "indicator"}, {"type": "indicator"}, {"type": "indicator"}]]
    )

    # Advanced efficiency gauge with red/yellow/green bands and a 90 threshold.
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = summary['advanced_efficiency_score'],
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 60], 'color': "lightcoral"},
                {'range': [60, 80], 'color': "yellow"},
                {'range': [80, 100], 'color': "lightgreen"}],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 90}
        }
    ), row=1, col=1)

    # Accuracy
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = summary.get('accuracy', 0),
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "green"},
            'steps': [{'range': [0, 100], 'color': "lightgray"}],
        }
    ), row=1, col=2)

    # Processing speed, inverted so lower latency scores higher.
    speed_score = min(100, 1000 / summary['avg_processing_time_ms']) if summary['avg_processing_time_ms'] > 0 else 0
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = speed_score,
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "orange"},
        }
    ), row=1, col=3)

    # Memory usage, inverted: less memory per sentence scores higher.
    memory_score = max(0, 100 - summary['memory_stats']['avg_memory_per_sentence'])
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = memory_score,
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "purple"},
        }
    ), row=2, col=1)

    # Scalability score
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = summary['scalability_score'],
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "cyan"},
        }
    ), row=2, col=2)

    # Degradation, inverted: less degradation scores higher.
    degradation_score = max(0, 100 - summary['performance_degradation'])
    fig.add_trace(go.Indicator(
        mode = "gauge+number",
        value = degradation_score,
        domain = {'x': [0, 1], 'y': [0, 1]},
        gauge = {
            'axis': {'range': [None, 100]},
            'bar': {'color': "red"},
        }
    ), row=2, col=3)

    # Row 3: plain numeric indicators for the classification metrics.
    fig.add_trace(go.Indicator(
        mode = "number",
        value = summary.get('precision', 0),
        number = {'suffix': "%"},
        domain = {'x': [0, 1], 'y': [0, 1]},
    ), row=3, col=1)

    fig.add_trace(go.Indicator(
        mode = "number",
        value = summary.get('recall', 0),
        number = {'suffix': "%"},
        domain = {'x': [0, 1], 'y': [0, 1]},
    ), row=3, col=2)

    fig.add_trace(go.Indicator(
        mode = "number",
        value = summary.get('f1_score', 0),
        number = {'suffix': "%"},
        domain = {'x': [0, 1], 'y': [0, 1]},
    ), row=3, col=3)

    fig.update_layout(
        height=900,
        title_text="📊 Enhanced Benchmark Overview - Advanced Metrics",
        title_font_size=20
    )

    return fig
1276
-
1277
def create_enhanced_performance_charts(self):
    """Build the 3x2 Plotly grid of per-sentence performance charts.

    Uses only successful rows from the detailed results: scatter plots for
    time-vs-memory and detection efficiency, a latency trend, histograms
    for memory and speed, and a composite performance score.

    Returns:
        plotly Figure, or None when there are no results / no successes.
    """
    if not self.current_results:
        return None

    results = self.current_results['detailed_results']
    df = pd.DataFrame([r for r in results if r.get('success', False)])

    if df.empty:
        return None

    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=[
            'Processing Time vs Memory Usage',
            'Scalability Analysis',
            'Entity Detection Efficiency',
            'Memory Usage Distribution',
            'Speed Distribution',
            'Advanced Performance Matrix'
        ]
    )

    # 1. Processing time vs memory; marker size/color encode entity count.
    fig.add_trace(go.Scatter(
        x=df['processing_time_ms'],
        y=df['memory_used_mb'],
        mode='markers',
        name='Time vs Memory',
        marker=dict(
            size=df['entities_found'],
            color=df['entities_found'],
            colorscale='Viridis',
            showscale=True
        )
    ), row=1, col=1)

    # 2. Latency trend over processing order (scalability proxy).
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['processing_time_ms'],
        mode='lines+markers',
        name='Time Trend',
        line=dict(color='red')
    ), row=1, col=2)

    # 3. Entities found as a function of input length.
    fig.add_trace(go.Scatter(
        x=df['input_length'],
        y=df['entities_found'],
        mode='markers',
        name='Detection Efficiency'
    ), row=2, col=1)

    # 4. Memory usage histogram.
    fig.add_trace(go.Histogram(
        x=df['memory_used_mb'],
        name='Memory Distribution',
        nbinsx=20
    ), row=2, col=2)

    # 5. Throughput histogram.
    fig.add_trace(go.Histogram(
        x=df['speed_chars_per_sec'],
        name='Speed Distribution',
        nbinsx=20
    ), row=3, col=1)

    # 6. Composite score: 40% entities, 30% speed, 30% (inverted) memory,
    # each normalized by its column maximum.
    performance_score = (
        (df['entities_found'] / df['entities_found'].max() * 40) +
        (df['speed_chars_per_sec'] / df['speed_chars_per_sec'].max() * 30) +
        ((df['memory_used_mb'].max() - df['memory_used_mb']) / df['memory_used_mb'].max() * 30)
    )

    fig.add_trace(go.Scatter(
        x=df.index,
        y=performance_score,
        mode='lines+markers',
        name='Performance Score',
        line=dict(color='green')
    ), row=3, col=2)

    fig.update_layout(
        height=1000,
        title_text="📈 Enhanced Performance Charts",
        title_font_size=20,
        showlegend=False
    )

    return fig
1368
-
1369
def create_entity_analysis(self):
    """Build pie + bar charts of detected entity categories.

    Aggregates ``entity_categories`` counts across all successful results.

    Returns:
        plotly Figure, or None when there are no results or no entities.
    """
    if not self.current_results:
        return None

    results = self.current_results['detailed_results']

    # Sum category counts over every successful sentence.
    all_categories = {}
    for result in results:
        if result.get('success', False):
            for category, count in result.get('entity_categories', {}).items():
                all_categories[category] = all_categories.get(category, 0) + count

    if not all_categories:
        return None

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{"type": "pie"}, {"type": "bar"}]],
        subplot_titles=[
            'Entity Types Distribution',
            'Entity Categories Count'
        ]
    )

    categories = list(all_categories.keys())
    values = list(all_categories.values())

    # Pie chart: relative share of each entity category.
    fig.add_trace(go.Pie(
        labels=categories,
        values=values,
        name="Entity Types"
    ), row=1, col=1)

    # Bar chart: absolute counts per category.
    fig.add_trace(go.Bar(
        x=categories,
        y=values,
        name="Count"
    ), row=1, col=2)

    fig.update_layout(
        height=500,
        title_text="🔍 Entity Analysis",
        title_font_size=20
    )

    return fig
1418
-
1419
def create_enhanced_detailed_report(self):
    """Render the detailed Markdown report in the current UI language.

    Persian ('fa') gets the full report including stress-test results and
    efficiency-based recommendations; any other language gets the English
    summary. Report templates are user-facing and intentionally bilingual.

    Returns:
        Markdown string, or the localized 'no results' message.
    """
    if not self.current_results:
        return self.get_text('no_results')

    summary = self.current_results['summary']

    if self.current_language == 'fa':
        report = f"""
# 📊 گزارش بنچمارک پیشرفته - نسخه ۲.۰

## خلاصه نتایج اصلی
- **کل جملات پردازش شده**: {summary['total_sentences']:,}
- **جملات موفق**: {summary['successful_sentences']:,}
- **نرخ موفقیت**: {summary['success_rate']*100:.1f}%
- **امتیاز کارایی پیشرفته**: {summary['advanced_efficiency_score']:.1f}/100

## 🎯 متریک‌های دقت کلاسیفیکیشن
- **دقت (Precision)**: {summary.get('precision', 0):.1f}%
- **بازخوانی (Recall)**: {summary.get('recall', 0):.1f}%
- **امتیاز F1**: {summary.get('f1_score', 0):.1f}%
- **صحت کلی (Accuracy)**: {summary.get('accuracy', 0):.1f}%

## ⚡ آمار عملکرد پیشرفته
- **متوسط زمان پردازش**: {summary['avg_processing_time_ms']:.1f} میلی‌ثانیه
- **امتیاز مقیاس‌پذیری**: {summary['scalability_score']:.1f}/100
- **کاهش عملکرد**: {summary['performance_degradation']:.1f}%
- **سرعت پردازش**: {summary['avg_speed_chars_per_sec']:.0f} کاراکتر/ثانیه

## 💾 آمار مصرف حافظه
- **متوسط حافظه هر جمله**: {summary['memory_stats']['avg_memory_per_sentence']:.2f} MB
- **حداکثر مصرف حافظه**: {summary['memory_stats']['peak_memory_usage']:.2f} MB
- **کل حافظه استفاده شده**: {summary['memory_stats']['total_memory_used']:.2f} MB

## 🔥 نتایج تست استرس
"""

        # Append stress-test numbers, or the error if the test could not run.
        stress_results = summary.get('stress_test_results', {})
        if 'error' not in stress_results:
            report += f"""
- **کل تکرارها**: {stress_results.get('total_iterations', 0)}
- **تکرارهای موفق**: {stress_results.get('successful_iterations', 0)}
- **تکرارهای ناموفق**: {stress_results.get('failed_iterations', 0)}
- **متوسط زمان پاسخ**: {stress_results.get('avg_response_time', 0):.1f} ms
- **حداکثر زمان پاسخ**: {stress_results.get('max_response_time', 0):.1f} ms
- **حداقل زمان پاسخ**: {stress_results.get('min_response_time', 0):.1f} ms
"""
        else:
            report += f"- **خطا در تست استرس**: {stress_results.get('error', 'نامشخص')}\n"

        # Recommendations based on the efficiency-score band.
        efficiency = summary['advanced_efficiency_score']
        if efficiency >= 80:
            report += """
✅ **سیستم شما عملکرد خوب تا عالی دارد!**
- ادامه مانیتورینگ و نگهداری منظم
- در نظر گیری optimization های ریز
- آماده‌سازی برای production deployment
"""
        elif efficiency >= 60:
            report += """
⚠️ **سیستم نیاز به بهبودهایی دارد:**
- بهینه‌سازی الگوریتم‌های تشخیص
- بهبود مدیریت حافظه
- افزایش دقت کلاسیفیکیشن
- کاهش زمان پردازش
"""
        else:
            report += """
🔧 **سیستم نیاز به بازنگری اساسی دارد:**
- بازطراحی architecture
- بهبود پایه‌ای الگوریتم‌ها
- افزایش منابع سخت‌افزاری
- training مجدد مدل‌ها
- پیاده‌سازی caching mechanism
"""

    else:
        report = f"""
# 📊 Advanced Benchmark Report - Version 2.0

## Main Results Summary
- **Total Sentences Processed**: {summary['total_sentences']:,}
- **Successful Sentences**: {summary['successful_sentences']:,}
- **Success Rate**: {summary['success_rate']*100:.1f}%
- **Advanced Efficiency Score**: {summary['advanced_efficiency_score']:.1f}/100

## 🎯 Classification Accuracy Metrics
- **Precision**: {summary.get('precision', 0):.1f}%
- **Recall**: {summary.get('recall', 0):.1f}%
- **F1 Score**: {summary.get('f1_score', 0):.1f}%
- **Overall Accuracy**: {summary.get('accuracy', 0):.1f}%

## ⚡ Advanced Performance Statistics
- **Average Processing Time**: {summary['avg_processing_time_ms']:.1f} ms
- **Scalability Score**: {summary['scalability_score']:.1f}/100
- **Performance Degradation**: {summary['performance_degradation']:.1f}%
- **Processing Speed**: {summary['avg_speed_chars_per_sec']:.0f} chars/sec

## 💾 Memory Usage Statistics
- **Average Memory per Sentence**: {summary['memory_stats']['avg_memory_per_sentence']:.2f} MB
- **Peak Memory Usage**: {summary['memory_stats']['peak_memory_usage']:.2f} MB
- **Total Memory Used**: {summary['memory_stats']['total_memory_used']:.2f} MB

**This comprehensive benchmark analyzed {summary['total_sentences']} sentences with {len(summary)} different metrics.**
"""

    return report
1527
-
1528
def get_error_response(self, error_msg):
    """Build the standard 11-element error tuple for the benchmark UI.

    Args:
        error_msg: human-readable error text, prefixed with "❌".

    Returns:
        (status message, 4 x None for the result widgets, 6 hide-updates).
    """
    hidden_widgets = [gr.update(visible=False) for _ in range(6)]
    return (f"❌ {error_msg}", None, None, None, None, *hidden_widgets)
1540
-
1541
def download_results(self):
    """Serialize the latest benchmark results to a timestamped JSON file.

    Returns:
        The written filename, or None when no results are available.
    """
    if not self.current_results:
        return None

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = f"enhanced_benchmark_results_{stamp}.json"

    # default=str keeps non-JSON-native values (e.g. numpy scalars) dumpable.
    with open(out_path, 'w', encoding='utf-8') as handle:
        json.dump(self.current_results, handle, ensure_ascii=False, indent=2, default=str)

    return out_path
1553
-
1554
- # =============================================================================
1555
- # بخش 3: ایجاد رابط کاربری
1556
- # =============================================================================
1557
-
1558
def create_benchmark_interface():
    """Build the Gradio Blocks UI for the benchmark-only application.

    Wires a language selector, file upload, sample-size slider and the
    run/download buttons to an ``EnhancedGradioBenchmarkInterface``
    instance; results render in four tabs (overview, performance,
    entity analysis, report).

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """

    enhanced_benchmark = EnhancedGradioBenchmarkInterface()

    # Shared CSS: gradient background, RTL/LTR helpers, rounded widgets.
    custom_css = """
    body, .gradio-container {
        font-family: 'Segoe UI', Tahoma, Arial, sans-serif !important;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        min-height: 100vh !important;
        padding: 20px !important;
    }

    .rtl {
        direction: rtl !important;
        text-align: right !important;
    }

    .ltr {
        direction: ltr !important;
        text-align: left !important;
    }

    .gradio-textbox {
        border-radius: 10px !important;
        box-shadow: 0 4px 15px rgba(0,0,0,0.1) !important;
    }

    .gradio-button {
        border-radius: 25px !important;
        font-weight: bold !important;
        transition: all 0.3s ease !important;
        margin: 5px 0 !important;
        min-height: 50px !important;
    }

    .gradio-button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 20px rgba(0,0,0,0.2) !important;
    }

    h1, h2, h3 {
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3) !important;
        margin-top: 0 !important;
        margin-bottom: 10px !important;
        padding-top: 0 !important;
        line-height: 1.2 !important;
    }
    """

    with gr.Blocks(title="🚀 Enhanced Benchmark System", theme=gr.themes.Soft(), css=custom_css) as app:

        # Language selector (Persian / English)
        with gr.Row():
            language_selector = gr.Radio(
                choices=["فارسی", "English"],
                value="فارسی",
                label="Language / زبان",
                interactive=True
            )

        # Main bilingual header
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1>🚀 بنچمارک سیستم نام‌نشان‌سازی دوزبانه پیشرفته</h1>
            <h2>Enhanced Bilingual Data Anonymization Benchmark</h2>
            <p>تحلیل جامع عملکرد سیستم‌های حفاظت از حریم خصوصی با متریک‌های پیشرفته</p>
            <p>Comprehensive Performance Analysis with Advanced Metrics including Precision, Recall, F1-Score, Memory Usage, Scalability</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # Input controls
                file_upload = gr.File(
                    label="آپلود دیتاست شما / Upload Your Dataset (CSV, TXT, JSON - Max 10MB)",
                    file_types=[".csv", ".txt", ".json"],
                    file_count="single",
                )

                sample_size = gr.Slider(
                    minimum=10,
                    maximum=1000,
                    value=200,
                    step=10,
                    label="اندازه نمونه برای تحلیل / Sample Size - Larger samples = more accurate results"
                )

                run_btn = gr.Button(
                    "🚀 اجرای تحلیل بنچمارک پیشرفته / Run Enhanced Benchmark",
                    variant="primary",
                    size="lg"
                )

                download_btn = gr.Button(
                    "📥 دانلود نتایج / Download Results",
                    variant="secondary",
                    visible=False
                )

                # Status display
                status_output = gr.Textbox(
                    label="وضعیت / Status",
                    interactive=False,
                    lines=2
                )

            with gr.Column(scale=2):
                # Result tabs
                with gr.Tabs():
                    with gr.Tab("خلاصه نتایج پیشرفته / Enhanced Overview"):
                        overview_plot = gr.Plot(
                            label="نمودار خلاصه کلی پیشرفته",
                            visible=False
                        )

                    with gr.Tab("نمودارهای عملکرد پیشرفته / Advanced Performance"):
                        performance_plot = gr.Plot(
                            label="نمودارهای عملکرد پیشرفته",
                            visible=False
                        )

                    with gr.Tab("تحلیل موجودیت‌ها / Entity Analysis"):
                        entity_plot = gr.Plot(
                            label="تحلیل موجودیت‌ها",
                            visible=False
                        )

                    with gr.Tab("گزارش تفصیلی پیشرفته / Enhanced Report"):
                        detailed_report = gr.Markdown(
                            "هنوز نتیجه‌ای وجود ندارد. لطفاً ابتدا بنچمارک پیشرفته را اجرا کنید.\n\nNo results yet. Please run the enhanced benchmark first.",
                            visible=False
                        )

        # System status banner (ready vs demo mode)
        system_status = "✅ سیستم بنچمارک پیشرفته آماده است / Enhanced benchmark system ready" if enhanced_benchmark.system_ready else "⚠️ سیستم در حالت نمایشی / Running in demo mode"

        gr.HTML(f"""
        <div style="text-align: center; margin-top: 20px; padding: 10px; background-color: #e8f4f8; border-radius: 5px;">
            <p><strong>وضعیت سیستم / System Status:</strong> {system_status}</p>
            <p><strong>ویژگی‌های جدید:</strong> Precision, Recall, F1-Score, Memory Usage, Scalability Analysis, Stress Testing</p>
        </div>
        """)

        # Usage guide. FIX: repaired U+FFFD mojibake in "آپلود کنید" (was "کن��د").
        gr.HTML("""
        <div style="margin-top: 30px; padding: 20px; background-color: #f0f0f0; border-radius: 10px;">
            <h3>📋 راهنمای استفاده پیشرفته / Enhanced Usage Guide</h3>
            <div style="display: flex; gap: 20px;">
                <div style="flex: 1;">
                    <h4>🇮🇷 فارسی</h4>
                    <ul>
                        <li>فایل دیتاست خود را آپلود کنید</li>
                        <li>اندازه نمونه مورد نظر را انتخاب کنید</li>
                        <li>دکمه "اجرای بنچمارک پیشرفته" را بزنید</li>
                        <li>نتایج در تب‌های مختلف با متریک‌های جدید نمایش داده می‌شود</li>
                        <li><strong>جدید</strong>: متریک‌های Precision, Recall, F1-Score, Memory Usage</li>
                    </ul>
                </div>
                <div style="flex: 1;">
                    <h4>🇺🇸 English</h4>
                    <ul>
                        <li>Upload your dataset file</li>
                        <li>Select desired sample size</li>
                        <li>Click "Run Enhanced Benchmark"</li>
                        <li>Results displayed in different tabs with new metrics</li>
                        <li><strong>New</strong>: Precision, Recall, F1-Score, Memory Usage metrics</li>
                    </ul>
                </div>
            </div>
        </div>
        """)

        # Event handlers
        language_selector.change(
            fn=enhanced_benchmark.change_language,
            inputs=[language_selector],
            outputs=[file_upload, sample_size, run_btn, download_btn]
        )

        # The benchmark returns 11 values: status, 4 widget contents, then
        # 6 visibility updates (the output list repeats components for that).
        run_btn.click(
            fn=enhanced_benchmark.run_enhanced_benchmark,
            inputs=[file_upload, sample_size],
            outputs=[
                status_output,
                overview_plot,
                performance_plot,
                entity_plot,
                detailed_report,
                overview_plot,  # visibility
                performance_plot,  # visibility
                entity_plot,  # visibility
                detailed_report,  # visibility
                download_btn,  # visibility
                download_btn  # dummy for compatibility
            ],
            show_progress=True
        )

        download_btn.click(
            fn=enhanced_benchmark.download_results,
            outputs=gr.File()
        )

    return app
1763
-
1764
- # =============================================================================
1765
- # بخش 4: تابع اصلی
1766
- # =============================================================================
1767
-
1768
def main():
    """Entry point: report available optional features and launch the UI."""

    print("🚀 Starting Enhanced Benchmark System...")
    print("=" * 80)

    # List features whose availability depends on optional imports
    # (sklearn / psutil flags are set at module import time).
    features = []
    if SKLEARN_AVAILABLE:
        features.append("Precision/Recall/F1-Score")
    if PSUTIL_AVAILABLE:
        features.append("Memory Usage Monitoring")
    features.append("Scalability Analysis")
    features.append("Performance Degradation")
    features.append("Stress Testing")

    print(f"✨ Enhanced features: {', '.join(features)}")

    # Build and launch the Gradio interface.
    demo = create_benchmark_interface()

    # share=True exposes a public tunnel; 0.0.0.0 binds all interfaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        inbrowser=True,
        show_error=True,
        favicon_path=None,
        ssl_verify=False
    )
1799
-
1800
# Run the benchmark app only when executed as a script, not on import.
if __name__ == "__main__":
    main()