Spaces:

leilaghomashchi
/

Data-anonymization

Sleeping

App Files Files Community

leilaghomashchi committed on Oct 21

Commit

bec85f3

verified ·

1 Parent(s): b67aaa4

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -171

app.py CHANGED Viewed

@@ -5,53 +5,65 @@ import os
 from dataclasses import dataclass
 import re
 from llama_cpp import Llama
 @dataclass
 class LocalModelConfig:
     """تنظیمات مدل محلی GGUF - Qwen2.5-32B"""
-    model_path: str = r"C:\models\qwen2.5-32B-Instruct-Q4_K_M.gguf"
     max_tokens: int = 8000
     temperature: float = 0.3
     top_p: float = 0.8
-    n_ctx: int = 4096  # حجم context window
-    n_threads: int = 8  # تعداد CPU threads
-    n_gpu_layers: int = 45  # برای GPU acceleration
 class LocalCerebrasAnonymizer:
     """سیستم ناشناس‌سازی متون مالی فارسی با مدل محلی"""
-    def __init__(self, model_path: str = None):
-        if model_path is None:
-            # استفاده از مسیر default یا متغیر محیطی
-            model_path = os.getenv("QWEN_MODEL_PATH", self.config.model_path)
-        if not os.path.exists(model_path):
-            raise ValueError(f"❌ فایل مدل یافت نشد: {model_path}\n💡 لطفاً مسیر صحیح را بررسی کنید")
-        self.config = LocalModelConfig(model_path=model_path)
         try:
-            print(f"🤖 درحال بارگذاری مدل: {model_path}...")
-            print(f"📊 تنظیمات:")
-            print(f"  • Context: {self.config.n_ctx}")
-            print(f"  • Threads: {self.config.n_threads}")
-            print(f"  • GPU Layers: {self.config.n_gpu_layers}")
             self.llm = Llama(
-                model_path=self.config.model_path,
                 n_ctx=self.config.n_ctx,
                 n_threads=self.config.n_threads,
                 n_gpu_layers=self.config.n_gpu_layers,
                 verbose=False
             )
             print("✅ مدل با موفقیت بارگذاری شد\n")
         except Exception as e:
-            raise Exception(f"❌ خطا در بارگذاری مدل: {str(e)}")
-        self.system_prompt = self._create_advanced_system_prompt()
-    def _create_advanced_system_prompt(self) -> str:
-        """دستورالعمل سیستمی بهینه شده"""
         return """شما یک سیستم ناشناس‌سازی متون مالی فارسی هستید.
 ⚠️ CRITICAL: در پاسخ نهایی خود، فقط و فقط متن ناشناس‌سازی شده را برگردانید، بدون هیچ توضیح، تحلیل، یا تگ اضافی.
@@ -60,7 +72,6 @@ class LocalCerebrasAnonymizer:
 1. **ترتیب پیوسته**: company-01, company-02, ... | person-01, person-02, ... | amount-01, amount-02, ... | percent-01, percent-02, ...
 2. **ثبات**: اگر "همراه اول" → company-01 شد، در تمام متن همان باشد
 3. **نام مستعار**: "فاما" = "فولاد مبارکه" → هر دو company-01
-4. **اشاره ضمنی**: "این شرکت" اگر به company-01 اشاره دارد → company-01
 ## انواع موجودیت:
 - **company-XX**: شرکت‌ها، بانک‌ها، سازمان‌ها
@@ -68,57 +79,46 @@ class LocalCerebrasAnonymizer:
 - **amount-XX**: مبالغ - واحد را حفظ کن
 - **percent-XX**: درصدها
-## قوانین کلیدی:
-1. بازرس = شرکت است → company-XX
-2. واحدها: "amount-01 میلیارد تومان" ✅
-3. گروه‌ها: "گروه X" → company-XX
-4. کلمات عمومی حفظ: "سه شرکت" → حفظ
-5. دوره زمانی حفظ: "۵ ماهه" → حفظ
 ## مثال:
 ورودی: ایران خودرو در اسفند 1402 حدود 23 هزار میلیارد درآمد کسب کرد که 4.58 درصد افزایش داشت.
 خروجی: company-01 در اسفند 1402 حدود amount-01 درآمد کسب کرد که percent-01 افزایش داشت.
-⚠️ یادآوری: فقط متن ناشناس‌شده، بدون هیچ توضیح اضافی."""
     def anonymize_text(self, text: str) -> Dict[str, Any]:
-        """ناشناس‌سازی متن با استفاده از مدل محلی"""
         if not text.strip():
-            return {
-                "success": False,
-                "error": "متن ورودی خالی است"
-            }
         try:
-            # ایجاد پیام برای مدل
             messages = [
-                {"role": "system", "content": self.system_prompt},
                 {"role": "user", "content": text}
             ]
-            # تبدیل پیام‌ها به فرمت مناسب
             prompt = self._format_prompt(messages)
             print(f"⏳ پردازش متن... (طول: {len(text)} کاراکتر)")
-            # فراخوانی مدل
             response = self.llm(
                 prompt,
                 max_tokens=self.config.max_tokens,
                 temperature=self.config.temperature,
                 top_p=self.config.top_p,
-                stop=["</s>", "### User:", "[INST]"]
             )
             content = response["choices"][0]["text"].strip()
-            # پاک‌سازی خروجی
             content = self._remove_thinking_tags(content)
             content = self._clean_markdown(content)
             content = self._clean_explanations(content)
             content = content.strip()
-            # تحلیل نتایج
             analysis = self._analyze_anonymized_text(content)
             return {
@@ -127,22 +127,14 @@ class LocalCerebrasAnonymizer:
                 "entities": analysis["entities"],
                 "statistics": analysis["statistics"],
                 "detailed_analysis": analysis["detailed_analysis"],
-                "usage": {
-                    "prompt_tokens": response.get("usage", {}).get("prompt_tokens", 0),
-                    "completion_tokens": response.get("usage", {}).get("completion_tokens", 0),
-                    "total_tokens": response.get("usage", {}).get("total_tokens", 0)
-                },
                 "quality_check": self._validate_anonymized_text(content)
             }
         except Exception as e:
-            return {
-                "success": False,
-                "error": f"خطا در پردازش: {str(e)}"
-            }
     def _format_prompt(self, messages: list) -> str:
-        """تبدیل پیام‌ها به فرمت Qwen"""
         formatted = ""
         for message in messages:
             role = message["role"]
@@ -150,19 +142,17 @@ class LocalCerebrasAnonymizer:
             if role == "system":
                 formatted += f"{content}\n\n"
             elif role == "user":
-                formatted += f"### User:\n{content}\n\n### Assistant:\n"
             elif role == "assistant":
                 formatted += f"{content}\n\n"
         return formatted
     def _remove_thinking_tags(self, content: str) -> str:
-        """حذف تگ‌های thinking"""
         content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
         content = re.sub(r'</?think>', '', content)
         return content.strip()
     def _clean_markdown(self, content: str) -> str:
-        """پاک کردن markdown"""
         if "```" in content:
             lines = content.split('\n')
             clean_lines = []
@@ -177,20 +167,16 @@ class LocalCerebrasAnonymizer:
         return content
     def _clean_explanations(self, content: str) -> str:
-        """حذف توضیحات اضافی"""
         lines = content.split('\n')
         clean_lines = []
         for line in lines:
             if any(word in line.lower() for word in
-                   ['okay', 'let me', 'here is', 'خروجی', 'نتیجه', 'پاسخ:', 'assistant']):
                 continue
             clean_lines.append(line)
         return '\n'.join(clean_lines).strip()
     def _analyze_anonymized_text(self, text: str) -> Dict[str, Any]:
-        """تحلیل متن ناشناس‌سازی شده"""
         companies = re.findall(r'company-(\d+)', text)
         persons = re.findall(r'person-(\d+)', text)
         amounts = re.findall(r'amount-(\d+)', text)
@@ -201,7 +187,7 @@ class LocalCerebrasAnonymizer:
             "person": len(set(persons)),
             "amount": len(set(amounts)),
             "percent": len(set(percents)),
-            "total_replacements": len(companies) + len(persons) + len(amounts) + len(percents)
         }
         entities = {
@@ -213,9 +199,8 @@ class LocalCerebrasAnonymizer:
         detailed_analysis = {
             "preserved_dates": len(re.findall(r'\d{4}/\d{1,2}/\d{1,2}|\d{1,2}\s+\w+\s+\d{4}', text)),
-            "preserved_times": len(re.findall(r'\d{1,2}:\d{2}', text)),
             "financial_indicators": len(re.findall(r'\b(EPS|P/E|ARPU|NPL|ROE|ROA)\b', text)),
-            "units_preserved": len(re.findall(r'(میلیارد|میلیون|هزار|تومان|ریال|درهم|دلار|یورو)', text))
         }
         return {
@@ -225,7 +210,6 @@ class LocalCerebrasAnonymizer:
         }
     def _validate_anonymized_text(self, text: str) -> Dict[str, Any]:
-        """اعتبارسنجی متن"""
         companies = re.findall(r'company-(\d+)', text)
         persons = re.findall(r'person-(\d+)', text)
         amounts = re.findall(r'amount-(\d+)', text)
@@ -233,20 +217,16 @@ class LocalCerebrasAnonymizer:
         validation_issues = []
-        for entity_type, indices in [
-            ("company", companies),
-            ("person", persons),
-            ("amount", amounts),
-            ("percent", percents)
-        ]:
             if indices:
                 unique_indices = sorted(list(set([int(x) for x in indices])))
                 if unique_indices[0] != 1:
-                    validation_issues.append(f"اندیس {entity_type} از 01 شروع نشده")
                 expected = list(range(1, len(unique_indices) + 1))
                 if unique_indices != expected:
-                    validation_issues.append(f"اندیس‌های {entity_type} پیوسته نیستند")
         return {
             "is_valid": len(validation_issues) == 0,
@@ -259,12 +239,11 @@ class LocalCerebrasAnonymizer:
             }
         }
-def create_local_interface():
-    """رابط کاربری برای مدل محلی"""
-    default_path = r"C:\models\qwen2.5-32B-Instruct-Q4_K_M.gguf"
-    model_available = os.path.exists(default_path)
     custom_css = """
     .gradio-container {
         font-family: 'Tahoma', 'Arial', sans-serif !important;
@@ -272,11 +251,12 @@ def create_local_interface():
         max-width: 1400px;
         margin: 0 auto;
     }
-    .result-box {
-        background-color: #f8f9fa;
-        border: 2px solid #e9ecef;
         border-radius: 12px;
-        padding: 20px;
         margin: 10px 0;
     }
     .local-box {
@@ -287,41 +267,35 @@ def create_local_interface():
         color: #1b5e20;
         margin: 10px 0;
     }
-    .info-box {
-        background-color: #e3f2fd;
-        border: 2px solid #2196F3;
         border-radius: 12px;
-        padding: 15px;
-        color: #0d47a1;
-        margin: 10px 0;
-        font-size: 12px;
     }
     """
-    with gr.Blocks(css=custom_css, title="ناشناس‌ساز Qwen 2.5-32B", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🔒 سیستم ناشناس‌سازی متون مالی فارسی
-        ### 🖥️ مدل: Qwen 2.5-32B (محلی Q4_K_M)
         """)
-        gr.Markdown(f"""
         <div class="info-box">
-        📂 <strong>مسیر مدل:</strong> {default_path}<br>
-        ✅ <strong>وضعیت:</strong> {"✅ مدل دست‌رس است" if model_available else "❌ مدل یافت نشد"}
         </div>
         """)
-        gr.Markdown("""
-        <div class="local-box">
-        🚀 <strong>مدل: Qwen2.5-32B-Instruct (Q4_K_M)</strong><br>
-        ✅ <strong>مزیت:</strong> بدون اینترنت • بدون هزینه API • کنترل کامل<br>
-        ⚡ <strong>نیاز سیستم:</strong> 16+ GB RAM یا GPU VRAM<br>
-        📊 <strong>Quantization:</strong> Q4 (معادل 20 GB مدل کامل)
-        </div>
-        """)
-        with gr.Row():
             with gr.Column(scale=1):
                 input_text = gr.Textbox(
                     label="📝 متن ورودی",
@@ -342,97 +316,102 @@ def create_local_interface():
                     elem_classes=["result-box"]
                 )
-        with gr.Row():
-            statistics_output = gr.Markdown(label="📊 آمار")
-            quality_output = gr.Markdown(label="✅ کیفیت")
-        with gr.Row():
             entities_output = gr.Markdown(label="🏷️ موجودیت‌ها")
-            detailed_analysis_output = gr.Markdown(label="🔍 تحلیل")
-        status_output = gr.Textbox(label="📋 وضعیت", interactive=False)
-        def process_local_text(text: str):
             """پردازش متن"""
-            final_model_path = default_path
-            if not os.path.exists(final_model_path):
-                return ("", f"❌ فایل مدل یافت نشد", "", "", "", f"❌ مسیر: {final_model_path}")
-            if not text or not text.strip():
-                return ("", "❌ متن ورودی خالی است", "", "", "", "❌ خطا")
-            try:
-                anonymizer = LocalCerebrasAnonymizer(model_path=final_model_path)
-                result = anonymizer.anonymize_text(text)
-                if not result["success"]:
-                    return ("", f"❌ خطا: {result['error']}", "", "", "", "❌ خطا")
-                stats = result.get("statistics", {})
-                stats_md = f"""📊 **آمار:**
 🏢 شرکت: {stats.get('company', 0)}
 👤 اشخاص: {stats.get('person', 0)}
 💰 مبالغ: {stats.get('amount', 0)}
 📊 درصدها: {stats.get('percent', 0)}
-🔢 کل: {stats.get('total_replacements', 0)}
 """
-                quality = result.get("quality_check", {})
-                quality_md = "✅ **کنترل کیفیت:**\n\n"
-                quality_md += "✅ موفق" if quality.get("is_valid", False) else "❌ مشکل"
-                if quality.get("issues"):
-                    quality_md += "\n\n**مشکلات:**\n"
-                    for issue in quality["issues"]:
-                        quality_md += f"• {issue}\n"
-                entities = result.get("entities", {})
-                entities_md = "🏷️ **موجودیت‌ها:**\n"
-                if entities.get("companies"):
-                    entities_md += f"\n🏢 company-{', company-'.join(entities['companies'])}"
-                if entities.get("persons"):
-                    entities_md += f"\n👤 person-{', person-'.join(entities['persons'])}"
-                if entities.get("amounts"):
-                    entities_md += f"\n💰 amount-{', amount-'.join(entities['amounts'])}"
-                if entities.get("percents"):
-                    entities_md += f"\n📊 percent-{', percent-'.join(entities['percents'])}"
-                detailed = result.get("detailed_analysis", {})
-                detailed_md = f"""🔍 **تحلیل:**
 📅 تاریخ: {detailed.get('preserved_dates', 0)}
 📈 شاخص: {detailed.get('financial_indicators', 0)}
-📏 واحد: {detailed.get('units_preserved', 0)}
-"""
-                return (
-                    result["anonymized_text"],
-                    stats_md,
-                    quality_md,
-                    entities_md,
-                    detailed_md,
-                    "✅ موفق"
-                )
-            except Exception as e:
-                return ("", f"❌ خطا: {str(e)}", "", "", "", f"❌ {str(e)}")
         def clear_all():
             return "", "", "", "", "", ""
         anonymize_btn.click(
-            fn=process_local_text,
             inputs=[input_text],
-            outputs=[output_text, statistics_output, quality_output, entities_output, detailed_analysis_output, status_output]
         )
         clear_btn.click(
             fn=clear_all,
-            outputs=[input_text, output_text, statistics_output, quality_output, entities_output, detailed_analysis_output]
         )
         gr.Examples(
             examples=[
-                ["ایران خودرو در اسفندماه سال 1402 حدود 23 هزار و 296 میلیارد تومان درآمد کسب کرد که 4.58 درصد افزایش داشت."],
                 ["مجمع پتروشیمی برگزار شد. وانیا نیک تدبیر را بازرس انتخاب کردند."],
             ],
             inputs=input_text,
@@ -442,5 +421,5 @@ def create_local_interface():
         return interface
 if __name__ == "__main__":
-    interface = create_local_interface()
-    interface.launch(show_error=True)

 from dataclasses import dataclass
 import re
 from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 @dataclass
 class LocalModelConfig:
     """تنظیمات مدل محلی GGUF - Qwen2.5-32B"""
+    repo_id: str = "Qwen/Qwen2.5-32B-Instruct-GGUF"
+    filename: str = "qwen2.5-32b-instruct-q4_k_m.gguf"
     max_tokens: int = 8000
     temperature: float = 0.3
     top_p: float = 0.8
+    n_ctx: int = 4096
+    n_threads: int = 4  # کمتر برای Spaces
+    n_gpu_layers: int = 50
 class LocalCerebrasAnonymizer:
     """سیستم ناشناس‌سازی متون مالی فارسی با مدل محلی"""
+    def __init__(self):
+        self.config = LocalModelConfig()
+        self.llm = None
+        self.model_loaded = False
+    def load_model(self) -> str:
+        """بارگذاری مدل از HuggingFace"""
         try:
+            print(f"🤖 درحال دانلود مدل از HuggingFace...")
+            print(f"📦 Repo: {self.config.repo_id}")
+            print(f"📄 Filename: {self.config.filename}")
+            # دانلود مدل
+            model_path = hf_hub_download(
+                repo_id=self.config.repo_id,
+                filename=self.config.filename,
+                local_dir="./models",
+                local_dir_use_symlinks=False
+            )
+            print(f"✅ مدل دانلود شد: {model_path}")
+            print(f"🤖 درحال بارگذاری مدل...")
             self.llm = Llama(
+                model_path=model_path,
                 n_ctx=self.config.n_ctx,
                 n_threads=self.config.n_threads,
                 n_gpu_layers=self.config.n_gpu_layers,
                 verbose=False
             )
+            self.model_loaded = True
             print("✅ مدل با موفقیت بارگذاری شد\n")
+            return "✅ مدل آماده است"
         except Exception as e:
+            error_msg = f"❌ خطا: {str(e)}"
+            print(error_msg)
+            return error_msg
+    def _create_system_prompt(self) -> str:
+        """دستورالعمل سیستمی"""
         return """شما یک سیستم ناشناس‌سازی متون مالی فارسی هستید.
 ⚠️ CRITICAL: در پاسخ نهایی خود، فقط و فقط متن ناشناس‌سازی شده را برگردانید، بدون هیچ توضیح، تحلیل، یا تگ اضافی.
 1. **ترتیب پیوسته**: company-01, company-02, ... | person-01, person-02, ... | amount-01, amount-02, ... | percent-01, percent-02, ...
 2. **ثبات**: اگر "همراه اول" → company-01 شد، در تمام متن همان باشد
 3. **نام مستعار**: "فاما" = "فولاد مبارکه" → هر دو company-01
 ## انواع موجودیت:
 - **company-XX**: شرکت‌ها، بانک‌ها، سازمان‌ها
 - **amount-XX**: مبالغ - واحد را حفظ کن
 - **percent-XX**: درصدها
 ## مثال:
 ورودی: ایران خودرو در اسفند 1402 حدود 23 هزار میلیارد درآمد کسب کرد که 4.58 درصد افزایش داشت.
 خروجی: company-01 در اسفند 1402 حدود amount-01 درآمد کسب کرد که percent-01 افزایش داشت.
+⚠️ یادآوری: فقط متن ناشناس‌شده."""
     def anonymize_text(self, text: str) -> Dict[str, Any]:
+        """ناشناس‌سازی متن"""
+        if not self.model_loaded:
+            return {"success": False, "error": "مدل بارگذاری نشده است"}
         if not text.strip():
+            return {"success": False, "error": "متن ورودی خالی است"}
         try:
             messages = [
+                {"role": "system", "content": self._create_system_prompt()},
                 {"role": "user", "content": text}
             ]
             prompt = self._format_prompt(messages)
             print(f"⏳ پردازش متن... (طول: {len(text)} کاراکتر)")
             response = self.llm(
                 prompt,
                 max_tokens=self.config.max_tokens,
                 temperature=self.config.temperature,
                 top_p=self.config.top_p,
+                stop=["</s>", "[/INST]", "### User:"]
             )
             content = response["choices"][0]["text"].strip()
+            # پاک‌سازی
             content = self._remove_thinking_tags(content)
             content = self._clean_markdown(content)
             content = self._clean_explanations(content)
             content = content.strip()
             analysis = self._analyze_anonymized_text(content)
             return {
                 "entities": analysis["entities"],
                 "statistics": analysis["statistics"],
                 "detailed_analysis": analysis["detailed_analysis"],
                 "quality_check": self._validate_anonymized_text(content)
             }
         except Exception as e:
+            return {"success": False, "error": f"خطا: {str(e)}"}
     def _format_prompt(self, messages: list) -> str:
+        """فرمت prompt برای Qwen2.5"""
         formatted = ""
         for message in messages:
             role = message["role"]
             if role == "system":
                 formatted += f"{content}\n\n"
             elif role == "user":
+                formatted += f"[INST] {content} [/INST]\n"
             elif role == "assistant":
                 formatted += f"{content}\n\n"
         return formatted
     def _remove_thinking_tags(self, content: str) -> str:
         content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
         content = re.sub(r'</?think>', '', content)
         return content.strip()
     def _clean_markdown(self, content: str) -> str:
         if "```" in content:
             lines = content.split('\n')
             clean_lines = []
         return content
     def _clean_explanations(self, content: str) -> str:
         lines = content.split('\n')
         clean_lines = []
         for line in lines:
             if any(word in line.lower() for word in
+                   ['okay', 'let me', 'here is', 'خروجی', 'نتیجه', 'پاسخ:', 'assistant', '[inst]']):
                 continue
             clean_lines.append(line)
         return '\n'.join(clean_lines).strip()
     def _analyze_anonymized_text(self, text: str) -> Dict[str, Any]:
         companies = re.findall(r'company-(\d+)', text)
         persons = re.findall(r'person-(\d+)', text)
         amounts = re.findall(r'amount-(\d+)', text)
             "person": len(set(persons)),
             "amount": len(set(amounts)),
             "percent": len(set(percents)),
+            "total": len(companies) + len(persons) + len(amounts) + len(percents)
         }
         entities = {
         detailed_analysis = {
             "preserved_dates": len(re.findall(r'\d{4}/\d{1,2}/\d{1,2}|\d{1,2}\s+\w+\s+\d{4}', text)),
             "financial_indicators": len(re.findall(r'\b(EPS|P/E|ARPU|NPL|ROE|ROA)\b', text)),
+            "units_preserved": len(re.findall(r'(میلیارد|میلیون|هزار|تومان|ریال|درهم|دلار)', text))
         }
         return {
         }
     def _validate_anonymized_text(self, text: str) -> Dict[str, Any]:
         companies = re.findall(r'company-(\d+)', text)
         persons = re.findall(r'person-(\d+)', text)
         amounts = re.findall(r'amount-(\d+)', text)
         validation_issues = []
+        for entity_type, indices in [("company", companies), ("person", persons),
+                                      ("amount", amounts), ("percent", percents)]:
             if indices:
                 unique_indices = sorted(list(set([int(x) for x in indices])))
                 if unique_indices[0] != 1:
+                    validation_issues.append(f"⚠️ {entity_type} از 01 شروع نشده")
                 expected = list(range(1, len(unique_indices) + 1))
                 if unique_indices != expected:
+                    validation_issues.append(f"⚠️ {entity_type} پیوسته نیست")
         return {
             "is_valid": len(validation_issues) == 0,
             }
         }
+# ========== رابط کاربری ==========
+anonymizer = LocalCerebrasAnonymizer()
+def create_interface():
     custom_css = """
     .gradio-container {
         font-family: 'Tahoma', 'Arial', sans-serif !important;
         max-width: 1400px;
         margin: 0 auto;
     }
+    .info-box {
+        background-color: #e3f2fd;
+        border: 2px solid #2196F3;
         border-radius: 12px;
+        padding: 15px;
+        color: #0d47a1;
         margin: 10px 0;
     }
     .local-box {
         color: #1b5e20;
         margin: 10px 0;
     }
+    .result-box {
+        background-color: #f8f9fa;
+        border: 2px solid #e9ecef;
         border-radius: 12px;
+        padding: 20px;
     }
     """
+    with gr.Blocks(css=custom_css, title="ناشناس‌ساز Qwen2.5", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🔒 سیستم ناشناس‌سازی متون مالی فارسی
+        ### 🚀 Qwen 2.5-32B (HuggingFace Spaces)
         """)
+        gr.Markdown("""
         <div class="info-box">
+        📊 <strong>مدل:</strong> Qwen2.5-32B-Instruct-Q4_K_M<br>
+        🌐 <strong>منبع:</strong> HuggingFace Hub<br>
+        💾 <strong>حجم:</strong> ~20 GB (Q4 quantization)<br>
+        ⚡ <strong>سرعت:</strong> بستگی به GPU Spaces دارد
         </div>
         """)
+        status_box = gr.Textbox(label="📋 وضعیت", interactive=False, value="⏳ درحال بارگذاری مدل...")
+        load_btn = gr.Button("🤖 بارگذاری مدل", variant="primary", size="lg")
+        with gr.Row(visible=False) as input_section:
             with gr.Column(scale=1):
                 input_text = gr.Textbox(
                     label="📝 متن ورودی",
                     elem_classes=["result-box"]
                 )
+        with gr.Row(visible=False) as output_section:
+            with gr.Column():
+                statistics_output = gr.Markdown(label="📊 آمار")
+            with gr.Column():
+                quality_output = gr.Markdown(label="✅ کیفیت")
+        with gr.Row(visible=False) as output_section2:
             entities_output = gr.Markdown(label="🏷️ موجودیت‌ها")
+            detailed_output = gr.Markdown(label="🔍 تحلیل")
+        def load_model_action():
+            """بارگذاری مدل"""
+            msg = anonymizer.load_model()
+            return (
+                gr.Textbox(value=msg),
+                gr.Row(visible=True),
+                gr.Row(visible=True),
+                gr.Row(visible=True)
+            )
+        def process_text(text):
             """پردازش متن"""
+            if not text.strip():
+                return ("", "❌ متن خالی است", "", "", "", "")
+            result = anonymizer.anonymize_text(text)
+            if not result["success"]:
+                return ("", f"❌ {result['error']}", "", "", "", "")
+            stats = result.get("statistics", {})
+            stats_md = f"""📊 **آمار:**
 🏢 شرکت: {stats.get('company', 0)}
 👤 اشخاص: {stats.get('person', 0)}
 💰 مبالغ: {stats.get('amount', 0)}
 📊 درصدها: {stats.get('percent', 0)}
+🔢 کل: {stats.get('total', 0)}"""
+            quality = result.get("quality_check", {})
+            quality_md = f"""✅ **کنترل کیفیت:**
+{'✅ موفق' if quality.get('is_valid') else '❌ مشکل'}
 """
+            if quality.get("issues"):
+                quality_md += "\n**مشکلات:**\n"
+                for issue in quality["issues"]:
+                    quality_md += f"• {issue}\n"
+            entities = result.get("entities", {})
+            entities_md = "🏷️ **موجودیت‌ها:**\n"
+            if entities.get("companies"):
+                entities_md += f"\n🏢 company-{', company-'.join(entities['companies'])}"
+            if entities.get("persons"):
+                entities_md += f"\n👤 person-{', person-'.join(entities['persons'])}"
+            if entities.get("amounts"):
+                entities_md += f"\n💰 amount-{', amount-'.join(entities['amounts'])}"
+            if entities.get("percents"):
+                entities_md += f"\n📊 percent-{', percent-'.join(entities['percents'])}"
+            detailed = result.get("detailed_analysis", {})
+            detailed_md = f"""🔍 **تحلیل:**
 📅 تاریخ: {detailed.get('preserved_dates', 0)}
 📈 شاخص: {detailed.get('financial_indicators', 0)}
+📏 واحد: {detailed.get('units_preserved', 0)}"""
+            return (
+                result["anonymized_text"],
+                stats_md,
+                quality_md,
+                entities_md,
+                detailed_md,
+                "✅ موفق"
+            )
         def clear_all():
             return "", "", "", "", "", ""
+        load_btn.click(
+            fn=load_model_action,
+            outputs=[status_box, input_section, output_section, output_section2]
+        )
         anonymize_btn.click(
+            fn=process_text,
             inputs=[input_text],
+            outputs=[output_text, statistics_output, quality_output, entities_output, detailed_output, status_box]
         )
         clear_btn.click(
             fn=clear_all,
+            outputs=[input_text, output_text, statistics_output, quality_output, entities_output, detailed_output]
         )
         gr.Examples(
             examples=[
+                ["ایران خودرو در اسفندماه حدود 23 هزار میلیارد تومان درآمد کسب کرد که 4.58 درصد افزایش داشت."],
                 ["مجمع پتروشیمی برگزار شد. وانیا نیک تدبیر را بازرس انتخاب کردند."],
             ],
             inputs=input_text,
         return interface
 if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch()