Spaces:

leilaghomashchi
/

Benchmark-data-anonymization

Sleeping

App Files Files Community

leilaghomashchi committed on Sep 22, 2025

Commit

cb5a83f

verified ·

1 Parent(s): 989a66d

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -0

app.py CHANGED Viewed

@@ -82,6 +82,57 @@ class AnonymizationBenchmark:
         return max(total_entities, 1)  # حداقل 1 برای جلوگیری از تقسیم بر صفر
     def calculate_accuracy(self, original_text: str, anonymized_text: str) -> float:
         """محاسبه درستی کلی ناشناس‌سازی"""
         entities = self.extract_entities_from_text(anonymized_text)

         return max(total_entities, 1)  # حداقل 1 برای جلوگیری از تقسیم بر صفر
+    def check_indexing_correctness(self, entities: Dict[str, List[str]]) -> float:
+        """بررسی درستی اندیس‌گذاری"""
+        total_checks = 0
+        passed_checks = 0
+        for entity_type, indices in entities.items():
+            if not indices:
+                continue
+            total_checks += 1
+            unique_indices = sorted([int(x) for x in set(indices)])
+            # بررسی شروع از 1
+            if unique_indices[0] == 1:
+                passed_checks += 0.5
+            # بررسی پیوستگی
+            expected = list(range(1, len(unique_indices) + 1))
+            if unique_indices == expected:
+                passed_checks += 0.5
+        return passed_checks / total_checks if total_checks > 0 else 0.0
+    def calculate_structure_preservation(self, original_text: str, anonymized_text: str) -> float:
+        """محاسبه امتیاز حفظ ساختار"""
+        # کلمات مهم که باید حفظ شوند
+        important_words = [
+            'میلیارد', 'میلیون', 'تومان', 'ریال', 'درصد', 'سود', 'زیان',
+            'مدیرعامل', 'شرکت', 'بانک', 'درآمد', 'سال', 'ماه'
+        ]
+        score = 0.0
+        total_checks = len(important_words)
+        for word in important_words:
+            if word in original_text and word in anonymized_text:
+                score += 1.0
+            elif word not in original_text:
+                total_checks -= 1
+        # بررسی حفظ تعداد کلمات (تقریبی)
+        original_words = len(original_text.split())
+        anonymized_words = len(anonymized_text.split())
+        if original_words > 0:
+            word_ratio = min(anonymized_words / original_words, 1.0)
+            score += word_ratio * 2
+            total_checks += 2
+        return score / total_checks if total_checks > 0 else 0.0
     def calculate_accuracy(self, original_text: str, anonymized_text: str) -> float:
         """محاسبه درستی کلی ناشناس‌سازی"""
         entities = self.extract_entities_from_text(anonymized_text)