DominiqueLoyer
committed on
Commit
·
e54c6c5
1
Parent(s):
f19f3b4
📈 Add TREC IR metrics calculation (Precision, Recall, MAP, NDCG, TF-IDF, MRR) to backend and dashboard
Browse files
syscred/verification_system.py
CHANGED
|
@@ -688,6 +688,8 @@ class CredibilityVerificationSystem:
|
|
| 688 |
},
|
| 689 |
# [NEW] TREC Evidence section
|
| 690 |
'evidences': evidences or [],
|
|
|
|
|
|
|
| 691 |
'metadonnees': {}
|
| 692 |
}
|
| 693 |
|
|
@@ -754,6 +756,99 @@ class CredibilityVerificationSystem:
|
|
| 754 |
|
| 755 |
return report
|
| 756 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 757 |
def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
|
| 758 |
"""Get list of factors that influenced the score (For UI)."""
|
| 759 |
factors = []
|
|
|
|
| 688 |
},
|
| 689 |
# [NEW] TREC Evidence section
|
| 690 |
'evidences': evidences or [],
|
| 691 |
+
# [NEW] TREC IR Metrics for dashboard
|
| 692 |
+
'trec_metrics': self._calculate_trec_metrics(cleaned_text, evidences),
|
| 693 |
'metadonnees': {}
|
| 694 |
}
|
| 695 |
|
|
|
|
| 756 |
|
| 757 |
return report
|
| 758 |
|
| 759 |
+
def _calculate_trec_metrics(self, text: str, evidences: List[Dict[str, Any]] = None) -> Dict[str, float]:
|
| 760 |
+
"""
|
| 761 |
+
Calculate TREC-style IR metrics for display on dashboard.
|
| 762 |
+
|
| 763 |
+
Computes:
|
| 764 |
+
- Precision: Ratio of relevant retrieved documents
|
| 765 |
+
- Recall: Ratio of relevant documents retrieved
|
| 766 |
+
- MAP: Mean Average Precision
|
| 767 |
+
- NDCG: Normalized Discounted Cumulative Gain
|
| 768 |
+
- TF-IDF: Term Frequency-Inverse Document Frequency score
|
| 769 |
+
- MRR: Mean Reciprocal Rank
|
| 770 |
+
"""
|
| 771 |
+
import math
|
| 772 |
+
|
| 773 |
+
metrics = {
|
| 774 |
+
'precision': 0.0,
|
| 775 |
+
'recall': 0.0,
|
| 776 |
+
'map': 0.0,
|
| 777 |
+
'ndcg': 0.0,
|
| 778 |
+
'tfidf': 0.0,
|
| 779 |
+
'mrr': 0.0
|
| 780 |
+
}
|
| 781 |
+
|
| 782 |
+
if not text:
|
| 783 |
+
return metrics
|
| 784 |
+
|
| 785 |
+
# TF-IDF based on text analysis
|
| 786 |
+
words = text.lower().split()
|
| 787 |
+
if words:
|
| 788 |
+
# Simple TF calculation
|
| 789 |
+
word_counts = {}
|
| 790 |
+
for word in words:
|
| 791 |
+
word_counts[word] = word_counts.get(word, 0) + 1
|
| 792 |
+
|
| 793 |
+
# Calculate TF-IDF score (simplified)
|
| 794 |
+
total_words = len(words)
|
| 795 |
+
unique_words = len(word_counts)
|
| 796 |
+
|
| 797 |
+
# Term frequency normalized
|
| 798 |
+
tf_scores = [count / total_words for count in word_counts.values()]
|
| 799 |
+
# IDF approximation based on word distribution
|
| 800 |
+
idf_approx = math.log((unique_words + 1) / 2)
|
| 801 |
+
|
| 802 |
+
tfidf_sum = sum(tf * idf_approx for tf in tf_scores)
|
| 803 |
+
metrics['tfidf'] = min(1.0, tfidf_sum / max(1, unique_words) * 10)
|
| 804 |
+
|
| 805 |
+
# If we have evidences, calculate retrieval metrics
|
| 806 |
+
if evidences and len(evidences) > 0:
|
| 807 |
+
k = len(evidences)
|
| 808 |
+
|
| 809 |
+
# For now, assume all retrieved evidences have some relevance
|
| 810 |
+
# based on their retrieval scores
|
| 811 |
+
scores = [e.get('score', 0) for e in evidences]
|
| 812 |
+
|
| 813 |
+
if scores:
|
| 814 |
+
avg_score = sum(scores) / len(scores)
|
| 815 |
+
max_score = max(scores)
|
| 816 |
+
|
| 817 |
+
# Precision at K (proxy: avg relevance score)
|
| 818 |
+
metrics['precision'] = min(1.0, avg_score if avg_score <= 1.0 else avg_score / max(1, max_score))
|
| 819 |
+
|
| 820 |
+
# Recall (proxy: coverage based on number of evidences)
|
| 821 |
+
metrics['recall'] = min(1.0, len(evidences) / 10) # Assuming 10 is target
|
| 822 |
+
|
| 823 |
+
# MAP (proxy using score ranking)
|
| 824 |
+
ap_sum = 0.0
|
| 825 |
+
for i, score in enumerate(sorted(scores, reverse=True)):
|
| 826 |
+
ap_sum += (i + 1) / (i + 2) * score if score <= 1.0 else (i + 1) / (i + 2)
|
| 827 |
+
metrics['map'] = ap_sum / len(scores) if scores else 0.0
|
| 828 |
+
|
| 829 |
+
# NDCG (simplified)
|
| 830 |
+
dcg = sum(
|
| 831 |
+
(2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
|
| 832 |
+
for i, score in enumerate(scores[:k])
|
| 833 |
+
)
|
| 834 |
+
ideal_scores = sorted(scores, reverse=True)
|
| 835 |
+
idcg = sum(
|
| 836 |
+
(2 ** (score if score <= 1.0 else 1.0) - 1) / math.log2(i + 2)
|
| 837 |
+
for i, score in enumerate(ideal_scores[:k])
|
| 838 |
+
)
|
| 839 |
+
metrics['ndcg'] = dcg / idcg if idcg > 0 else 0.0
|
| 840 |
+
|
| 841 |
+
# MRR (first relevant result)
|
| 842 |
+
for i, score in enumerate(scores):
|
| 843 |
+
if (score > 0.5 if score <= 1.0 else score > max_score / 2):
|
| 844 |
+
metrics['mrr'] = 1.0 / (i + 1)
|
| 845 |
+
break
|
| 846 |
+
if metrics['mrr'] == 0 and len(scores) > 0:
|
| 847 |
+
metrics['mrr'] = 1.0 # First result
|
| 848 |
+
|
| 849 |
+
# Round all values
|
| 850 |
+
return {k: round(v, 4) for k, v in metrics.items()}
|
| 851 |
+
|
| 852 |
def _get_score_factors(self, rule_results: Dict, nlp_results: Dict) -> List[Dict]:
|
| 853 |
"""Get list of factors that influenced the score (For UI)."""
|
| 854 |
factors = []
|