Add a "Low confidence cluster" issue to cluster validation
Browse files
coreference_resolution.py
CHANGED
|
@@ -52,7 +52,7 @@ class CoreferenceResolver:
|
|
| 52 |
else:
|
| 53 |
self.nlp = None
|
| 54 |
|
| 55 |
-
def _validate_cluster(self, text: str, cluster_strings: List[str]) -> Dict:
|
| 56 |
"""
|
| 57 |
Validate a coreference cluster for linguistic correctness
|
| 58 |
|
|
@@ -67,6 +67,8 @@ class CoreferenceResolver:
|
|
| 67 |
return {'is_valid': True, 'issues': [], 'severity': None}
|
| 68 |
|
| 69 |
issues = []
|
|
|
|
|
|
|
| 70 |
self.doc = self.nlp(text)
|
| 71 |
|
| 72 |
# Extract POS tags for each mention
|
|
@@ -183,7 +185,7 @@ class CoreferenceResolver:
|
|
| 183 |
avg_prob = self._logit_to_prob(avg_logit)
|
| 184 |
|
| 185 |
# Validate cluster for linguistic correctness
|
| 186 |
-
validation = self._validate_cluster(text, cluster_strings)
|
| 187 |
|
| 188 |
# Determine if cluster needs verification using BOTH thresholds AND validation
|
| 189 |
# Fail if ANY condition is true:
|
|
|
|
| 52 |
else:
|
| 53 |
self.nlp = None
|
| 54 |
|
| 55 |
+
def _validate_cluster(self, text: str, cluster_strings: List[str], min_logit: float, avg_logit: float) -> Dict:
|
| 56 |
"""
|
| 57 |
Validate a coreference cluster for linguistic correctness
|
| 58 |
|
|
|
|
| 67 |
return {'is_valid': True, 'issues': [], 'severity': None}
|
| 68 |
|
| 69 |
issues = []
|
| 70 |
+
if (avg_logit < self.confidence_threshold or min_logit < self.min_confidence_threshold):
|
| 71 |
+
issues.append("Low confidence cluster")
|
| 72 |
self.doc = self.nlp(text)
|
| 73 |
|
| 74 |
# Extract POS tags for each mention
|
|
|
|
| 185 |
avg_prob = self._logit_to_prob(avg_logit)
|
| 186 |
|
| 187 |
# Validate cluster for linguistic correctness
|
| 188 |
+
validation = self._validate_cluster(text, cluster_strings, min_logit, avg_logit)
|
| 189 |
|
| 190 |
# Determine if cluster needs verification using BOTH thresholds AND validation
|
| 191 |
# Fail if ANY condition is true:
|