Spaces:

empirenexus
/

TranscriptWriting

Paused

App Files Files Community

jmisak committed on Oct 19, 2025

Commit

9619c6a

verified ·

1 Parent(s): ae6e4db

Upload validation.py

Browse files

Files changed (1) hide show

validation.py +77 -7

validation.py CHANGED Viewed

@@ -242,33 +242,103 @@ def validate_summary_quality(summary: str, num_transcripts: int) -> Tuple[float,
     """Check summary for rigor and accuracy"""
     issues = []
     score = 1.0
     # Check for quantification
     if not re.search(r'\d+\s*(?:out of|of|participants|%)', summary):
         issues.append("No quantified findings (must include counts/percentages)")
         score -= 0.3
     # Check for vague claims
     vague_terms = ['many', 'most', 'some', 'several', 'often', 'frequently']
     if any(term in summary.lower() for term in vague_terms):
         issues.append("Contains vague terms - should use specific numbers")
         score -= 0.2
     # Check for absolute claims
     absolute_terms = ['all', 'everyone', 'nobody', 'never', 'always']
     for term in absolute_terms:
         if re.search(rf'\b{term}\b', summary.lower()):
             issues.append(f"Absolute claim '{term}' found - likely overgeneralization")
             score -= 0.2
     # Check for evidence markers
     if 'consensus' not in summary.lower() and 'majority' not in summary.lower():
         issues.append("Missing consensus indicators")
         score -= 0.1
     # Check length is substantial
     if len(summary) < 500:
         issues.append("Summary too brief for thorough analysis")
         score -= 0.2
-    return max(0.0, score), issues

     """Check summary for rigor and accuracy"""
     issues = []
     score = 1.0
     # Check for quantification
     if not re.search(r'\d+\s*(?:out of|of|participants|%)', summary):
         issues.append("No quantified findings (must include counts/percentages)")
         score -= 0.3
     # Check for vague claims
     vague_terms = ['many', 'most', 'some', 'several', 'often', 'frequently']
     if any(term in summary.lower() for term in vague_terms):
         issues.append("Contains vague terms - should use specific numbers")
         score -= 0.2
     # Check for absolute claims
     absolute_terms = ['all', 'everyone', 'nobody', 'never', 'always']
     for term in absolute_terms:
         if re.search(rf'\b{term}\b', summary.lower()):
             issues.append(f"Absolute claim '{term}' found - likely overgeneralization")
             score -= 0.2
     # Check for evidence markers
     if 'consensus' not in summary.lower() and 'majority' not in summary.lower():
         issues.append("Missing consensus indicators")
         score -= 0.1
     # Check length is substantial
     if len(summary) < 500:
         issues.append("Summary too brief for thorough analysis")
         score -= 0.2
+    return max(0.0, score), issues
def verify_consensus_claims(summary: str, valid_results: List[Dict]) -> List[str]:
    """Cross-check consensus claims in a summary against the actual data.

    Three independent checks are run over ``summary``:

    1. "X out of Y" / "X of Y" / "X/Y" claims: Y must equal
       ``len(valid_results)``, and any consensus label found in the 200
       characters preceding the claim must be compatible with the
       percentage X/Y.
    2. Standalone "N%" values must not exceed 100.
    3. "Transcript #N" references must fall in ``1..len(valid_results)``.

    Args:
        summary: The generated summary text to inspect.
        valid_results: The results the summary was built from; only its
            length is used.

    Returns:
        A list of human-readable warning strings (empty if nothing was
        flagged).
    """
    warnings = []
    total = len(valid_results)

    # Consensus claims, e.g. "8 out of 10", "8/10", "8 of 10".
    consensus_pattern = r'(\d+)\s*(?:out of|of|/)\s*(\d+)\s*(?:participants|transcripts|interviews)?'

    # Walk the matches exactly once so each claim is judged only against its
    # own surrounding text. Pairing claims with matches by substring would
    # attribute a label near "8 of 10" to an unrelated "1 of 10" claim
    # (since "1" occurs in "10") and would duplicate warnings for repeats.
    for match in re.finditer(consensus_pattern, summary, re.IGNORECASE):
        count = int(match.group(1))
        claimed_total = int(match.group(2))

        # The claimed denominator must be the real transcript count.
        if claimed_total != total:
            warnings.append(
                f"Claimed total '{claimed_total}' doesn't match actual transcript count '{total}'"
            )
            continue

        percentage = (count / total) * 100 if total > 0 else 0

        # Labels are looked for in the 200 chars before the claim (plus the
        # claim text itself), case-insensitively.
        start_pos = max(0, match.start() - 200)
        context = summary[start_pos:match.end()].upper()

        if "STRONG CONSENSUS" in context and percentage < 80:
            warnings.append(
                f"Claimed 'STRONG CONSENSUS' but {count}/{total} is only {percentage:.0f}% (needs ≥80%)"
            )
        if "MAJORITY" in context and "STRONG" not in context and percentage < 60:
            warnings.append(
                f"Claimed 'MAJORITY' but {count}/{total} is only {percentage:.0f}% (needs ≥60%)"
            )
        if percentage < 40 and ("CONSENSUS" in context or "MAJORITY" in context):
            warnings.append(
                f"Claimed consensus/majority but {count}/{total} is only {percentage:.0f}% (should be labeled as minority/outlier)"
            )

    # Standalone percentage claims. The pattern captures digits only, so a
    # captured value can never be negative; only the upper bound is checked.
    for pct in re.findall(r'(\d+)%', summary):
        if int(pct) > 100:
            warnings.append(f"Invalid percentage: {pct}% (exceeds 100%)")

    # Transcript ID references must point at an existing transcript (1-based).
    for id_num in re.findall(r'[Tt]ranscript\s+#?(\d+)', summary):
        ref_id = int(id_num)
        if ref_id < 1 or ref_id > total:
            warnings.append(f"Referenced Transcript #{ref_id} but only {total} transcripts exist")

    return warnings