Spaces:
Sleeping
Sleeping
Tawhid Bin Omar committed on
Commit ·
892d4dd
1
Parent(s): 8746a60
cleaned up code comments and docs
Browse files
- analysis/consistency_checker.py +6 -4
- analysis/coverage_analyzer.py +2 -1
- analysis/scorer.py +4 -3
analysis/consistency_checker.py
CHANGED
|
@@ -10,11 +10,11 @@ import itertools
|
|
| 10 |
class ConsistencyChecker:
|
| 11 |
def __init__(self):
|
| 12 |
try:
|
| 13 |
-
#
|
| 14 |
self.nli_model = pipeline(
|
| 15 |
"text-classification",
|
| 16 |
-
model="microsoft/deberta-v3-xsmall",
|
| 17 |
-
device=-1 # CPU
|
| 18 |
)
|
| 19 |
self._ready = True
|
| 20 |
except Exception as e:
|
|
@@ -78,6 +78,7 @@ class ConsistencyChecker:
|
|
| 78 |
})
|
| 79 |
|
| 80 |
# Calculate consistency score
|
|
|
|
| 81 |
total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
|
| 82 |
issues = len(contradictions) + len(circular_refs)
|
| 83 |
consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
|
|
@@ -113,7 +114,8 @@ class ConsistencyChecker:
|
|
| 113 |
|
| 114 |
def _is_circular(self, claim1: str, claim2: str) -> bool:
|
| 115 |
"""Simple heuristic to detect circular definitions"""
|
| 116 |
-
#
|
|
|
|
| 117 |
words1 = set(claim1.lower().split())
|
| 118 |
words2 = set(claim2.lower().split())
|
| 119 |
|
|
|
|
| 10 |
class ConsistencyChecker:
|
| 11 |
def __init__(self):
|
| 12 |
try:
|
| 13 |
+
# Using smaller model for speed - might upgrade to base later
|
| 14 |
self.nli_model = pipeline(
|
| 15 |
"text-classification",
|
| 16 |
+
model="microsoft/deberta-v3-xsmall",
|
| 17 |
+
device=-1 # CPU only for now
|
| 18 |
)
|
| 19 |
self._ready = True
|
| 20 |
except Exception as e:
|
|
|
|
| 78 |
})
|
| 79 |
|
| 80 |
# Calculate consistency score
|
| 81 |
+
# TODO: might need to adjust penalty weights based on user feedback
|
| 82 |
total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
|
| 83 |
issues = len(contradictions) + len(circular_refs)
|
| 84 |
consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
|
|
|
|
| 114 |
|
| 115 |
def _is_circular(self, claim1: str, claim2: str) -> bool:
|
| 116 |
"""Simple heuristic to detect circular definitions"""
|
| 117 |
+
# This is pretty basic - just checks word overlap
|
| 118 |
+
# Works okay for most cases but could be improved
|
| 119 |
words1 = set(claim1.lower().split())
|
| 120 |
words2 = set(claim2.lower().split())
|
| 121 |
|
analysis/coverage_analyzer.py
CHANGED
|
@@ -91,7 +91,8 @@ class CoverageAnalyzer:
|
|
| 91 |
explanation: str
|
| 92 |
) -> Dict:
|
| 93 |
"""Check if and how well a concept node is covered"""
|
| 94 |
-
#
|
|
|
|
| 95 |
node_lower = node_label.lower()
|
| 96 |
explanation_lower = explanation.lower()
|
| 97 |
|
|
|
|
| 91 |
explanation: str
|
| 92 |
) -> Dict:
|
| 93 |
"""Check if and how well a concept node is covered"""
|
| 94 |
+
# Started with just keyword matching, works surprisingly well
|
| 95 |
+
# might add semantic similarity later if needed
|
| 96 |
node_lower = node_label.lower()
|
| 97 |
explanation_lower = explanation.lower()
|
| 98 |
|
analysis/scorer.py
CHANGED
|
@@ -7,7 +7,8 @@ from typing import Dict, Optional
|
|
| 7 |
|
| 8 |
class UnderstandingScorer:
|
| 9 |
def __init__(self):
|
| 10 |
-
# Scoring weights
|
|
|
|
| 11 |
self.weights = {
|
| 12 |
'consistency': 0.25,
|
| 13 |
'coverage': 0.35,
|
|
@@ -72,8 +73,8 @@ class UnderstandingScorer:
|
|
| 72 |
High score = explicit about assumptions, boundary conditions, limitations
|
| 73 |
Low score = makes implicit assumptions without stating them
|
| 74 |
"""
|
| 75 |
-
#
|
| 76 |
-
# assumptions are
|
| 77 |
|
| 78 |
consistency_score = consistency_result.get('consistency_score', 0)
|
| 79 |
coverage_score = coverage_result.get('coverage_score', 0)
|
|
|
|
| 7 |
|
| 8 |
class UnderstandingScorer:
|
| 9 |
def __init__(self):
|
| 10 |
+
# Scoring weights - adjusted these a few times based on testing
|
| 11 |
+
# coverage is weighted highest because it's most reliable
|
| 12 |
self.weights = {
|
| 13 |
'consistency': 0.25,
|
| 14 |
'coverage': 0.35,
|
|
|
|
| 73 |
High score = explicit about assumptions, boundary conditions, limitations
|
| 74 |
Low score = makes implicit assumptions without stating them
|
| 75 |
"""
|
| 76 |
+
# Simple heuristic for now - basically if consistency and coverage are good,
|
| 77 |
+
# assumptions are probably okay. Not perfect but works decently.
|
| 78 |
|
| 79 |
consistency_score = consistency_result.get('consistency_score', 0)
|
| 80 |
coverage_score = coverage_result.get('coverage_score', 0)
|