Tawhid Bin Omar committed on
Commit
892d4dd
·
1 Parent(s): 8746a60

cleaned up code comments and docs

Browse files
analysis/consistency_checker.py CHANGED
@@ -10,11 +10,11 @@ import itertools
10
  class ConsistencyChecker:
11
  def __init__(self):
12
  try:
13
- # Use a smaller NLI model for faster inference
14
  self.nli_model = pipeline(
15
  "text-classification",
16
- model="microsoft/deberta-v3-xsmall", # Smaller, faster model
17
- device=-1 # CPU
18
  )
19
  self._ready = True
20
  except Exception as e:
@@ -78,6 +78,7 @@ class ConsistencyChecker:
78
  })
79
 
80
  # Calculate consistency score
 
81
  total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
82
  issues = len(contradictions) + len(circular_refs)
83
  consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
@@ -113,7 +114,8 @@ class ConsistencyChecker:
113
 
114
  def _is_circular(self, claim1: str, claim2: str) -> bool:
115
  """Simple heuristic to detect circular definitions"""
116
- # Extract key terms (simple word-based approach)
 
117
  words1 = set(claim1.lower().split())
118
  words2 = set(claim2.lower().split())
119
 
 
10
  class ConsistencyChecker:
11
  def __init__(self):
12
  try:
13
+ # Using smaller model for speed - might upgrade to base later
14
  self.nli_model = pipeline(
15
  "text-classification",
16
+ model="microsoft/deberta-v3-xsmall",
17
+ device=-1 # CPU only for now
18
  )
19
  self._ready = True
20
  except Exception as e:
 
78
  })
79
 
80
  # Calculate consistency score
81
+ # TODO: might need to adjust penalty weights based on user feedback
82
  total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
83
  issues = len(contradictions) + len(circular_refs)
84
  consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
 
114
 
115
  def _is_circular(self, claim1: str, claim2: str) -> bool:
116
  """Simple heuristic to detect circular definitions"""
117
+ # This is pretty basic - just checks word overlap
118
+ # Works okay for most cases but could be improved
119
  words1 = set(claim1.lower().split())
120
  words2 = set(claim2.lower().split())
121
 
analysis/coverage_analyzer.py CHANGED
@@ -91,7 +91,8 @@ class CoverageAnalyzer:
91
  explanation: str
92
  ) -> Dict:
93
  """Check if and how well a concept node is covered"""
94
- # Simple keyword matching first
 
95
  node_lower = node_label.lower()
96
  explanation_lower = explanation.lower()
97
 
 
91
  explanation: str
92
  ) -> Dict:
93
  """Check if and how well a concept node is covered"""
94
+ # Started with just keyword matching, works surprisingly well
95
+ # might add semantic similarity later if needed
96
  node_lower = node_label.lower()
97
  explanation_lower = explanation.lower()
98
 
analysis/scorer.py CHANGED
@@ -7,7 +7,8 @@ from typing import Dict, Optional
7
 
8
  class UnderstandingScorer:
9
  def __init__(self):
10
- # Scoring weights
 
11
  self.weights = {
12
  'consistency': 0.25,
13
  'coverage': 0.35,
@@ -72,8 +73,8 @@ class UnderstandingScorer:
72
  High score = explicit about assumptions, boundary conditions, limitations
73
  Low score = makes implicit assumptions without stating them
74
  """
75
- # Heuristic: if there are no contradictions and good coverage,
76
- # assumptions are likely being handled well
77
 
78
  consistency_score = consistency_result.get('consistency_score', 0)
79
  coverage_score = coverage_result.get('coverage_score', 0)
 
7
 
8
  class UnderstandingScorer:
9
  def __init__(self):
10
+ # Scoring weights - adjusted these a few times based on testing
11
+ # coverage is weighted highest because it's most reliable
12
  self.weights = {
13
  'consistency': 0.25,
14
  'coverage': 0.35,
 
73
  High score = explicit about assumptions, boundary conditions, limitations
74
  Low score = makes implicit assumptions without stating them
75
  """
76
+ # Simple heuristic for now - basically if consistency and coverage are good,
77
+ # assumptions are probably okay. Not perfect but works decently.
78
 
79
  consistency_score = consistency_result.get('consistency_score', 0)
80
  coverage_score = coverage_result.get('coverage_score', 0)