Spaces:

tawhidbinomar
/

conceptvector

Sleeping

Tawhid Bin Omar committed on Jan 11

Commit

892d4dd

1 Parent(s): 8746a60

cleaned up code comments and docs

Files changed (3) hide show

analysis/consistency_checker.py CHANGED Viewed

@@ -10,11 +10,11 @@ import itertools
 class ConsistencyChecker:
     def __init__(self):
         try:
-            # Use a smaller NLI model for faster inference
             self.nli_model = pipeline(
                 "text-classification",
-                model="microsoft/deberta-v3-xsmall",  # Smaller, faster model
-                device=-1  # CPU
             )
             self._ready = True
         except Exception as e:
@@ -78,6 +78,7 @@ class ConsistencyChecker:
                 })
         # Calculate consistency score
         total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
         issues = len(contradictions) + len(circular_refs)
         consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
@@ -113,7 +114,8 @@ class ConsistencyChecker:
     def _is_circular(self, claim1: str, claim2: str) -> bool:
         """Simple heuristic to detect circular definitions"""
-        # Extract key terms (simple word-based approach)
         words1 = set(claim1.lower().split())
         words2 = set(claim2.lower().split())

 class ConsistencyChecker:
     def __init__(self):
         try:
+            # Using smaller model for speed - might upgrade to base later
             self.nli_model = pipeline(
                 "text-classification",
+                model="microsoft/deberta-v3-xsmall",
+                device=-1  # CPU only for now
             )
             self._ready = True
         except Exception as e:
                 })
         # Calculate consistency score
+        # TODO: might need to adjust penalty weights based on user feedback
         total_pairs = len(list(itertools.combinations(range(len(claim_texts)), 2)))
         issues = len(contradictions) + len(circular_refs)
         consistency_score = max(0, 100 - (issues / max(total_pairs, 1)) * 100)
     def _is_circular(self, claim1: str, claim2: str) -> bool:
         """Simple heuristic to detect circular definitions"""
+        # This is pretty basic - just checks word overlap
+        # Works okay for most cases but could be improved
         words1 = set(claim1.lower().split())
         words2 = set(claim2.lower().split())

analysis/coverage_analyzer.py CHANGED Viewed

@@ -91,7 +91,8 @@ class CoverageAnalyzer:
         explanation: str
     ) -> Dict:
         """Check if and how well a concept node is covered"""
-        # Simple keyword matching first
         node_lower = node_label.lower()
         explanation_lower = explanation.lower()

         explanation: str
     ) -> Dict:
         """Check if and how well a concept node is covered"""
+        # Started with just keyword matching, works surprisingly well
+        # might add semantic similarity later if needed
         node_lower = node_label.lower()
         explanation_lower = explanation.lower()

analysis/scorer.py CHANGED Viewed

@@ -7,7 +7,8 @@ from typing import Dict, Optional
 class UnderstandingScorer:
     def __init__(self):
-        # Scoring weights
         self.weights = {
             'consistency': 0.25,
             'coverage': 0.35,
@@ -72,8 +73,8 @@ class UnderstandingScorer:
         High score = explicit about assumptions, boundary conditions, limitations
         Low score = makes implicit assumptions without stating them
         """
-        # Heuristic: if there are no contradictions and good coverage,
-        # assumptions are likely being handled well
         consistency_score = consistency_result.get('consistency_score', 0)
         coverage_score = coverage_result.get('coverage_score', 0)

 class UnderstandingScorer:
     def __init__(self):
+        # Scoring weights - adjusted these a few times based on testing
+        # coverage is weighted highest because it's most reliable
         self.weights = {
             'consistency': 0.25,
             'coverage': 0.35,
         High score = explicit about assumptions, boundary conditions, limitations
         Low score = makes implicit assumptions without stating them
         """
+        # Simple heuristic for now - basically if consistency and coverage are good,
+        # assumptions are probably okay. Not perfect but works decently.
         consistency_score = consistency_result.get('consistency_score', 0)
         coverage_score = coverage_result.get('coverage_score', 0)