hallucination-detector-project / apply_quick_fix.py
KShoichi's picture
Upload apply_quick_fix.py with huggingface_hub
4c5cf4c verified
#!/usr/bin/env python3
"""
Quick fix: Invert the model predictions since the current model has inverted logic
"""
# Standard-library helpers used to back up and patch the inference file
import shutil
import os
def _blank_line_tolerant_pattern(snippet):
    """Compile a regex matching *snippet* line by line, ignoring blank lines.

    Every non-blank line of *snippet* must appear in order with exactly the
    same leading indentation and text. Trailing spaces/tabs and any number of
    whitespace-only lines between two code lines are ignored, so the pattern
    still matches when the target file spaces the method out differently.
    """
    # Imported locally so this block does not depend on a module-level import.
    import re

    code_lines = [line.rstrip() for line in snippet.splitlines() if line.strip()]
    # Trailing blanks, the line break, then zero or more whitespace-only lines.
    gap = r"[ \t]*\n(?:[ \t]*\n)*"
    return re.compile("^" + gap.join(map(re.escape, code_lines)), re.MULTILINE)


def create_fixed_inference():
    """Patch ``app/model/inference.py`` so yes/no predictions are read inverted.

    The script replaces the ``_is_hallucination`` method with a version in
    which an explicit "yes" means hallucination and an explicit "no" means
    factually correct. Both paths are relative to the current working
    directory. A one-time backup is written to
    ``app/model/inference_backup.py`` (an existing backup is never
    overwritten, so it keeps the oldest saved state).

    Returns:
        bool: True when the file was patched or already contains the patched
        method; False when the target file is missing or the expected
        original method could not be located.
    """
    print("🔧 APPLYING QUICK FIX")
    print("=" * 40)

    original_file = "app/model/inference.py"
    backup_file = "app/model/inference_backup.py"

    # Fail with a readable message instead of a traceback from shutil/open.
    if not os.path.exists(original_file):
        print(f"❌ Could not find {original_file}")
        return False

    # Backup original (only once, so reruns never clobber the first backup).
    if not os.path.exists(backup_file):
        shutil.copy2(original_file, backup_file)
        print(f"✅ Backed up original to {backup_file}")

    # Read as UTF-8 explicitly: Python source defaults to UTF-8, whereas the
    # locale default (e.g. cp1252) could mis-decode it.
    with open(original_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Both templates assume the method lives in a class body: ``def`` indented
    # by four spaces and its statements by eight.
    # NOTE: these are non-raw strings, so ``\\b`` below becomes ``\b`` in the
    # text that is matched against / written to the target file.
    old_method = '''    def _is_hallucination(self, pred_text: str) -> bool:
        """Determine if prediction indicates hallucination"""
        pred_text = pred_text.lower().strip()
        # Look for explicit "no" responses (meaning factually incorrect/hallucination)
        if "no" in pred_text and "yes" not in pred_text:
            return True
        # Look for explicit "yes" responses (meaning factually correct)
        if "yes" in pred_text and "no" not in pred_text:
            return False
        # Fallback: look for hallucination indicators using word boundaries
        hallucination_words = ["incorrect", "wrong", "false", "hallucination", "inaccurate"]
        factual_words = ["correct", "accurate", "true", "factual", "right"]
        import re
        hallucination_score = sum(1 for word in hallucination_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
        factual_score = sum(1 for word in factual_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
        if hallucination_score > factual_score:
            return True
        elif factual_score > hallucination_score:
            return False
        # Default to False (not hallucination) if unclear
        return False'''

    new_method = '''    def _is_hallucination(self, pred_text: str) -> bool:
        """Determine if prediction indicates hallucination - FIXED INVERTED LOGIC"""
        pred_text = pred_text.lower().strip()
        # 🔧 FIX: The model predictions are inverted, so we flip the logic
        # Model says "yes" for hallucinations and "no" for correct facts
        # Look for explicit "yes" responses (model thinks correct, but it's actually hallucination)
        if "yes" in pred_text and "no" not in pred_text:
            return True  # FLIPPED: Model's "yes" means hallucination
        # Look for explicit "no" responses (model thinks hallucination, but it's actually correct)
        if "no" in pred_text and "yes" not in pred_text:
            return False  # FLIPPED: Model's "no" means correct
        # Fallback: look for hallucination indicators using word boundaries
        hallucination_words = ["incorrect", "wrong", "false", "hallucination", "inaccurate"]
        factual_words = ["correct", "accurate", "true", "factual", "right"]
        import re
        hallucination_score = sum(1 for word in hallucination_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
        factual_score = sum(1 for word in factual_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
        if hallucination_score > factual_score:
            return True
        elif factual_score > hallucination_score:
            return False
        # Default to False (not hallucination) if unclear
        return False'''

    # A callable replacement is required: the new text contains ``\b``, which
    # re.sub would otherwise try to interpret as a template escape.
    patched, replaced = _blank_line_tolerant_pattern(old_method).subn(
        lambda _match: new_method, content
    )

    if replaced:
        # UTF-8 is mandatory here: the replacement contains a non-ASCII
        # character, and an encoding error raised after opening in 'w' mode
        # would leave the target file truncated.
        with open(original_file, 'w', encoding='utf-8') as f:
            f.write(patched)
        print("✅ Applied prediction inversion fix")
        print("📝 The model predictions are now correctly interpreted")
        print("🔄 Restart the server to apply changes")
        return True

    # Rerunning the script after a successful patch is a no-op, not a failure.
    if _blank_line_tolerant_pattern(new_method).search(content):
        print("✅ Fix already applied; nothing to do")
        return True

    print("❌ Could not find the method to replace")
    return False
if __name__ == "__main__":
    # Propagate the outcome as the process exit status (0 = success,
    # 1 = failure) so shells and CI steps can detect a patch that did not apply.
    raise SystemExit(0 if create_fixed_inference() else 1)