Upload apply_quick_fix.py with huggingface_hub

4c5cf4c verified 8 months ago

4.39 kB

	#!/usr/bin/env python3
	"""
	Quick fix: Invert the model predictions since the current model has inverted logic
	"""

	# shutil and os are used below to back up and locate the inference file
	import shutil
	import os

	def create_fixed_inference():
	    """Patch ``app/model/inference.py`` so ``_is_hallucination`` flips yes/no.

	    The inference file is first copied to ``app/model/inference_backup.py``
	    (only when that backup does not exist yet), then the exact source text of
	    the old ``_is_hallucination`` method is replaced by a version whose
	    explicit "yes"/"no" branches are swapped. Both paths are relative to the
	    current working directory.

	    Returns:
	        bool: True if the file contains the patched method after this call
	        (patched now, or already patched by an earlier run); False if the
	        inference file is missing or the expected method text is not found.
	    """
	    print("🔧 APPLYING QUICK FIX")
	    print("=" * 40)

	    original_file = "app/model/inference.py"
	    backup_file = "app/model/inference_backup.py"

	    # Report a missing target like every other failure (message + False)
	    # instead of letting shutil/open raise FileNotFoundError.
	    if not os.path.isfile(original_file):
	        print(f"❌ Could not find {original_file}")
	        return False

	    # Only the first run creates the backup, so a re-run can never overwrite
	    # the pristine copy with an already-patched file.
	    if not os.path.exists(backup_file):
	        shutil.copy2(original_file, backup_file)
	        print(f"✅ Backed up original to {backup_file}")

	    # Explicit UTF-8: Python source defaults to UTF-8, and the replacement
	    # text below contains an emoji that a locale-default codec (e.g. cp1252)
	    # cannot encode.
	    with open(original_file, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # NOTE(review): the replacement is an exact substring match, so the
	    # whitespace inside old_method must mirror app/model/inference.py byte
	    # for byte - confirm the indentation against that file.
	    # The regex word boundary is written as \\b here so that the patched file
	    # receives r'\b'.
	    old_method = '''    def _is_hallucination(self, pred_text: str) -> bool:
	        """Determine if prediction indicates hallucination"""
	        pred_text = pred_text.lower().strip()

	        # Look for explicit "no" responses (meaning factually incorrect/hallucination)
	        if "no" in pred_text and "yes" not in pred_text:
	            return True

	        # Look for explicit "yes" responses (meaning factually correct)
	        if "yes" in pred_text and "no" not in pred_text:
	            return False

	        # Fallback: look for hallucination indicators using word boundaries
	        hallucination_words = ["incorrect", "wrong", "false", "hallucination", "inaccurate"]
	        factual_words = ["correct", "accurate", "true", "factual", "right"]

	        import re
	        hallucination_score = sum(1 for word in hallucination_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
	        factual_score = sum(1 for word in factual_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))

	        if hallucination_score > factual_score:
	            return True
	        elif factual_score > hallucination_score:
	            return False

	        # Default to False (not hallucination) if unclear
	        return False'''

	    new_method = '''    def _is_hallucination(self, pred_text: str) -> bool:
	        """Determine if prediction indicates hallucination - FIXED INVERTED LOGIC"""
	        pred_text = pred_text.lower().strip()

	        # 🔧 FIX: The model predictions are inverted, so we flip the logic
	        # Model says "yes" for hallucinations and "no" for correct facts

	        # Look for explicit "yes" responses (model thinks correct, but it's actually hallucination)
	        if "yes" in pred_text and "no" not in pred_text:
	            return True # FLIPPED: Model's "yes" means hallucination

	        # Look for explicit "no" responses (model thinks hallucination, but it's actually correct)
	        if "no" in pred_text and "yes" not in pred_text:
	            return False # FLIPPED: Model's "no" means correct

	        # Fallback: look for hallucination indicators using word boundaries
	        hallucination_words = ["incorrect", "wrong", "false", "hallucination", "inaccurate"]
	        factual_words = ["correct", "accurate", "true", "factual", "right"]

	        import re
	        hallucination_score = sum(1 for word in hallucination_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))
	        factual_score = sum(1 for word in factual_words if re.search(r'\\b' + re.escape(word) + r'\\b', pred_text))

	        if hallucination_score > factual_score:
	            return True
	        elif factual_score > hallucination_score:
	            return False

	        # Default to False (not hallucination) if unclear
	        return False'''

	    if old_method not in content:
	        # A second run finds the new text instead of the old one; report
	        # that as success rather than as a missing method.
	        if new_method in content:
	            print("✅ Fix already applied - nothing to do")
	            return True
	        print("❌ Could not find the method to replace")
	        return False

	    content = content.replace(old_method, new_method)

	    # Write the patched source back with the same explicit encoding.
	    with open(original_file, 'w', encoding='utf-8') as f:
	        f.write(content)

	    print("✅ Applied prediction inversion fix")
	    print("📝 The model predictions are now correctly interpreted")
	    print("🔄 Restart the server to apply changes")
	    return True

	if __name__ == "__main__":
	    # create_fixed_inference() returns True on success and False on failure;
	    # surface that as the process exit status (0 / 1) so shells and CI steps
	    # can tell whether the patch was applied.
	    raise SystemExit(0 if create_fixed_inference() else 1)