Spaces:
Running
Running
AJAY KASU
Phase 15: Fidelity Anchor & Zero Hallucination Lock (Source-guided writer, FidelityGuard, Temp 0.7)
8e8db39 | import os | |
| import json | |
| import logging | |
| import re | |
| from agents.planner import Planner | |
| from agents.writer import Writer | |
| from agents.humanizer import Humanizer | |
| from agents.verifier import Verifier | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") | |
| logger = logging.getLogger(__name__) | |
def process_text(text, hf_token=None, intensity=1.0):
    """
    Run the Precision Humanization Pipeline on *text*.

    Stages:
        1. Planner: extracts exact statistics and themes.
        2. Writer: rewrites using Source Anchor + Staccato Rhythm.
        3. Humanizer: refines with strict Fidelity Pillars.
        4. FidelityGuard: flags output that mutates or misses source facts.
        5. Verifier: scores the humanized text.
        6. Final polish: regex cleanup of tokenization artifacts.

    Args:
        text: AI-generated input text to humanize.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var.
        intensity: Humanization strength forwarded to Writer and Humanizer.

    Returns:
        Tuple of (plan_json, blind_draft, humanized_text, verification_summary).
        On failure the first element carries an error message and the rest are "".
    """
    token = hf_token or os.getenv("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not found.", "", "", ""

    logger.info("Starting Precision Humanization Pipeline (Phase 15)...")

    # Initialize agents; each is constructed with the resolved API token.
    planner = Planner(token)
    writer = Writer(token)
    humanizer = Humanizer(token)
    verifier = Verifier(token)

    try:
        # Step 1: Semantic planning — extract the facts that must be mirrored.
        logger.info("Step 1: Planning (Exact Mirroring)...")
        plan = planner.plan(text)
        plan_str = json.dumps(plan, indent=2)

        # Step 2: Source-guided writing.
        logger.info("Step 2: Guided Writing (Source Anchor)...")
        draft = writer.write(plan, text, intensity=intensity)

        # Step 3: Humanization (grounded mode).
        # Fixed typo in the log message: "Pilar" -> "Pillar".
        logger.info("Step 3: Humanizing (Fidelity Pillar)...")
        humanized_text = humanizer.humanize(draft, text, intensity=intensity)

        # Step 4: Fidelity guard (hard fact check).
        logger.info("Step 4: Fidelity Guard (Fact Lock)...")
        if not fidelity_guard(humanized_text, plan):
            # Non-fatal by design: we log for the developer instead of failing
            # the whole app. In production we might retry or return a
            # 'Fidelity Error'.
            logger.warning("Fidelity check failed! Draft likely mutated facts.")

        # Step 5: Verification.
        logger.info("Step 5: Verifying...")
        verification = verifier.verify(humanized_text)

        # Step 6: Final polish.
        humanized_text = final_polish(humanized_text)

        ver_str = f"Label: {verification['label']}\nConfidence: {verification['confidence']:.1%}"
        return plan_str, draft, humanized_text, ver_str
    except Exception as e:
        # Top-level boundary: log with full traceback, return a UI-friendly error.
        logger.exception("Pipeline failed: %s", e)
        return f"Error: {str(e)}", "", "", ""
def fidelity_guard(text, plan):
    """
    Check that core facts (entity names, numeric stats) from *plan* survive in *text*.

    Args:
        text: Candidate humanized output.
        plan: Planner output dict; reads "entities" (list of names) and
              "points" (list of dicts each carrying a "fact" string).

    Returns:
        True if every entity and every numeric stat (e.g. "42 percent", "42%")
        found in the plan also appears, case-insensitively, in the text.
        False if anything is missing or mutated (details are logged).
    """
    findings = []
    # Lower-case the text once instead of on every loop iteration.
    haystack = text.lower()

    # Check for specific names mentioned in the plan.
    for ent in plan.get("entities", []):
        if ent.lower() not in haystack:
            findings.append(f"Missing Entity: {ent}")

    # Check numeric stats like "42 percent" or "42%" embedded in each fact.
    for point in plan.get("points", []):
        fact = point.get("fact", "")
        for stat in re.findall(r'\d+\spercent|\d+%', fact):
            if stat.lower() not in haystack:
                findings.append(f"Missing/Mutated Stat: {stat}")

    if findings:
        logger.error("FIDELITY FAIL: %s", ", ".join(findings))
        return False
    return True
def final_polish(text):
    """
    Final regex-based cleanup: fix common tokenization hallucinations and
    normalize whitespace while preserving paragraph breaks.

    Args:
        text: Humanized text to clean.

    Returns:
        The cleaned, stripped text.
    """
    # Rule 2: NO EM-DASHES (replace with comma).
    text = text.replace("—", ",").replace("--", ",")

    # Fix common tokenization errors from high temperature sampling.
    text = re.sub(r"Thisn't", "This isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"thatn't", "that isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"itn't", "it isn't", text, flags=re.IGNORECASE)
    # \b guards keep these from rewriting longer words that merely contain
    # the substring (e.g. "mastodont" must not become "mastodon't").
    text = re.sub(r"\bisnt\b", "isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"\bdont\b", "don't", text, flags=re.IGNORECASE)

    # Normalize horizontal whitespace but PRESERVE double-line breaks:
    # 1. Collapse runs of spaces/tabs to a single space.
    text = re.sub(r"[ \t]+", " ", text)
    # 2. Cap three-or-more newlines at two (one blank line between paragraphs).
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
if __name__ == "__main__":
    # Local CLI test harness: resolves the API token, reads the AI text from
    # stdin, runs the pipeline, and prints every stage's output.
    import sys

    print("--- AI Text Humanizer CLI (Re-Authoring Mode) ---")

    # Token resolution order: environment variable, then first CLI argument.
    token = os.getenv("HF_TOKEN")
    if not token:
        token = sys.argv[1] if len(sys.argv) > 1 else None
    if not token:
        print("Set HF_TOKEN env var or pass as arg.")
        sys.exit(1)

    print("Enter AI text (Ctrl+D to finish):")
    input_text = sys.stdin.read().strip()
    if input_text:
        plan, draft, final, ver = process_text(input_text, token)
        stages = (
            ("PLAN", plan),
            ("BLIND DRAFT", draft),
            ("HUMANIZED", final),
            ("VERIFICATION", ver),
        )
        for heading, payload in stages:
            print(f"\n--- {heading} ---\n", payload)