"""AI_Humanizer / main.py

Author: AJAY KASU
Phase 15: Fidelity Anchor & Zero Hallucination Lock
(Source-guided writer, FidelityGuard, Temp 0.7)
Commit: 8e8db39
"""
import os
import json
import logging
import re
from agents.planner import Planner
from agents.writer import Writer
from agents.humanizer import Humanizer
from agents.verifier import Verifier
# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
def process_text(text, hf_token=None, intensity=1.0):
    """
    Main orchestration function for the Precision Humanization Pipeline.

    1. Planner: Extracts exact statistics and themes.
    2. Writer: Rewrites using Source Anchor + Staccato Rhythm.
    3. Humanizer: Refines with strict Fidelity Pillars.
    4. FidelityGuard: Blocks any output that mutates or misses source facts.

    Args:
        text: AI-generated source text to humanize.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var.
        intensity: Humanization strength forwarded to Writer and Humanizer.

    Returns:
        Tuple of (plan JSON string, blind draft, humanized text,
        verification summary). On failure the first element is an error
        message and the remaining three are empty strings.
    """
    token = hf_token or os.getenv("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not found.", "", "", ""

    logger.info("Starting Precision Humanization Pipeline (Phase 15)...")

    # Initialize agents (each wraps a remote model endpoint).
    planner = Planner(token)
    writer = Writer(token)
    humanizer = Humanizer(token)
    verifier = Verifier(token)

    try:
        # Step 1: Semantic Planning
        logger.info("Step 1: Planning (Exact Mirroring)...")
        plan = planner.plan(text)
        plan_str = json.dumps(plan, indent=2)

        # Step 2: Source-Guided Writing
        logger.info("Step 2: Guided Writing (Source Anchor)...")
        draft = writer.write(plan, text, intensity=intensity)

        # Step 3: Humanization (Grounded Mode)
        # Fixed typo in the log message: "Pilar" -> "Pillar".
        logger.info("Step 3: Humanizing (Fidelity Pillar)...")
        humanized_text = humanizer.humanize(draft, text, intensity=intensity)

        # Step 4: Fidelity Guard (hard fact check)
        logger.info("Step 4: Fidelity Guard (Fact Lock)...")
        if not fidelity_guard(humanized_text, plan):
            logger.warning("Fidelity check failed! Draft likely mutated facts.")
            # We don't fail the whole app, but we log the warning for the developer.
            # In production, we might retry or return a 'Fidelity Error'.

        # Step 5: Verification
        logger.info("Step 5: Verifying...")
        verification = verifier.verify(humanized_text)

        # Step 6: Final Polish
        humanized_text = final_polish(humanized_text)

        ver_str = (
            f"Label: {verification['label']}\n"
            f"Confidence: {verification['confidence']:.1%}"
        )
        return plan_str, draft, humanized_text, ver_str
    except Exception as e:
        # Broad catch is deliberate: this is the pipeline's top-level
        # boundary. logger.exception records the full traceback.
        logger.exception("Pipeline failed: %s", e)
        return f"Error: {str(e)}", "", "", ""
def fidelity_guard(text, plan):
    """
    Check that core facts (stats, names) from the plan appear in the text.

    Args:
        text: Candidate humanized output.
        plan: Planner dict; optional keys "entities" (list[str]) and
            "points" (list[dict] with a "fact" string each).

    Returns:
        True if every planned entity and every numeric stat (e.g.
        "42 percent", "42%") occurs in the text (case-insensitive);
        False if anything is missing or mutated.
    """
    findings = []
    # Hoist the case-insensitive haystack: the original recomputed
    # text.lower() on every iteration of both loops.
    text_lower = text.lower()

    # Check for specific names mentioned in the plan.
    for ent in plan.get("entities", []):
        if ent.lower() not in text_lower:
            findings.append(f"Missing Entity: {ent}")

    # Check numeric stats embedded in each planned fact.
    for point in plan.get("points", []):
        fact = point.get("fact", "")
        # Look for numbers/stats like "42 percent" or "42%".
        for s in re.findall(r'\d+\spercent|\d+%', fact):
            if s.lower() not in text_lower:
                findings.append(f"Missing/Mutated Stat: {s}")

    if findings:
        logger.error("FIDELITY FAIL: %s", ", ".join(findings))
        return False
    return True
def final_polish(text):
    """
    Final regex-based cleanup to fix tokenization hallucinations
    and ensure structural integrity/spacing.

    Args:
        text: Humanized text to clean.

    Returns:
        Cleaned text: em-dashes replaced with commas, common contraction
        glitches repaired, horizontal whitespace collapsed, and runs of
        3+ newlines reduced to paragraph breaks; stripped at both ends.
    """
    # Rule 2: NO EM-DASHES (Replace with comma)
    text = text.replace("—", ",").replace("--", ",")

    # Fix common tokenization errors from high temperature
    text = re.sub(r"Thisn't", "This isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"thatn't", "that isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"itn't", "it isn't", text, flags=re.IGNORECASE)
    # \b anchors added: the unanchored patterns also rewrote these letter
    # runs INSIDE longer words, corrupting otherwise valid text.
    text = re.sub(r"\bisnt\b", "isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"\bdont\b", "don't", text, flags=re.IGNORECASE)

    # Normalize horizontal whitespace but PRESERVE double-line breaks for paragraphs
    # 1. Normalize spaces/tabs
    text = re.sub(r"[ \t]+", " ", text)
    # 2. Normalize more than two newlines to double newlines
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
if __name__ == "__main__":
    # Local CLI smoke test: token from env/argv, source text from stdin.
    import sys

    print("--- AI Text Humanizer CLI (Re-Authoring Mode) ---")

    # Token resolution: environment variable first, then first CLI argument.
    cli_token = os.getenv("HF_TOKEN")
    if not cli_token and len(sys.argv) > 1:
        cli_token = sys.argv[1]
    if not cli_token:
        print("Set HF_TOKEN env var or pass as arg.")
        sys.exit(1)

    print("Enter AI text (Ctrl+D to finish):")
    source_text = sys.stdin.read().strip()
    if source_text:
        plan_out, draft_out, final_out, ver_out = process_text(source_text, cli_token)
        # Print each pipeline stage under its banner.
        for banner, payload in (
            ("\n--- PLAN ---\n", plan_out),
            ("\n--- BLIND DRAFT ---\n", draft_out),
            ("\n--- HUMANIZED ---\n", final_out),
            ("\n--- VERIFICATION ---\n", ver_out),
        ):
            print(banner, payload)