import os
import json
import logging
import re

from agents.planner import Planner
from agents.writer import Writer
from agents.humanizer import Humanizer
from agents.verifier import Verifier

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


def process_text(text, hf_token=None, intensity=1.0):
    """
    Main orchestration function for the Precision Humanization Pipeline.

    1. Planner: Extracts exact statistics and themes.
    2. Writer: Rewrites using Source Anchor + Staccato Rhythm.
    3. Humanizer: Refines with strict Fidelity Pillars.
    4. FidelityGuard: Blocks any output that mutates or misses source facts.

    Args:
        text: The AI-generated text to humanize.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var.
        intensity: Rewrite strength passed through to Writer and Humanizer.

    Returns:
        A 4-tuple of (plan JSON string, blind draft, humanized text,
        verification summary). On failure the first element carries an
        "Error: ..." message and the rest are empty strings.
    """
    token = hf_token or os.getenv("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not found.", "", "", ""

    logger.info("Starting Precision Humanization Pipeline (Phase 15)...")

    # Initialize Agents
    planner = Planner(token)
    writer = Writer(token)
    humanizer = Humanizer(token)
    verifier = Verifier(token)

    try:
        # Step 1: Semantic Planning
        logger.info("Step 1: Planning (Exact Mirroring)...")
        plan = planner.plan(text)
        plan_str = json.dumps(plan, indent=2)

        # Step 2: Source-Guided Writing
        logger.info("Step 2: Guided Writing (Source Anchor)...")
        draft = writer.write(plan, text, intensity=intensity)

        # Step 3: Humanization (Grounded Mode)
        # (Fixed log typo: "Pilar" -> "Pillars", matching the docstring.)
        logger.info("Step 3: Humanizing (Fidelity Pillars)...")
        humanized_text = humanizer.humanize(draft, text, intensity=intensity)

        # Step 4: Fidelity Guard (Hard Fact check)
        logger.info("Step 4: Fidelity Guard (Fact Lock)...")
        if not fidelity_guard(humanized_text, plan):
            logger.warning("Fidelity check failed! Draft likely mutated facts.")
            # We don't fail the whole app, but we log the warning for the
            # developer. In production, we might retry or return a
            # 'Fidelity Error'.

        # Step 5: Verification
        logger.info("Step 5: Verifying...")
        verification = verifier.verify(humanized_text)

        # Step 6: Final Polish
        humanized_text = final_polish(humanized_text)

        # Use .get() so a partial verifier response degrades to defaults
        # instead of aborting the whole pipeline with a KeyError.
        label = verification.get("label", "Unknown")
        confidence = verification.get("confidence", 0.0)
        ver_str = f"Label: {label}\nConfidence: {confidence:.1%}"

        return plan_str, draft, humanized_text, ver_str
    except Exception as e:
        logger.error("Pipeline failed: %s", e)
        return f"Error: {str(e)}", "", "", ""


def fidelity_guard(text, plan):
    """
    Return True if the core facts (stats, names) from the plan are found
    in the text; False if mutations or missing data are detected.

    Args:
        text: The candidate humanized text.
        plan: Planner output dict; reads the "entities" list and the
              "fact" field of each item in "points".
    """
    findings = []
    lowered = text.lower()

    # Check for specific names and stats mentioned in plan
    for ent in plan.get("entities", []):
        if ent.lower() not in lowered:
            findings.append(f"Missing Entity: {ent}")

    for point in plan.get("points", []):
        fact = point.get("fact", "")
        # Look for numbers/stats like "42 percent" or "42%"
        for s in re.findall(r"\d+\spercent|\d+%", fact):
            # Accept equivalent spellings ("42 percent" vs "42%") so a
            # legitimate reformat is not flagged as a mutation.
            num = re.search(r"\d+", s).group()
            variants = (s.lower(), f"{num}%", f"{num} percent")
            if not any(v in lowered for v in variants):
                findings.append(f"Missing/Mutated Stat: {s}")

    if findings:
        logger.error("FIDELITY FAIL: %s", ", ".join(findings))
        return False
    return True


def final_polish(text):
    """
    Final regex-based cleanup to fix tokenization hallucinations and
    ensure structural integrity/spacing.
    """
    # Rule 2: NO EM-DASHES (Replace with comma)
    text = text.replace("\u2014", ",").replace("--", ",")

    # Fix common tokenization errors from high temperature.
    # Preserve the subject word's capitalization via a group reference
    # (the old fixed-case replacements lowercased/uppercased mid-sentence).
    text = re.sub(
        r"\b(this|that|it)n't\b",
        lambda m: m.group(1) + " isn't",
        text,
        flags=re.IGNORECASE,
    )
    # Word boundaries so fragments inside longer words are left alone.
    text = re.sub(
        r"\bisnt\b",
        lambda m: "Isn't" if m.group(0)[0].isupper() else "isn't",
        text,
        flags=re.IGNORECASE,
    )
    text = re.sub(
        r"\bdont\b",
        lambda m: "Don't" if m.group(0)[0].isupper() else "don't",
        text,
        flags=re.IGNORECASE,
    )

    # Normalize horizontal whitespace but PRESERVE double-line breaks
    # for paragraphs.
    # 1. Normalize spaces/tabs
    text = re.sub(r"[ \t]+", " ", text)
    # 2. Normalize more than two newlines to double newlines
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip()


if __name__ == "__main__":
    # Local CLI test
    import sys

    print("--- AI Text Humanizer CLI (Re-Authoring Mode) ---")

    # Check for token
    token = os.getenv("HF_TOKEN")
    if not token and len(sys.argv) > 1:
        token = sys.argv[1]
    if not token:
        print("Set HF_TOKEN env var or pass as arg.")
        sys.exit(1)

    print("Enter AI text (Ctrl+D to finish):")
    input_text = sys.stdin.read().strip()
    if input_text:
        plan, draft, final, ver = process_text(input_text, token)
        print("\n--- PLAN ---\n", plan)
        print("\n--- BLIND DRAFT ---\n", draft)
        print("\n--- HUMANIZED ---\n", final)
        print("\n--- VERIFICATION ---\n", ver)