Spaces:
Running
Running
AJAY KASU
Phase 15: Fidelity Anchor & Zero Hallucination Lock (Source-guided writer, FidelityGuard, Temp 0.7)
8e8db39 | import os | |
| import json | |
| import logging | |
| import re | |
| from agents.planner import Planner | |
| from agents.writer import Writer | |
| from agents.humanizer import Humanizer | |
| from agents.verifier import Verifier | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") | |
| logger = logging.getLogger(__name__) | |
def process_text(text, hf_token=None, intensity=1.0):
    """
    Run the Precision Humanization Pipeline on *text*.

    Stages:
        1. Planner: extracts exact statistics and themes.
        2. Writer: rewrites using Source Anchor + Staccato Rhythm.
        3. Humanizer: refines with strict Fidelity Pillars.
        4. FidelityGuard: flags output that mutates or misses source facts.
        5. Verifier: scores the humanized text.
        6. Final polish: regex cleanup of tokenization artifacts.

    Args:
        text: AI-generated input text to humanize.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var.
        intensity: Humanization strength forwarded to Writer and Humanizer.

    Returns:
        Tuple of (plan_json, blind_draft, humanized_text, verification_summary).
        On failure the first element carries an error message and the rest are "".
    """
    token = hf_token or os.getenv("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not found.", "", "", ""

    logger.info("Starting Precision Humanization Pipeline (Phase 15)...")

    # Initialize agents; each is constructed with the resolved API token.
    planner = Planner(token)
    writer = Writer(token)
    humanizer = Humanizer(token)
    verifier = Verifier(token)

    try:
        # Step 1: Semantic planning — extract the facts that must be mirrored.
        logger.info("Step 1: Planning (Exact Mirroring)...")
        plan = planner.plan(text)
        plan_str = json.dumps(plan, indent=2)

        # Step 2: Source-guided writing.
        logger.info("Step 2: Guided Writing (Source Anchor)...")
        draft = writer.write(plan, text, intensity=intensity)

        # Step 3: Humanization (grounded mode).
        # Fixed typo in the log message: "Pilar" -> "Pillar".
        logger.info("Step 3: Humanizing (Fidelity Pillar)...")
        humanized_text = humanizer.humanize(draft, text, intensity=intensity)

        # Step 4: Fidelity guard (hard fact check).
        logger.info("Step 4: Fidelity Guard (Fact Lock)...")
        if not fidelity_guard(humanized_text, plan):
            # Non-fatal by design: we log for the developer instead of failing
            # the whole app. In production we might retry or return a
            # 'Fidelity Error'.
            logger.warning("Fidelity check failed! Draft likely mutated facts.")

        # Step 5: Verification.
        logger.info("Step 5: Verifying...")
        verification = verifier.verify(humanized_text)

        # Step 6: Final polish.
        humanized_text = final_polish(humanized_text)

        ver_str = f"Label: {verification['label']}\nConfidence: {verification['confidence']:.1%}"
        return plan_str, draft, humanized_text, ver_str
    except Exception as e:
        # Top-level boundary: log with full traceback, return a UI-friendly error.
        logger.exception("Pipeline failed: %s", e)
        return f"Error: {str(e)}", "", "", ""
def fidelity_guard(text, plan):
    """
    Check that core facts (entity names, numeric stats) from *plan* survive in *text*.

    Args:
        text: Candidate humanized output.
        plan: Planner output dict; reads "entities" (list of names) and
              "points" (list of dicts each carrying a "fact" string).

    Returns:
        True if every entity and every numeric stat (e.g. "42 percent", "42%")
        found in the plan also appears, case-insensitively, in the text.
        False if anything is missing or mutated (details are logged).
    """
    findings = []
    # Lower-case the text once instead of on every loop iteration.
    haystack = text.lower()

    # Check for specific names mentioned in the plan.
    for ent in plan.get("entities", []):
        if ent.lower() not in haystack:
            findings.append(f"Missing Entity: {ent}")

    # Check numeric stats like "42 percent" or "42%" embedded in each fact.
    for point in plan.get("points", []):
        fact = point.get("fact", "")
        for stat in re.findall(r'\d+\spercent|\d+%', fact):
            if stat.lower() not in haystack:
                findings.append(f"Missing/Mutated Stat: {stat}")

    if findings:
        logger.error("FIDELITY FAIL: %s", ", ".join(findings))
        return False
    return True
def final_polish(text):
    """
    Final regex-based cleanup: fix common tokenization hallucinations and
    normalize whitespace while preserving paragraph breaks.

    Args:
        text: Humanized text to clean.

    Returns:
        The cleaned, stripped text.
    """
    # Rule 2: NO EM-DASHES (replace with comma).
    text = text.replace("—", ",").replace("--", ",")

    # Fix common tokenization errors from high temperature sampling.
    text = re.sub(r"Thisn't", "This isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"thatn't", "that isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"itn't", "it isn't", text, flags=re.IGNORECASE)
    # \b guards keep these from rewriting longer words that merely contain
    # the substring (e.g. "mastodont" must not become "mastodon't").
    text = re.sub(r"\bisnt\b", "isn't", text, flags=re.IGNORECASE)
    text = re.sub(r"\bdont\b", "don't", text, flags=re.IGNORECASE)

    # Normalize horizontal whitespace but PRESERVE double-line breaks:
    # 1. Collapse runs of spaces/tabs to a single space.
    text = re.sub(r"[ \t]+", " ", text)
    # 2. Cap three-or-more newlines at two (one blank line between paragraphs).
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
if __name__ == "__main__":
    # Local CLI test harness: resolves the API token, reads the AI text from
    # stdin, runs the pipeline, and prints every stage's output.
    import sys

    print("--- AI Text Humanizer CLI (Re-Authoring Mode) ---")

    # Token resolution order: environment variable, then first CLI argument.
    token = os.getenv("HF_TOKEN")
    if not token:
        token = sys.argv[1] if len(sys.argv) > 1 else None
    if not token:
        print("Set HF_TOKEN env var or pass as arg.")
        sys.exit(1)

    print("Enter AI text (Ctrl+D to finish):")
    input_text = sys.stdin.read().strip()
    if input_text:
        plan, draft, final, ver = process_text(input_text, token)
        stages = (
            ("PLAN", plan),
            ("BLIND DRAFT", draft),
            ("HUMANIZED", final),
            ("VERIFICATION", ver),
        )
        for heading, payload in stages:
            print(f"\n--- {heading} ---\n", payload)