import os
import json
import logging
import re

from agents.planner import Planner
from agents.writer import Writer
from agents.humanizer import Humanizer
from agents.verifier import Verifier

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


def process_text(text, hf_token=None, intensity=1.0):
    """
    Main orchestration function for the Precision Humanization Pipeline.

    1. Planner: Extracts exact statistics and themes.
    2. Writer: Rewrites using Source Anchor + Staccato Rhythm.
    3. Humanizer: Refines with strict Fidelity Pillars.
    4. FidelityGuard: Blocks any output that mutates or misses source facts.

    Args:
        text: The AI-generated text to humanize.
        hf_token: Hugging Face API token; falls back to the HF_TOKEN env var.
        intensity: Rewrite strength passed through to Writer and Humanizer.

    Returns:
        A 4-tuple of (plan JSON string, blind draft, humanized text,
        verification summary). On failure the first element carries an
        "Error: ..." message and the rest are empty strings.
    """
    token = hf_token or os.getenv("HF_TOKEN")
    if not token:
        return "Error: HF_TOKEN not found.", "", "", ""

    logger.info("Starting Precision Humanization Pipeline (Phase 15)...")

    # Initialize Agents
    planner = Planner(token)
    writer = Writer(token)
    humanizer = Humanizer(token)
    verifier = Verifier(token)

    try:
        # Step 1: Semantic Planning
        logger.info("Step 1: Planning (Exact Mirroring)...")
        plan = planner.plan(text)
        plan_str = json.dumps(plan, indent=2)

        # Step 2: Source-Guided Writing
        logger.info("Step 2: Guided Writing (Source Anchor)...")
        draft = writer.write(plan, text, intensity=intensity)

        # Step 3: Humanization (Grounded Mode)
        # (Fixed log typo: "Pilar" -> "Pillars", matching the docstring.)
        logger.info("Step 3: Humanizing (Fidelity Pillars)...")
        humanized_text = humanizer.humanize(draft, text, intensity=intensity)

        # Step 4: Fidelity Guard (Hard Fact check)
        logger.info("Step 4: Fidelity Guard (Fact Lock)...")
        if not fidelity_guard(humanized_text, plan):
            logger.warning("Fidelity check failed! Draft likely mutated facts.")
            # We don't fail the whole app, but we log the warning for the
            # developer. In production, we might retry or return a
            # 'Fidelity Error'.

        # Step 5: Verification
        logger.info("Step 5: Verifying...")
        verification = verifier.verify(humanized_text)

        # Step 6: Final Polish
        humanized_text = final_polish(humanized_text)

        # Use .get() so a partial verifier response degrades to defaults
        # instead of aborting the whole pipeline with a KeyError.
        label = verification.get("label", "Unknown")
        confidence = verification.get("confidence", 0.0)
        ver_str = f"Label: {label}\nConfidence: {confidence:.1%}"

        return plan_str, draft, humanized_text, ver_str
    except Exception as e:
        logger.error("Pipeline failed: %s", e)
        return f"Error: {str(e)}", "", "", ""


def fidelity_guard(text, plan):
    """
    Return True if the core facts (stats, names) from the plan are found
    in the text; False if mutations or missing data are detected.

    Args:
        text: The candidate humanized text.
        plan: Planner output dict; reads the "entities" list and the
              "fact" field of each item in "points".
    """
    findings = []
    lowered = text.lower()

    # Check for specific names and stats mentioned in plan
    for ent in plan.get("entities", []):
        if ent.lower() not in lowered:
            findings.append(f"Missing Entity: {ent}")

    for point in plan.get("points", []):
        fact = point.get("fact", "")
        # Look for numbers/stats like "42 percent" or "42%"
        for s in re.findall(r"\d+\spercent|\d+%", fact):
            # Accept equivalent spellings ("42 percent" vs "42%") so a
            # legitimate reformat is not flagged as a mutation.
            num = re.search(r"\d+", s).group()
            variants = (s.lower(), f"{num}%", f"{num} percent")
            if not any(v in lowered for v in variants):
                findings.append(f"Missing/Mutated Stat: {s}")

    if findings:
        logger.error("FIDELITY FAIL: %s", ", ".join(findings))
        return False
    return True


def final_polish(text):
    """
    Final regex-based cleanup to fix tokenization hallucinations and
    ensure structural integrity/spacing.
    """
    # Rule 2: NO EM-DASHES (Replace with comma)
    text = text.replace("\u2014", ",").replace("--", ",")

    # Fix common tokenization errors from high temperature.
    # Preserve the subject word's capitalization via a group reference
    # (the old fixed-case replacements lowercased/uppercased mid-sentence).
    text = re.sub(
        r"\b(this|that|it)n't\b",
        lambda m: m.group(1) + " isn't",
        text,
        flags=re.IGNORECASE,
    )
    # Word boundaries so fragments inside longer words are left alone.
    text = re.sub(
        r"\bisnt\b",
        lambda m: "Isn't" if m.group(0)[0].isupper() else "isn't",
        text,
        flags=re.IGNORECASE,
    )
    text = re.sub(
        r"\bdont\b",
        lambda m: "Don't" if m.group(0)[0].isupper() else "don't",
        text,
        flags=re.IGNORECASE,
    )

    # Normalize horizontal whitespace but PRESERVE double-line breaks
    # for paragraphs.
    # 1. Normalize spaces/tabs
    text = re.sub(r"[ \t]+", " ", text)
    # 2. Normalize more than two newlines to double newlines
    text = re.sub(r"\n{3,}", "\n\n", text)

    return text.strip()


if __name__ == "__main__":
    # Local CLI test
    import sys

    print("--- AI Text Humanizer CLI (Re-Authoring Mode) ---")

    # Check for token
    token = os.getenv("HF_TOKEN")
    if not token and len(sys.argv) > 1:
        token = sys.argv[1]
    if not token:
        print("Set HF_TOKEN env var or pass as arg.")
        sys.exit(1)

    print("Enter AI text (Ctrl+D to finish):")
    input_text = sys.stdin.read().strip()
    if input_text:
        plan, draft, final, ver = process_text(input_text, token)
        print("\n--- PLAN ---\n", plan)
        print("\n--- BLIND DRAFT ---\n", draft)
        print("\n--- HUMANIZED ---\n", final)
        print("\n--- VERIFICATION ---\n", ver)