# AI_Humanizer/agents/planner.py
# Author: AJAY KASU
# Phase 14: Temporal Grounding (Lock 2026, ban 2023 drift)
# Commit: fc1fb2d
import os
import json
import logging
import re
import random
from huggingface_hub import InferenceClient
logger = logging.getLogger(__name__)
class Planner:
    """
    Agent 1: Semantic Planner

    Asks a chat model to extract the core ideas of a text into a JSON outline,
    then measures how much of the source the outline copies (bigram Jaccard
    overlap) and retries when the overlap is too high.
    """

    # Number of generate/parse/check rounds plan() runs before giving up.
    MAX_ATTEMPTS = 3
    # Outlines whose bigram overlap with the source exceeds this are rejected.
    MAX_OVERLAP = 0.3

    # Keys under which a point may carry its statement / its evidence.
    # "claim"/"support" is the schema used by the fallback plan;
    # "fact"/"supporting_data" is the schema requested in the current prompt.
    _CLAIM_KEYS = ("claim", "fact")
    _SUPPORT_KEYS = ("support", "supporting_data")

    def __init__(self, hf_token=None):
        """
        Create the inference client.

        Args:
            hf_token: Hugging Face API token. When omitted, the HF_TOKEN
                environment variable is used (empty string if unset).
        """
        self.token = hf_token or os.getenv("HF_TOKEN", "")
        self.client = InferenceClient(token=self.token)
        # Using Qwen2.5-7B-Instruct for strong JSON adherence
        self.model = "Qwen/Qwen2.5-7B-Instruct"

    def plan(self, text):
        """
        Build an outline for ``text``.

        Runs up to MAX_ATTEMPTS rounds of generate -> parse -> overlap check.
        A round is abandoned when the output cannot be parsed, when the
        overlap exceeds MAX_OVERLAP, or when any step raises.

        NOTE(review): the prompt asks the model to reproduce source wording
        exactly while this gate rejects outlines that overlap the source too
        much; if valid outlines are being rejected, tune MAX_OVERLAP.

        Args:
            text: Source text to outline.

        Returns:
            The parsed outline dict, or a generic fallback dict when every
            attempt fails.
        """
        logger.info("planner: extracting outline from %d chars", len(text))
        for attempt in range(self.MAX_ATTEMPTS):
            try:
                # 1. Generate Outline
                outline_json = self._generate_outline(text, attempt)
                # 2. Parse JSON
                plan = self._parse_json(outline_json)
                if not plan:
                    continue
                # 3. Check Overlap (Enforce 'No Copying')
                overlap = self._check_overlap(text, plan)
                if overlap > self.MAX_OVERLAP:
                    logger.warning("planner: overlap %.2f too high, retrying...", overlap)
                    continue
                logger.info("planner: success (overlap=%.2f)", overlap)
                return plan
            except Exception as exc:
                # Best-effort boundary: a failed round (network, bad payload)
                # must not abort the remaining attempts.
                logger.error("planner attempt %d failed: %s", attempt, exc)
        # Fallback: Return simple structure if all else fails
        return {
            "topic": "General",
            "audience": "General",
            "tone": "Casual",
            "points": [{"claim": "Could not extract plan.", "support": [], "example_hint": ""}],
            "point_order_flex": True
        }

    def _generate_outline(self, text, attempt):
        """
        Ask the chat model for a JSON outline of ``text``.

        Args:
            text: Source text to extract facts from.
            attempt: Zero-based retry index; any retry (attempt > 0) appends
                a stricter instruction to the user message.

        Returns:
            The content of the first choice of the chat completion.
        """
        system_content = (
            "TODAY'S DATE: February 11, 2026.\n"
            "You are a technical data extraction specialist. Your goal is to extract ONLY hard facts from the source text.\n"
            "STRICT FIDELITY: You must mirror the source exactly. Assume the text is current as of 2026.\n"
            "1. THEMES: List 3 specific themes found ONLY in the provided text.\n"
            "2. DATA (MIRROR RULE): Extract specific statistics and names with EXACT WORDING. If the source says '42 percent', do not write '42%'.\n"
            "3. CHRONOLOGY: Entities must appear in a logical or chronological sequence.\n\n"
            "JSON Format:\n"
            "{\n"
            " \"topic\": \"...\",\n"
            " \"themes\": [\"...\", \"...\", \"...\"],\n"
            " \"entities\": [\"...\", \"...\"],\n"
            " \"points\": [\n"
            " {\n"
            " \"fact\": \"... (Use EXACT statistics and names from source)\",\n"
            " \"supporting_data\": [\"...\"],\n"
            " \"sequence_order\": 1\n"
            " }\n"
            " ]\n"
            "}\n"
        )
        user_content = f"Extract hard facts from this text:\n\n{text}"
        if attempt > 0:
            user_content += "\n\nCRITICAL: BE MORE FACTUAL. REMOVE ALL ADJECTIVES. USE ONLY DATA."
        messages = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content}
        ]
        response = self.client.chat_completion(
            messages=messages,
            model=self.model,
            max_tokens=1024,
            # NOTE(review): 0.7 is a moderate sampling temperature, not a low
            # one; lower it if more deterministic extraction is wanted.
            temperature=0.7,
            top_p=0.9
        )
        return response.choices[0].message.content

    def _parse_json(self, raw_text):
        """
        Parse the model output into an outline dict.

        Strips a Markdown code fence if present. If the remaining text is not
        valid JSON as a whole, the span from the first '{' to the last '}' is
        tried as well, so JSON surrounded by prose is still accepted.

        Args:
            raw_text: Raw model output; may be None.

        Returns:
            The parsed dict, or None when the input is not a string, is not
            parseable, or does not decode to a JSON object.
        """
        if not isinstance(raw_text, str):
            return None
        # Cleanup markdown
        cleaned = raw_text.strip()
        if "```json" in cleaned:
            cleaned = cleaned.split("```json")[1].split("```")[0]
        elif "```" in cleaned:
            cleaned = cleaned.split("```")[1].split("```")[0]

        candidates = [cleaned]
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if 0 <= start < end:
            candidates.append(cleaned[start:end + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Callers use dict methods on the result; reject lists/scalars.
            if isinstance(parsed, dict):
                return parsed
        logger.error("planner: failed to parse JSON output")
        return None

    def _check_overlap(self, original, plan):
        """
        Calculates Jaccard overlap of bigrams between Original Text and Outline Values.

        The outline text is the topic plus, for every point, its statement
        and evidence entries under either supported key pair
        ("claim"/"support" or "fact"/"supporting_data"). Missing, null or
        non-text values are ignored instead of raising.

        Args:
            original: Source text.
            plan: Parsed outline.

        Returns:
            |intersection| / |union| of the two bigram sets, or 0.0 when
            either set is empty or ``plan`` is not a dict.
        """
        def get_bigrams(s):
            words = re.findall(r'\w+', s.lower())
            return set(zip(words, words[1:]))

        def as_text(value):
            # JSON values may be null, numbers or nested containers.
            if isinstance(value, str):
                return value
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return str(value)
            return ""

        if not isinstance(plan, dict):
            return 0.0

        # Collect all text from plan
        fragments = [as_text(plan.get("topic"))]
        points = plan.get("points")
        if isinstance(points, (list, tuple)):
            for point in points:
                if not isinstance(point, dict):
                    fragments.append(as_text(point))
                    continue
                for key in self._CLAIM_KEYS:
                    fragments.append(as_text(point.get(key)))
                for key in self._SUPPORT_KEYS:
                    support = point.get(key)
                    if isinstance(support, (list, tuple)):
                        fragments.extend(as_text(item) for item in support)
                    else:
                        # A bare string is one entry, not a list of characters.
                        fragments.append(as_text(support))
        plan_text = " ".join(fragment for fragment in fragments if fragment)

        orig_bigrams = get_bigrams(original)
        plan_bigrams = get_bigrams(plan_text)
        if not orig_bigrams or not plan_bigrams:
            return 0.0
        return len(orig_bigrams & plan_bigrams) / len(orig_bigrams | plan_bigrams)