Spaces:

AJAYKASU
/

AI_Humanizer

Running

AJAY KASU

Phase 14: Temporal Grounding (Lock 2026, ban 2023 drift)

fc1fb2d 2 months ago

5.22 kB


	import os
	import json
	import logging
	import re
	import random
	from huggingface_hub import InferenceClient

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	class Planner:
	    """
	    Agent 1: Semantic Planner.

	    Asks a chat model to turn source text into a JSON outline, parses the
	    reply, and rejects outlines whose bigram overlap with the source is
	    above ``MAX_OVERLAP``. After ``MAX_ATTEMPTS`` unsuccessful rounds a
	    fixed placeholder plan is returned instead of raising.
	    """

	    # Number of generate -> parse -> overlap-check rounds before giving up.
	    MAX_ATTEMPTS = 3
	    # Jaccard bigram overlap above which an outline counts as "too much copying".
	    MAX_OVERLAP = 0.3
	    # Markdown code fence, optionally tagged "json". The closing fence is
	    # optional so replies cut off mid-fence still yield their content.
	    _FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)(?:```|$)", re.DOTALL | re.IGNORECASE)

	    def __init__(self, hf_token=None):
	        """
	        Create the inference client.

	        Args:
	            hf_token: Hugging Face API token. When omitted, the ``HF_TOKEN``
	                environment variable is used.
	        """
	        self.token = hf_token or os.getenv("HF_TOKEN", "")
	        # Hand the client None rather than "" when no token is configured, so
	        # it is not given an empty credential string.
	        # NOTE(review): presumably this lets huggingface_hub use its own token
	        # lookup -- confirm against the installed version.
	        self.client = InferenceClient(token=self.token or None)
	        # Using Qwen2.5-7B-Instruct for strong JSON adherence
	        self.model = "Qwen/Qwen2.5-7B-Instruct"

	    def plan(self, text):
	        """
	        Build an outline for *text*.

	        Each round generates an outline, parses it and measures its overlap
	        with the source. Any exception raised inside a round is logged with
	        its traceback and the next round is tried.

	        Args:
	            text: Source text to outline.

	        Returns:
	            The parsed outline dict of the first round that parses and stays
	            at or below ``MAX_OVERLAP``; otherwise the placeholder plan from
	            ``_fallback_plan``. Empty or whitespace-only input returns the
	            placeholder without calling the model.
	        """
	        if not text or not text.strip():
	            # Nothing to extract: avoid three model calls and an outline that
	            # would pass the overlap check only because the source is empty.
	            logger.warning("planner: empty input, returning fallback plan")
	            return self._fallback_plan()

	        logger.info("planner: extracting outline from %d chars", len(text))

	        for attempt in range(self.MAX_ATTEMPTS):
	            try:
	                # 1. Generate Outline
	                outline_json = self._generate_outline(text, attempt)

	                # 2. Parse JSON
	                plan = self._parse_json(outline_json)
	                if not plan:
	                    continue

	                # 3. Check Overlap (Enforce 'No Copying')
	                overlap = self._check_overlap(text, plan)
	                if overlap > self.MAX_OVERLAP:
	                    logger.warning("planner: overlap %.2f too high, retrying...", overlap)
	                    continue

	                logger.info("planner: success (overlap=%.2f)", overlap)
	                return plan

	            except Exception as exc:
	                # Deliberate best-effort boundary around the remote call:
	                # keep the traceback in the log and move on to the next round.
	                logger.exception("planner attempt %d failed: %s", attempt, exc)

	        # Fallback: Return simple structure if all else fails
	        logger.warning("planner: all %d attempts failed, returning fallback plan", self.MAX_ATTEMPTS)
	        return self._fallback_plan()

	    @staticmethod
	    def _fallback_plan():
	        """Return a fresh placeholder plan used when no outline could be produced."""
	        return {
	            "topic": "General",
	            "audience": "General",
	            "tone": "Casual",
	            "points": [{"claim": "Could not extract plan.", "support": [], "example_hint": ""}],
	            "point_order_flex": True,
	        }

	    def _generate_outline(self, text, attempt):
	        """
	        Ask the chat model for a JSON outline of *text*.

	        Args:
	            text: Source text, embedded verbatim in the user message.
	            attempt: Zero-based round number; every round after the first
	                appends a stricter instruction to the user message.

	        Returns:
	            The content of the first choice in the model response.
	        """
	        system_content = (
	            "TODAY'S DATE: February 11, 2026.\n"
	            "You are a technical data extraction specialist. Your goal is to extract ONLY hard facts from the source text.\n"
	            "STRICT FIDELITY: You must mirror the source exactly. Assume the text is current as of 2026.\n"
	            "1. THEMES: List 3 specific themes found ONLY in the provided text.\n"
	            "2. DATA (MIRROR RULE): Extract specific statistics and names with EXACT WORDING. If the source says '42 percent', do not write '42%'.\n"
	            "3. CHRONOLOGY: Entities must appear in a logical or chronological sequence.\n\n"
	            "JSON Format:\n"
	            "{\n"
	            " \"topic\": \"...\",\n"
	            " \"themes\": [\"...\", \"...\", \"...\"],\n"
	            " \"entities\": [\"...\", \"...\"],\n"
	            " \"points\": [\n"
	            " {\n"
	            " \"fact\": \"... (Use EXACT statistics and names from source)\",\n"
	            " \"supporting_data\": [\"...\"],\n"
	            " \"sequence_order\": 1\n"
	            " }\n"
	            " ]\n"
	            "}\n"
	        )

	        user_content = f"Extract hard facts from this text:\n\n{text}"

	        if attempt > 0:
	            user_content += "\n\nCRITICAL: BE MORE FACTUAL. REMOVE ALL ADJECTIVES. USE ONLY DATA."

	        messages = [
	            {"role": "system", "content": system_content},
	            {"role": "user", "content": user_content},
	        ]

	        response = self.client.chat_completion(
	            messages=messages,
	            model=self.model,
	            max_tokens=1024,
	            # NOTE(review): this was previously described as a "lower"
	            # temperature, but 0.7 is not low -- confirm the intended value.
	            temperature=0.7,
	            top_p=0.9,
	        )
	        return response.choices[0].message.content

	    def _parse_json(self, raw_text):
	        """
	        Extract a JSON object from a model reply.

	        Candidates are tried in order: the stripped reply itself, the content
	        of the first Markdown code fence, and the span from the first ``{`` to
	        the last ``}``. This covers bare JSON, fenced JSON and JSON wrapped in
	        explanatory prose.

	        Args:
	            raw_text: Reply text from the model; may be ``None``.

	        Returns:
	            The first candidate that decodes to a dict, or ``None`` when the
	            reply is not a string or no candidate decodes to a JSON object.
	        """
	        if not isinstance(raw_text, str):
	            logger.error("planner: failed to parse JSON output")
	            return None

	        cleaned = raw_text.strip()
	        candidates = [cleaned]

	        fenced = self._FENCE_RE.search(cleaned)
	        if fenced:
	            candidates.append(fenced.group(1).strip())

	        start = cleaned.find("{")
	        end = cleaned.rfind("}")
	        if start != -1 and end > start:
	            candidates.append(cleaned[start:end + 1])

	        for candidate in candidates:
	            try:
	                parsed = json.loads(candidate)
	            except json.JSONDecodeError:
	                continue
	            # Only a JSON object is a usable plan; lists or scalars are not.
	            if isinstance(parsed, dict):
	                return parsed

	        logger.error("planner: failed to parse JSON output")
	        return None

	    def _check_overlap(self, original, plan):
	        """
	        Calculates Jaccard overlap of bigrams between Original Text and Outline Values.

	        The outline text is built from ``topic`` plus each point's ``claim``
	        and ``support`` entries. Values that are not strings (``None``,
	        numbers, nested objects) and points that are not dicts are ignored.

	        NOTE(review): the prompt in ``_generate_outline`` asks the model for
	        ``fact`` / ``supporting_data``, not ``claim`` / ``support``, so for
	        outlines in that shape only ``topic`` is measured. Confirm which
	        schema is intended before relying on this check as a gate.

	        Args:
	            original: Source text.
	            plan: Parsed outline dict.

	        Returns:
	            ``|A & B| / |A | B|`` over the two bigram sets, or ``0.0`` when
	            either side has no bigrams.
	        """
	        def get_bigrams(s):
	            words = re.findall(r'\w+', s.lower())
	            return set(zip(words, words[1:]))

	        def iter_strings(value):
	            # Yield value itself if it is a string, or its string items if it
	            # is a list/tuple; anything else contributes nothing.
	            if isinstance(value, str):
	                yield value
	            elif isinstance(value, (list, tuple)):
	                for item in value:
	                    if isinstance(item, str):
	                        yield item

	        orig_bigrams = get_bigrams(original)

	        # Collect all text from plan
	        pieces = list(iter_strings(plan.get("topic")))
	        points = plan.get("points")
	        if isinstance(points, (list, tuple)):
	            for point in points:
	                if not isinstance(point, dict):
	                    continue
	                pieces.extend(iter_strings(point.get("claim")))
	                pieces.extend(iter_strings(point.get("support")))

	        # Join into one string so bigrams spanning adjacent pieces are
	        # counted exactly as with piece-by-piece concatenation.
	        plan_bigrams = get_bigrams(" ".join(pieces))

	        if not orig_bigrams or not plan_bigrams:
	            return 0.0

	        shared = len(orig_bigrams & plan_bigrams)
	        total = len(orig_bigrams | plan_bigrams)
	        return shared / total