Spaces:

nitishkarvekar
/

AI-Text-Humanizer

Running

App Files Files Community

AI-Text-Humanizer / app.py

nitishkarvekar

Update app.py

58428fb verified 28 days ago

raw

history blame contribute delete

10.1 kB

	import os
	import re
	import unicodedata
	import difflib
	import gradio as gr
	from langchain_openai import ChatOpenAI
	from langchain_core.prompts import ChatPromptTemplate
	from langchain_core.output_parsers import StrOutputParser

	# ==========================================
	# 1. CONFIGURATION & SECURITY
	# ==========================================
	# API credential and endpoint; both are handed to the chat client in execute_pipeline.
	OPENROUTER_KEY = os.getenv("OPENROUTER_API_KEY")
	BASE_URL = "https://openrouter.ai/api/v1"

	# Optional UI login; each value is None when its environment variable is unset.
	APP_USER = os.getenv("APP_USERNAME")
	APP_PASS = os.getenv("APP_PASSWORD")

	# Dropdown label -> model identifier string sent as the `model` argument.
	# Every identifier names a ":free" model followed by ",openrouter/free".
	MODEL_OPTIONS = {
	    "⚡ Llama 3.1 8B (Ultra-Fast)":
	        "meta-llama/llama-3.1-8b-instruct:free,openrouter/free",
	    "⚡ Gemini 2.0 Flash Lite (Google's Fastest)":
	        "google/gemini-2.0-flash-lite-001:free,openrouter/free",
	    "⚡ Mistral Small 24B (Fast & Nuanced)":
	        "mistralai/mistral-small-24b-instruct-2501:free,openrouter/free",
	    "Step 3.5 Flash (Balanced)":
	        "stepfun/step-3.5-flash:free,openrouter/free",
	}

	# ==========================================
	# 2. THE PROMPTS (3-Pass System with Zero Data Loss Lock)
	# ==========================================

	# Pass 1: Structural Demolition
	# First-pass template: asks for a structural rewrite that keeps every fact.
	# The only placeholder is {input}, filled with the text to rewrite.
	_PASS1_TEMPLATE = (
	    "You are an expert editor. Rewrite the following text to break any predictable, formulaic AI structures and grammar footprints. "
	    "CRITICAL DATA RULE: You must retain 100% of the original information. Do NOT drop a single fact, number, name, or nuanced detail. Do NOT summarize. "
	    "1. SYNTACTIC SABOTAGE: Maximize sentence length variance (burstiness). Mix very short fragments with longer, complex sentences. "
	    "2. POS SHIFT: AI models overuse active voice and linking words. You must occasionally use passive voice to break predictability. "
	    "3. NO CLICHES: Delete all AI transition clichés like 'This isn't just X, it's Y', 'Furthermore', or 'It is worth noting'. "
	    "Keep all original facts and data completely accurate, but scramble the logical flow so it reads less like a textbook. "
	    "OUTPUT ONLY THE REWRITTEN TEXT. No introductions or explanations.\n\nText: {input}"
	)
	pass1_prompt = ChatPromptTemplate.from_template(_PASS1_TEMPLATE)

	# Pass 2: Style & Guardrails
	# Second-pass template: plain-spoken style rules plus output guardrails.
	# The only placeholder is {input}, filled with the previous pass's text.
	_PASS2_TEMPLATE = (
	    "Edit this text to sound perfectly human, plain-spoken, and direct. "
	    "Imagine you are writing a practical update to a smart colleague. "
	    "CRITICAL RULES: "
	    "1. ZERO DATA LOSS: Do not summarize or remove any information. Every fact, number, and entity from the draft must be perfectly preserved. "
	    "2. STRICT OUTPUT: OUTPUT ABSOLUTELY NOTHING EXCEPT THE FINAL TEXT. Do not add notes or list changes. "
	    "3. NO EMPTY FILLER: Never use meta-commentary like 'I have more to say, though'. Just state the facts. "
	    "4. NO DRAMATICS: Absolutely no rhetorical questions. Avoid poetic metaphors and the 'Rule of Three'. "
	    "5. NATURAL DATA: Never use the words 'respectively' or 'accounted for'. Blend numbers into the sentence naturally. "
	    "6. PERPLEXITY INJECTION: Lower the vocabulary complexity slightly. Choose slightly less common synonyms to avoid predictable word pairings. "
	    "7. PUNCTUATION: Absolutely NO m-dashes. Use standard hyphens, commas, periods, or parentheses instead. "
	    "8. VOCABULARY: Do not use corporate fluff. "
	    "Keep the tone grounded, highly efficient, and slightly informal.\n\nText: {input}"
	)
	pass2_prompt = ChatPromptTemplate.from_template(_PASS2_TEMPLATE)

	# Pass 3: Flow & Cohesion
	# Third-pass template: final proofreading for flow without dropping data.
	# The only placeholder is {input}, filled with the previous pass's text.
	_PASS3_TEMPLATE = (
	    "You are the final proofreader. Your job is to review this text for natural flow and cohesion without losing any data. "
	    "CRITICAL RULES: "
	    "1. ZERO DATA LOSS: You are strictly forbidden from deleting any facts, names, or numbers. Do not shorten or summarize the text. "
	    "2. OUTPUT ONLY THE FINAL TEXT. No intros, no notes. "
	    "3. Smooth out any choppy or awkward transitions left by previous edits. Ensure it reads naturally. "
	    "4. Maintain all previous rules: No m-dashes, no corporate fluff, no filler phrases. "
	    "Make it flow perfectly while preserving 100% of the information.\n\nText: {input}"
	)
	pass3_prompt = ChatPromptTemplate.from_template(_PASS3_TEMPLATE)

	# ==========================================
	# 3. PIPELINE FUNCTIONS
	# ==========================================

	def sanitize_text(text):
	    """Strip invisible characters and normalise look-alike glyphs and dashes.

	    Falsy input (None or "") yields "". Otherwise the text goes through,
	    in order: removal of zero-width and bidirectional control characters,
	    NFKC normalisation, replacement of Greek/Cyrillic look-alike letters
	    with their Latin counterparts, replacement of em and en dashes with a
	    plain hyphen, and trimming of surrounding whitespace.
	    """
	    if not text:
	        return ""

	    invisible_chars = re.compile(r'[\u200B-\u200D\uFEFF\u200E\u200F\u202A-\u202E]')
	    visible_only = invisible_chars.sub('', text)
	    normalized = unicodedata.normalize('NFKC', visible_only)

	    # One translation table covers both the homoglyphs and the dashes.
	    replacements = {
	        'Α': 'A', 'Β': 'B', 'Ε': 'E', 'Η': 'H', 'Ι': 'I', 'Κ': 'K', 'Μ': 'M',
	        'Ν': 'N', 'Ο': 'O', 'Ρ': 'P', 'Τ': 'T', 'Χ': 'X', 'Υ': 'Y', 'Ζ': 'Z',
	        'а': 'a', 'с': 'c', 'е': 'e', 'о': 'o', 'р': 'p', 'х': 'x', 'у': 'y',
	        'А': 'A', 'В': 'B', 'С': 'C', 'Е': 'E', 'Н': 'H', 'О': 'O', 'Р': 'P',
	        'Т': 'T', 'Х': 'X', 'У': 'Y', 'і': 'i', 'ј': 'j',
	        "—": "-", "–": "-",
	    }
	    return normalized.translate(str.maketrans(replacements)).strip()

	def calculate_similarity(original, final):
	    """Report how much of the original wording survives in the rewrite.

	    The comparison is lexical, not semantic: both texts are split on
	    whitespace and the word sequences are compared with difflib.

	    Args:
	        original: The text before rewriting. None is treated as empty.
	        final: The text after rewriting. None is treated as empty.

	    Returns:
	        A string of the form "Preservation Score: NN.N%", a blank line,
	        and a status message: above 80% is flagged as too similar, below
	        25% as possibly lossy, anything else as optimal.
	    """
	    source_words = (original or "").split()
	    rewritten_words = (final or "").split()

	    # autojunk must be off: by default SequenceMatcher ignores any word that
	    # is frequent in the second sequence once it reaches 200 items, which
	    # drags the score down (even to 0%) on longer texts.
	    matcher = difflib.SequenceMatcher(
	        None, source_words, rewritten_words, autojunk=False
	    )
	    ratio = matcher.ratio() * 100

	    if ratio > 80:
	        status = "⚠️ Danger: Too similar to original AI text."
	    elif ratio < 25:
	        status = "⚠️ Warning: High variation. Verify that no data was dropped."
	    else:
	        status = "✅ Optimal variation achieved."

	    return f"Preservation Score: {ratio:.1f}%\n\n{status}"

	# ==========================================
	# 4. MAIN EXECUTION
	# ==========================================

	def execute_pipeline(input_text, model_choice):
	    """Run the three-pass rewrite on ``input_text`` and score the result.

	    Args:
	        input_text: Raw text from the input box.
	        model_choice: A key of ``MODEL_OPTIONS`` (the dropdown label).

	    Returns:
	        A ``(output_text, similarity_report)`` tuple. When the input is
	        empty after sanitising, the model label is unknown, or anything in
	        the pipeline raises, the first item is a user-facing message and the
	        second is "".
	    """
	    # STEP 1: Text Normalization
	    normalized_input = sanitize_text(input_text)
	    if not normalized_input:
	        return "Please enter some text to humanize.", ""

	    try:
	        # An unknown label gets its own message rather than the generic
	        # "check your API key" text, which would point at the wrong cause.
	        selected_model_id = MODEL_OPTIONS.get(model_choice)
	        if selected_model_id is None:
	            return (
	                f"Unknown model selection: {model_choice!r}. "
	                "Please pick a model from the list.",
	                "",
	            )

	        active_model = ChatOpenAI(
	            model=selected_model_id,
	            openai_api_key=OPENROUTER_KEY,
	            openai_api_base=BASE_URL,
	            temperature=0.85,
	            max_tokens=6000,
	        )

	        # Build the 3-pass chain. Each pass is prompt -> model -> plain
	        # string; the lambdas re-wrap that string as the next prompt's
	        # {input} variable.
	        pipeline_chain = (
	            {"input": lambda x: x}
	            | pass1_prompt  # STEP 2: Rewrite Pass 1 (Structure)
	            | active_model
	            | StrOutputParser()
	            | (lambda text: {"input": text})
	            | pass2_prompt  # STEP 3: Rewrite Pass 2 (Style)
	            | active_model
	            | StrOutputParser()
	            | (lambda text: {"input": text})
	            | pass3_prompt  # STEP 4: Rewrite Pass 3 (Flow)
	            | active_model
	            | StrOutputParser()
	        )

	        # Run the LLM chain
	        raw_result = pipeline_chain.invoke(normalized_input)

	        # STEP 5: Post-processing Cleanup
	        final_output = sanitize_text(raw_result)

	        # STEP 6: Similarity check between the sanitised input and output
	        similarity_metrics = calculate_similarity(normalized_input, final_output)

	        # STEP 7: Final Output
	        return final_output, similarity_metrics

	    except Exception as e:
	        # UI boundary: show the failure in the output box instead of raising.
	        return f"An error occurred. Check your API key or try again later.\nError details: {e}", ""

	# UI Helper Functions
	def update_word_count(text):
	    """Return the "Word Count: N words" label for ``text``.

	    Words are whitespace-separated tokens; None and "" count as 0.
	    """
	    if not text:
	        words = 0
	    else:
	        words = len(text.split())
	    return f"Word Count: {words} words"

	def clear_boxes():
	    """Return the five reset values used by the clear button.

	    Order: input text, output text, input word count, output word count,
	    similarity display.
	    """
	    blank = ""
	    zero_words = "Word Count: 0 words"
	    return blank, blank, zero_words, zero_words, blank

	# ==========================================
	# 5. USER INTERFACE
	# ==========================================

	# Builds the Gradio UI and binds it to `app`: a model dropdown, an input
	# textbox, an output textbox, two word-count labels, a similarity label and
	# three buttons, followed by the event wiring for them.
	# NOTE(review): indentation is flattened in this copy of the file, so the
	# nesting of the `with` contexts below cannot be read off the text; confirm
	# the layout against the original app.py.
	with gr.Blocks() as app:
	gr.Markdown("# 🤖 -> 🧑 The AI Humanizer (V2)")
	gr.Markdown("Passes your text through a 3-stage LLM pipeline (Structure > Style > Flow) while sanitizing watermarks.")

	with gr.Row():
	# LEFT COLUMN
	with gr.Column():
	# Choices are the MODEL_OPTIONS labels; the default value is one of those keys.
	model_dropdown = gr.Dropdown(
	choices=list(MODEL_OPTIONS.keys()),
	value="⚡ Llama 3.1 8B (Ultra-Fast)",
	label="Select AI Model"
	)

	input_box = gr.Textbox(
	lines=12,
	placeholder="Paste your AI text here...",
	label="Original AI Text"
	)
	input_word_count = gr.Markdown("Word Count: 0 words")

	with gr.Row():
	clear_btn = gr.Button("🗑️ Clear Text", variant="secondary")
	submit_btn = gr.Button("✨ Run Pipeline", variant="primary")

	# RIGHT COLUMN
	with gr.Column():
	output_box = gr.Textbox(
	lines=12,
	label="Humanized Output"
	)

	with gr.Row():
	output_word_count = gr.Markdown("Word Count: 0 words")
	# NOTE(review): starts as "Preservation Score: N/A", but clear_boxes
	# resets this component to an empty string.
	similarity_display = gr.Markdown("Preservation Score: N/A")

	copy_btn = gr.Button("📋 Copy Output", variant="secondary")

	# Connect the UI elements
	# Each textbox refreshes its own word-count label whenever its content changes.
	input_box.change(fn=update_word_count, inputs=input_box, outputs=input_word_count)
	output_box.change(fn=update_word_count, inputs=output_box, outputs=output_word_count)

	# execute_pipeline returns (text, similarity report), matching the two outputs.
	submit_btn.click(
	fn=execute_pipeline,
	inputs=[input_box, model_dropdown],
	outputs=[output_box, similarity_display]
	)

	# The outputs order must match the 5-tuple returned by clear_boxes.
	clear_btn.click(
	fn=clear_boxes,
	inputs=[],
	outputs=[input_box, output_box, input_word_count, output_word_count, similarity_display]
	)

	# No Python callback (fn=None): the js snippet receives the output box text
	# and writes it to the clipboard.
	copy_btn.click(
	fn=None,
	inputs=[output_box],
	outputs=None,
	js="(text) => { navigator.clipboard.writeText(text); }"
	)

	# ==========================================
	# 6. SECURE LAUNCH
	# ==========================================
	# Login is enabled only when BOTH credentials are set; if either one is
	# missing the app launches without authentication.
	launch_options = {"theme": gr.themes.Soft()}
	if APP_USER and APP_PASS:
	    launch_options["auth"] = (APP_USER, APP_PASS)
	app.launch(**launch_options)