Update app.py
Browse files
app.py
CHANGED
|
@@ -25,12 +25,13 @@ MODEL_OPTIONS = {
|
|
| 25 |
}
|
| 26 |
|
| 27 |
# ==========================================
|
| 28 |
-
# 2. THE PROMPTS (3-Pass System)
|
| 29 |
# ==========================================
|
| 30 |
|
| 31 |
# Pass 1: Structural Demolition
|
| 32 |
pass1_prompt = ChatPromptTemplate.from_template(
|
| 33 |
"You are an expert editor. Rewrite the following text to break any predictable, formulaic AI structures and grammar footprints. "
|
|
|
|
| 34 |
"1. SYNTACTIC SABOTAGE: Maximize sentence length variance (burstiness). Mix very short fragments with longer, complex sentences. "
|
| 35 |
"2. POS SHIFT: AI models overuse active voice and linking words. You must occasionally use passive voice to break predictability. "
|
| 36 |
"3. NO CLICHES: Delete all AI transition clichés like 'This isn't just X, it's Y', 'Furthermore', or 'It is worth noting'. "
|
|
@@ -43,25 +44,26 @@ pass2_prompt = ChatPromptTemplate.from_template(
|
|
| 43 |
"Edit this text to sound perfectly human, plain-spoken, and direct. "
|
| 44 |
"Imagine you are writing a practical update to a smart colleague. "
|
| 45 |
"CRITICAL RULES: "
|
| 46 |
-
"1.
|
| 47 |
-
"2.
|
| 48 |
-
"3. NO
|
| 49 |
-
"4.
|
| 50 |
-
"5.
|
| 51 |
-
"6.
|
| 52 |
-
"7.
|
|
|
|
| 53 |
"Keep the tone grounded, highly efficient, and slightly informal.\n\nText: {input}"
|
| 54 |
)
|
| 55 |
|
| 56 |
# Pass 3: Flow & Cohesion
|
| 57 |
pass3_prompt = ChatPromptTemplate.from_template(
|
| 58 |
-
"You are the final proofreader. Your job is to review this text for natural flow and cohesion. "
|
| 59 |
"CRITICAL RULES: "
|
| 60 |
-
"1.
|
| 61 |
-
"2.
|
| 62 |
-
"3.
|
| 63 |
"4. Maintain all previous rules: No m-dashes, no corporate fluff, no filler phrases. "
|
| 64 |
-
"Make it flow perfectly.\n\nText: {input}"
|
| 65 |
)
|
| 66 |
|
| 67 |
# ==========================================
|
|
@@ -73,11 +75,9 @@ def sanitize_text(text):
|
|
| 73 |
if not text:
|
| 74 |
return ""
|
| 75 |
|
| 76 |
-
# Strip invisible watermarks
|
| 77 |
cleaned = re.sub(r'[\u200B-\u200D\uFEFF\u200E\u200F\u202A-\u202E]', '', text)
|
| 78 |
-
# Normalize standard unicode
|
| 79 |
cleaned = unicodedata.normalize('NFKC', cleaned)
|
| 80 |
-
|
| 81 |
homoglyphs = {
|
| 82 |
'Α': 'A', 'Β': 'B', 'Ε': 'E', 'Η': 'H', 'Ι': 'I', 'Κ': 'K', 'Μ': 'M',
|
| 83 |
'Ν': 'N', 'Ο': 'O', 'Ρ': 'P', 'Τ': 'T', 'Χ': 'X', 'Υ': 'Y', 'Ζ': 'Z',
|
|
@@ -88,14 +88,12 @@ def sanitize_text(text):
|
|
| 88 |
trans_table = str.maketrans(homoglyphs)
|
| 89 |
cleaned = cleaned.translate(trans_table)
|
| 90 |
|
| 91 |
-
# Enforce standard hyphens over m-dashes as a final fail-safe
|
| 92 |
cleaned = cleaned.replace("—", "-").replace("–", "-")
|
| 93 |
|
| 94 |
return cleaned.strip()
|
| 95 |
|
| 96 |
def calculate_similarity(original, final):
|
| 97 |
"""Semantic Similarity Check (Lexical Preservation)"""
|
| 98 |
-
# Uses SequenceMatcher to calculate how much of the original string structure survived
|
| 99 |
seq = difflib.SequenceMatcher(None, original.split(), final.split())
|
| 100 |
ratio = seq.ratio() * 100
|
| 101 |
|
|
@@ -126,7 +124,7 @@ def execute_pipeline(input_text, model_choice):
|
|
| 126 |
model=selected_model_id,
|
| 127 |
openai_api_key=OPENROUTER_KEY,
|
| 128 |
openai_api_base=BASE_URL,
|
| 129 |
-
temperature=0.85,
|
| 130 |
max_tokens=6000
|
| 131 |
)
|
| 132 |
|
|
@@ -174,7 +172,7 @@ def clear_boxes():
|
|
| 174 |
# ==========================================
|
| 175 |
|
| 176 |
with gr.Blocks() as app:
|
| 177 |
-
gr.Markdown("# 🤖 -> 🧑 The AI Humanizer (
|
| 178 |
gr.Markdown("Passes your text through a 3-stage LLM pipeline (Structure > Style > Flow) while sanitizing watermarks.")
|
| 179 |
|
| 180 |
with gr.Row():
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
# ==========================================
|
| 28 |
+
# 2. THE PROMPTS (3-Pass System with Zero Data Loss Lock)
|
| 29 |
# ==========================================
|
| 30 |
|
| 31 |
# Pass 1: Structural Demolition
|
| 32 |
pass1_prompt = ChatPromptTemplate.from_template(
|
| 33 |
"You are an expert editor. Rewrite the following text to break any predictable, formulaic AI structures and grammar footprints. "
|
| 34 |
+
"CRITICAL DATA RULE: You must retain 100% of the original information. Do NOT drop a single fact, number, name, or nuanced detail. Do NOT summarize. "
|
| 35 |
"1. SYNTACTIC SABOTAGE: Maximize sentence length variance (burstiness). Mix very short fragments with longer, complex sentences. "
|
| 36 |
"2. POS SHIFT: AI models overuse active voice and linking words. You must occasionally use passive voice to break predictability. "
|
| 37 |
"3. NO CLICHES: Delete all AI transition clichés like 'This isn't just X, it's Y', 'Furthermore', or 'It is worth noting'. "
|
|
|
|
| 44 |
"Edit this text to sound perfectly human, plain-spoken, and direct. "
|
| 45 |
"Imagine you are writing a practical update to a smart colleague. "
|
| 46 |
"CRITICAL RULES: "
|
| 47 |
+
"1. ZERO DATA LOSS: Do not summarize or remove any information. Every fact, number, and entity from the draft must be perfectly preserved. "
|
| 48 |
+
"2. STRICT OUTPUT: OUTPUT ABSOLUTELY NOTHING EXCEPT THE FINAL TEXT. Do not add notes or list changes. "
|
| 49 |
+
"3. NO EMPTY FILLER: Never use meta-commentary like 'I have more to say, though'. Just state the facts. "
|
| 50 |
+
"4. NO DRAMATICS: Absolutely no rhetorical questions. Avoid poetic metaphors and the 'Rule of Three'. "
|
| 51 |
+
"5. NATURAL DATA: Never use the words 'respectively' or 'accounted for'. Blend numbers into the sentence naturally. "
|
| 52 |
+
"6. PERPLEXITY INJECTION: Lower the vocabulary complexity slightly. Choose slightly less common synonyms to avoid predictable word pairings. "
|
| 53 |
+
"7. PUNCTUATION: Absolutely NO m-dashes. Use standard hyphens, commas, periods, or parentheses instead. "
|
| 54 |
+
"8. VOCABULARY: Do not use corporate fluff. "
|
| 55 |
"Keep the tone grounded, highly efficient, and slightly informal.\n\nText: {input}"
|
| 56 |
)
|
| 57 |
|
| 58 |
# Pass 3: Flow & Cohesion
|
| 59 |
pass3_prompt = ChatPromptTemplate.from_template(
|
| 60 |
+
"You are the final proofreader. Your job is to review this text for natural flow and cohesion without losing any data. "
|
| 61 |
"CRITICAL RULES: "
|
| 62 |
+
"1. ZERO DATA LOSS: You are strictly forbidden from deleting any facts, names, or numbers. Do not shorten or summarize the text. "
|
| 63 |
+
"2. OUTPUT ONLY THE FINAL TEXT. No intros, no notes. "
|
| 64 |
+
"3. Smooth out any choppy or awkward transitions left by previous edits. Ensure it reads naturally. "
|
| 65 |
"4. Maintain all previous rules: No m-dashes, no corporate fluff, no filler phrases. "
|
| 66 |
+
"Make it flow perfectly while preserving 100% of the information.\n\nText: {input}"
|
| 67 |
)
|
| 68 |
|
| 69 |
# ==========================================
|
|
|
|
| 75 |
if not text:
|
| 76 |
return ""
|
| 77 |
|
|
|
|
| 78 |
cleaned = re.sub(r'[\u200B-\u200D\uFEFF\u200E\u200F\u202A-\u202E]', '', text)
|
|
|
|
| 79 |
cleaned = unicodedata.normalize('NFKC', cleaned)
|
| 80 |
+
|
| 81 |
homoglyphs = {
|
| 82 |
'Α': 'A', 'Β': 'B', 'Ε': 'E', 'Η': 'H', 'Ι': 'I', 'Κ': 'K', 'Μ': 'M',
|
| 83 |
'Ν': 'N', 'Ο': 'O', 'Ρ': 'P', 'Τ': 'T', 'Χ': 'X', 'Υ': 'Y', 'Ζ': 'Z',
|
|
|
|
| 88 |
trans_table = str.maketrans(homoglyphs)
|
| 89 |
cleaned = cleaned.translate(trans_table)
|
| 90 |
|
|
|
|
| 91 |
cleaned = cleaned.replace("—", "-").replace("–", "-")
|
| 92 |
|
| 93 |
return cleaned.strip()
|
| 94 |
|
| 95 |
def calculate_similarity(original, final):
|
| 96 |
"""Semantic Similarity Check (Lexical Preservation)"""
|
|
|
|
| 97 |
seq = difflib.SequenceMatcher(None, original.split(), final.split())
|
| 98 |
ratio = seq.ratio() * 100
|
| 99 |
|
|
|
|
| 124 |
model=selected_model_id,
|
| 125 |
openai_api_key=OPENROUTER_KEY,
|
| 126 |
openai_api_base=BASE_URL,
|
| 127 |
+
temperature=0.85,
|
| 128 |
max_tokens=6000
|
| 129 |
)
|
| 130 |
|
|
|
|
| 172 |
# ==========================================
|
| 173 |
|
| 174 |
with gr.Blocks() as app:
|
| 175 |
+
gr.Markdown("# 🤖 -> 🧑 The AI Humanizer (V2)")
|
| 176 |
gr.Markdown("Passes your text through a 3-stage LLM pipeline (Structure > Style > Flow) while sanitizing watermarks.")
|
| 177 |
|
| 178 |
with gr.Row():
|