nitishkarvekar committed on
Commit
58428fb
·
verified ·
1 Parent(s): f826f1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -20
app.py CHANGED
@@ -25,12 +25,13 @@ MODEL_OPTIONS = {
25
  }
26
 
27
  # ==========================================
28
- # 2. THE PROMPTS (3-Pass System)
29
  # ==========================================
30
 
31
  # Pass 1: Structural Demolition
32
  pass1_prompt = ChatPromptTemplate.from_template(
33
  "You are an expert editor. Rewrite the following text to break any predictable, formulaic AI structures and grammar footprints. "
 
34
  "1. SYNTACTIC SABOTAGE: Maximize sentence length variance (burstiness). Mix very short fragments with longer, complex sentences. "
35
  "2. POS SHIFT: AI models overuse active voice and linking words. You must occasionally use passive voice to break predictability. "
36
  "3. NO CLICHES: Delete all AI transition clichés like 'This isn't just X, it's Y', 'Furthermore', or 'It is worth noting'. "
@@ -43,25 +44,26 @@ pass2_prompt = ChatPromptTemplate.from_template(
43
  "Edit this text to sound perfectly human, plain-spoken, and direct. "
44
  "Imagine you are writing a practical update to a smart colleague. "
45
  "CRITICAL RULES: "
46
- "1. STRICT OUTPUT: OUTPUT ABSOLUTELY NOTHING EXCEPT THE FINAL TEXT. Do not add notes or list changes. "
47
- "2. NO EMPTY FILLER: Never use meta-commentary like 'I have more to say, though,' 'Let's look at,' or conversational throat-clearing like 'Sure, but...'. Just state the facts. "
48
- "3. NO DRAMATICS: Absolutely no rhetorical questions. Avoid poetic metaphors and the 'Rule of Three' (listing three punchy examples in a row). "
49
- "4. NATURAL DATA: Never use the words 'respectively' or 'accounted for'. Blend numbers into the sentence naturally. "
50
- "5. PERPLEXITY INJECTION: Lower the vocabulary complexity slightly. Choose slightly less common synonyms to avoid predictable word pairings. "
51
- "6. PUNCTUATION: Absolutely NO m-dashes. Use standard hyphens, commas, periods, or parentheses instead. "
52
- "7. VOCABULARY: Do not use corporate fluff (e.g., 'streamlined', 'foster', 'crucial', 'testament'). "
 
53
  "Keep the tone grounded, highly efficient, and slightly informal.\n\nText: {input}"
54
  )
55
 
56
  # Pass 3: Flow & Cohesion
57
  pass3_prompt = ChatPromptTemplate.from_template(
58
- "You are the final proofreader. Your job is to review this text for natural flow and cohesion. "
59
  "CRITICAL RULES: "
60
- "1. OUTPUT ONLY THE FINAL TEXT. No intros, no notes. "
61
- "2. Smooth out any choppy or awkward transitions left by the previous edits. Ensure it reads naturally. "
62
- "3. Ensure the text remains completely factual. Do not hallucinate or drop key numbers. "
63
  "4. Maintain all previous rules: No m-dashes, no corporate fluff, no filler phrases. "
64
- "Make it flow perfectly.\n\nText: {input}"
65
  )
66
 
67
  # ==========================================
@@ -73,11 +75,9 @@ def sanitize_text(text):
73
  if not text:
74
  return ""
75
 
76
- # Strip invisible watermarks
77
  cleaned = re.sub(r'[\u200B-\u200D\uFEFF\u200E\u200F\u202A-\u202E]', '', text)
78
- # Normalize standard unicode
79
  cleaned = unicodedata.normalize('NFKC', cleaned)
80
- # Replace homoglyphs
81
  homoglyphs = {
82
  'Α': 'A', 'Β': 'B', 'Ε': 'E', 'Η': 'H', 'Ι': 'I', 'Κ': 'K', 'Μ': 'M',
83
  'Ν': 'N', 'Ο': 'O', 'Ρ': 'P', 'Τ': 'T', 'Χ': 'X', 'Υ': 'Y', 'Ζ': 'Z',
@@ -88,14 +88,12 @@ def sanitize_text(text):
88
  trans_table = str.maketrans(homoglyphs)
89
  cleaned = cleaned.translate(trans_table)
90
 
91
- # Enforce standard hyphens over m-dashes as a final fail-safe
92
  cleaned = cleaned.replace("—", "-").replace("–", "-")
93
 
94
  return cleaned.strip()
95
 
96
  def calculate_similarity(original, final):
97
  """Semantic Similarity Check (Lexical Preservation)"""
98
- # Uses SequenceMatcher to calculate how much of the original string structure survived
99
  seq = difflib.SequenceMatcher(None, original.split(), final.split())
100
  ratio = seq.ratio() * 100
101
 
@@ -126,7 +124,7 @@ def execute_pipeline(input_text, model_choice):
126
  model=selected_model_id,
127
  openai_api_key=OPENROUTER_KEY,
128
  openai_api_base=BASE_URL,
129
- temperature=0.85, # High temp for creativity
130
  max_tokens=6000
131
  )
132
 
@@ -174,7 +172,7 @@ def clear_boxes():
174
  # ==========================================
175
 
176
  with gr.Blocks() as app:
177
- gr.Markdown("# 🤖 -> 🧑 The AI Humanizer (Enterprise Pipeline)")
178
  gr.Markdown("Passes your text through a 3-stage LLM pipeline (Structure > Style > Flow) while sanitizing watermarks.")
179
 
180
  with gr.Row():
 
25
  }
26
 
27
  # ==========================================
28
+ # 2. THE PROMPTS (3-Pass System with Zero Data Loss Lock)
29
  # ==========================================
30
 
31
  # Pass 1: Structural Demolition
32
  pass1_prompt = ChatPromptTemplate.from_template(
33
  "You are an expert editor. Rewrite the following text to break any predictable, formulaic AI structures and grammar footprints. "
34
+ "CRITICAL DATA RULE: You must retain 100% of the original information. Do NOT drop a single fact, number, name, or nuanced detail. Do NOT summarize. "
35
  "1. SYNTACTIC SABOTAGE: Maximize sentence length variance (burstiness). Mix very short fragments with longer, complex sentences. "
36
  "2. POS SHIFT: AI models overuse active voice and linking words. You must occasionally use passive voice to break predictability. "
37
  "3. NO CLICHES: Delete all AI transition clichés like 'This isn't just X, it's Y', 'Furthermore', or 'It is worth noting'. "
 
44
  "Edit this text to sound perfectly human, plain-spoken, and direct. "
45
  "Imagine you are writing a practical update to a smart colleague. "
46
  "CRITICAL RULES: "
47
+ "1. ZERO DATA LOSS: Do not summarize or remove any information. Every fact, number, and entity from the draft must be perfectly preserved. "
48
+ "2. STRICT OUTPUT: OUTPUT ABSOLUTELY NOTHING EXCEPT THE FINAL TEXT. Do not add notes or list changes. "
49
+ "3. NO EMPTY FILLER: Never use meta-commentary like 'I have more to say, though'. Just state the facts. "
50
+ "4. NO DRAMATICS: Absolutely no rhetorical questions. Avoid poetic metaphors and the 'Rule of Three'. "
51
+ "5. NATURAL DATA: Never use the words 'respectively' or 'accounted for'. Blend numbers into the sentence naturally. "
52
+ "6. PERPLEXITY INJECTION: Lower the vocabulary complexity slightly. Choose slightly less common synonyms to avoid predictable word pairings. "
53
+ "7. PUNCTUATION: Absolutely NO m-dashes. Use standard hyphens, commas, periods, or parentheses instead. "
54
+ "8. VOCABULARY: Do not use corporate fluff. "
55
  "Keep the tone grounded, highly efficient, and slightly informal.\n\nText: {input}"
56
  )
57
 
58
  # Pass 3: Flow & Cohesion
59
  pass3_prompt = ChatPromptTemplate.from_template(
60
+ "You are the final proofreader. Your job is to review this text for natural flow and cohesion without losing any data. "
61
  "CRITICAL RULES: "
62
+ "1. ZERO DATA LOSS: You are strictly forbidden from deleting any facts, names, or numbers. Do not shorten or summarize the text. "
63
+ "2. OUTPUT ONLY THE FINAL TEXT. No intros, no notes. "
64
+ "3. Smooth out any choppy or awkward transitions left by previous edits. Ensure it reads naturally. "
65
  "4. Maintain all previous rules: No m-dashes, no corporate fluff, no filler phrases. "
66
+ "Make it flow perfectly while preserving 100% of the information.\n\nText: {input}"
67
  )
68
 
69
  # ==========================================
 
75
  if not text:
76
  return ""
77
 
 
78
  cleaned = re.sub(r'[\u200B-\u200D\uFEFF\u200E\u200F\u202A-\u202E]', '', text)
 
79
  cleaned = unicodedata.normalize('NFKC', cleaned)
80
+
81
  homoglyphs = {
82
  'Α': 'A', 'Β': 'B', 'Ε': 'E', 'Η': 'H', 'Ι': 'I', 'Κ': 'K', 'Μ': 'M',
83
  'Ν': 'N', 'Ο': 'O', 'Ρ': 'P', 'Τ': 'T', 'Χ': 'X', 'Υ': 'Y', 'Ζ': 'Z',
 
88
  trans_table = str.maketrans(homoglyphs)
89
  cleaned = cleaned.translate(trans_table)
90
 
 
91
  cleaned = cleaned.replace("—", "-").replace("–", "-")
92
 
93
  return cleaned.strip()
94
 
95
  def calculate_similarity(original, final):
96
  """Semantic Similarity Check (Lexical Preservation)"""
 
97
  seq = difflib.SequenceMatcher(None, original.split(), final.split())
98
  ratio = seq.ratio() * 100
99
 
 
124
  model=selected_model_id,
125
  openai_api_key=OPENROUTER_KEY,
126
  openai_api_base=BASE_URL,
127
+ temperature=0.85,
128
  max_tokens=6000
129
  )
130
 
 
172
  # ==========================================
173
 
174
  with gr.Blocks() as app:
175
+ gr.Markdown("# 🤖 -> 🧑 The AI Humanizer (V2)")
176
  gr.Markdown("Passes your text through a 3-stage LLM pipeline (Structure > Style > Flow) while sanitizing watermarks.")
177
 
178
  with gr.Row():