onkar-waghmode committed on
Commit 7796047 · 1 Parent(s): 961e68c
Files changed (1)
  1. app.py +530 -0
app.py ADDED
@@ -0,0 +1,530 @@
import gradio as gr
import random
import nltk
import re
import spacy
from nltk.corpus import wordnet, stopwords
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
from typing import List
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Download NLTK data. Note: nltk.data.find() requires a category-prefixed path
# (e.g. 'tokenizers/punkt'), so probing with the bare package name always fails;
# calling nltk.download() directly is simpler and is a no-op when the resource
# is already present and up to date.
print("Downloading NLTK data...")
for data in ['punkt', 'wordnet', 'averaged_perceptron_tagger', 'stopwords', 'omw-1.4', 'averaged_perceptron_tagger_eng']:
    nltk.download(data, quiet=True)

# Load models globally
print("Loading models...")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

t5_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
t5_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
t5_model.to(device)

similarity_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)
nlp = spacy.load("en_core_web_sm")

print("Models loaded successfully!")
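As a quick smoke test of the components loaded above (an illustrative REPL sketch, not part of the app's control flow; the sample string is hypothetical):

sample = "The experiment was repeated three times."
ids = t5_tokenizer(sample, return_tensors="pt").to(device)["input_ids"]
emb = similarity_model.encode([sample])
print(ids.shape, emb.shape)  # e.g. torch.Size([1, 10]) and (1, 384); MiniLM-L6 embeddings are 384-dimensional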
# ============================================================================
# STAGE 1: PARAPHRASING WITH T5 MODEL
# ============================================================================
def paraphrase_text(text: str, max_length: int = 512, num_beams: int = 4,
                    temperature: float = 0.7, top_p: float = 0.9,
                    repetition_penalty: float = 1.2, length_penalty: float = 1.0) -> str:
    """Paraphrase text using the T5 model."""
    try:
        input_text = f"paraphrase: {text.strip()}"
        inputs = t5_tokenizer(input_text, return_tensors="pt",
                              max_length=512, truncation=True, padding=True).to(device)

        with torch.no_grad():
            outputs = t5_model.generate(
                **inputs,
                max_length=max_length,
                num_beams=num_beams,
                num_return_sequences=1,
                temperature=temperature,
                do_sample=temperature > 0,
                top_p=top_p,
                repetition_penalty=repetition_penalty,
                length_penalty=length_penalty,
                early_stopping=True
            )

        result = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return result.strip()

    except Exception as e:
        logger.warning(f"Paraphrasing failed: {e}. Returning original text.")
        return text

def paraphrase_long_text(text: str, max_length: int = 512, num_beams: int = 4,
                         temperature: float = 0.7, top_p: float = 0.9,
                         repetition_penalty: float = 1.2, length_penalty: float = 1.0) -> str:
    """Handle long texts by paraphrasing them in chunks of roughly 80 words."""
    sentences = nltk.sent_tokenize(text)
    paraphrased_sentences = []
    current_chunk = ""

    for sentence in sentences:
        if len((current_chunk + " " + sentence).split()) > 80:
            if current_chunk:
                paraphrased = paraphrase_text(current_chunk, max_length, num_beams,
                                              temperature, top_p, repetition_penalty, length_penalty)
                paraphrased_sentences.append(paraphrased)
            current_chunk = sentence
        else:
            current_chunk += (" " + sentence) if current_chunk else sentence

    if current_chunk:
        paraphrased = paraphrase_text(current_chunk, max_length, num_beams,
                                      temperature, top_p, repetition_penalty, length_penalty)
        paraphrased_sentences.append(paraphrased)

    return " ".join(paraphrased_sentences)
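A minimal usage sketch for this stage (hypothetical input; sampling makes the output vary from run to run):

original = "Machine learning models can generate fluent text."
print(paraphrase_text(original))  # short inputs go through a single generate() call

long_input = " ".join([original] * 30)  # ~240 words, so it takes the chunked path
print(paraphrase_long_text(long_input)[:200])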
# ============================================================================
# STAGE 2: SYNONYM REPLACEMENT
# ============================================================================
def get_synonyms(word: str, pos: str, max_synonyms: int = 3) -> List[str]:
    """Get single-word WordNet synonyms, guided by the word's POS tag."""
    pos_mapping = {
        'NN': wordnet.NOUN, 'NNS': wordnet.NOUN, 'NNP': wordnet.NOUN, 'NNPS': wordnet.NOUN,
        'VB': wordnet.VERB, 'VBD': wordnet.VERB, 'VBG': wordnet.VERB, 'VBN': wordnet.VERB,
        'VBP': wordnet.VERB, 'VBZ': wordnet.VERB,
        'JJ': wordnet.ADJ, 'JJR': wordnet.ADJ, 'JJS': wordnet.ADJ,
        'RB': wordnet.ADV, 'RBR': wordnet.ADV, 'RBS': wordnet.ADV
    }

    wn_pos = pos_mapping.get(pos, wordnet.NOUN)
    synsets = wordnet.synsets(word.lower(), pos=wn_pos)

    if not synsets:
        synsets = wordnet.synsets(word.lower())

    synonyms = []
    for synset in synsets[:max_synonyms]:
        for lemma in synset.lemmas()[:5]:
            syn = lemma.name().replace('_', ' ')
            if len(syn.split()) == 1 and syn.lower() != word.lower():
                synonyms.append(syn)

    return list(set(synonyms))

def synonym_replace(text: str, prob: float = 0.3, min_word_length: int = 3,
                    max_synonyms: int = 3) -> str:
    """Randomly replace eligible words with WordNet synonyms."""
    from nltk import pos_tag, word_tokenize

    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    pos_tags = pos_tag(words)
    new_words = []

    for word, pos in pos_tags:
        # Leave punctuation and numbers untouched
        if not word.isalpha():
            new_words.append(word)
            continue

        # Skip stopwords and short words
        if word.lower() in stop_words or len(word) <= min_word_length:
            new_words.append(word)
            continue

        # Replace only roughly a `prob` fraction of the remaining words
        if random.random() > prob:
            new_words.append(word)
            continue

        synonyms = get_synonyms(word, pos, max_synonyms)
        candidates = [s for s in synonyms if s.lower() != word.lower()]

        if candidates:
            replacement = random.choice(candidates)
            new_words.append(replacement)
        else:
            new_words.append(word)

    return ' '.join(new_words)
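For illustration (WordNet contents and the random draws vary, so the exact output is not guaranteed):

random.seed(0)  # only to make the demo repeatable
print(get_synonyms("important", "JJ"))  # e.g. ['significant', 'crucial', ...]
print(synonym_replace("The committee reached an important decision today.", prob=0.8))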
# ============================================================================
# STAGE 3: ACADEMIC DISCOURSE
# ============================================================================
def add_academic_discourse(text: str, hedge_prob: float = 0.2, booster_prob: float = 0.15,
                           connector_prob: float = 0.25, starter_prob: float = 0.1) -> str:
    """Expand contractions and insert hedges, boosters, connectors, and sentence starters."""

    contractions = {
        "don't": "do not", "doesn't": "does not", "didn't": "did not",
        "can't": "cannot", "couldn't": "could not", "shouldn't": "should not",
        "wouldn't": "would not", "won't": "will not", "aren't": "are not",
        "isn't": "is not", "wasn't": "was not", "weren't": "were not",
        "haven't": "have not", "hasn't": "has not", "hadn't": "had not",
        "I'm": "I am", "I've": "I have", "I'll": "I will", "I'd": "I would",
        "you're": "you are", "you've": "you have", "you'll": "you will",
        "we're": "we are", "we've": "we have", "we'll": "we will",
        "they're": "they are", "they've": "they have", "they'll": "they will",
        "it's": "it is", "that's": "that is", "there's": "there is", "what's": "what is"
    }

    hedges = [
        "it appears that", "it is possible that", "the results suggest",
        "it seems that", "there is evidence that", "it may be the case that",
        "to some extent", "in general terms", "one could argue that"
    ]

    boosters = [
        "clearly", "indeed", "in fact", "undoubtedly",
        "without doubt", "it is evident that", "there is no question that"
    ]

    connectors = {
        "contrast": ["however", "on the other hand", "in contrast", "nevertheless"],
        "addition": ["moreover", "furthermore", "in addition", "what is more"],
        "cause_effect": ["therefore", "thus", "as a result", "consequently", "hence"],
        "example": ["for instance", "for example", "to illustrate"],
        "conclusion": ["in conclusion", "overall", "in summary", "to sum up"]
    }

    sentence_starters = [
        "It is important to note that",
        "A key implication is that",
        "The evidence indicates that",
        "The findings suggest that",
        "This demonstrates that",
        "It should be emphasized that",
        "From these observations, it follows that"
    ]

    # Expand contractions
    for contraction, expansion in contractions.items():
        pattern = re.compile(r'\b' + re.escape(contraction) + r'\b', re.IGNORECASE)
        text = pattern.sub(expansion, text)

    sentences = nltk.sent_tokenize(text)
    modified = []

    for i, sent in enumerate(sentences):
        # Add hedge (capitalized, since it now starts the sentence)
        if random.random() < hedge_prob and i > 0:
            hedge = random.choice(hedges)
            sent = f"{hedge.capitalize()}, {sent[0].lower() + sent[1:]}"

        # Add booster
        elif random.random() < booster_prob:
            booster = random.choice(boosters)
            sent = f"{booster.capitalize()}, {sent}"

        # Add starter
        elif random.random() < starter_prob and i > 0:
            starter = random.choice(sentence_starters)
            sent = f"{starter} {sent[0].lower() + sent[1:]}"

        # Add connector
        if i > 0 and random.random() < connector_prob:
            conn_type = random.choice(list(connectors.keys()))
            connector = random.choice(connectors[conn_type])
            sent = f"{connector.capitalize()}, {sent[0].lower() + sent[1:]}"

        modified.append(sent)

    return ' '.join(modified)
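A sketch of this stage in isolation, with the probabilities pushed up so the effect is visible (the inserted phrases are chosen at random):

random.seed(1)
text = "We can't be sure of the mechanism. The data shows a clear trend."
print(add_academic_discourse(text, hedge_prob=0.9, connector_prob=0.9))
# "can't" is expanded to "cannot"; the second sentence may gain a hedge
# and/or a connector such as "Moreover, ...", depending on the draws.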
# ============================================================================
# STAGE 4: SENTENCE STRUCTURE VARIATION
# ============================================================================
def vary_sentence_structure(text: str, split_prob: float = 0.4, merge_prob: float = 0.3,
                            min_split_length: int = 20, max_merge_length: int = 10) -> str:
    """Split long sentences at conjunctions and merge short neighbouring ones."""

    connectors = {
        "contrast": ["however", "nevertheless", "nonetheless", "in contrast"],
        "addition": ["moreover", "furthermore", "in addition", "what is more"],
        "cause_effect": ["therefore", "thus", "consequently", "as a result"],
        "example": ["for example", "for instance", "to illustrate"],
        "conclusion": ["in conclusion", "overall", "in summary"]
    }

    all_connectors = {c.lower() for group in connectors.values() for c in group}

    def already_has_connector(sentence: str) -> bool:
        lower_sent = sentence.strip().lower()
        return any(lower_sent.startswith(conn) for conn in all_connectors)

    def choose_connector_type(prev_sent: str, curr_sent: str) -> str:
        curr_lower = curr_sent.lower()

        if any(phrase in curr_lower for phrase in ["such as", "including", "for instance"]):
            return "example"
        elif curr_lower.startswith(("but", "although", "however")):
            return "contrast"
        elif any(phrase in curr_lower for phrase in ["because", "due to", "as a result"]):
            return "cause_effect"

        # Semantic similarity fallback: similar sentences get an additive
        # connector, dissimilar ones a contrastive connector
        if prev_sent:
            emb = similarity_model.encode([prev_sent, curr_sent])
            score = np.dot(emb[0], emb[1]) / (np.linalg.norm(emb[0]) * np.linalg.norm(emb[1]))
            return "addition" if score > 0.6 else "contrast"

        return "addition"

    doc = nlp(text)
    sentences = list(doc.sents)
    modified = []

    for idx, sent in enumerate(sentences):
        sent_text = sent.text.strip()
        words = sent_text.split()

        # Split long sentences at a coordinating conjunction or subordinator
        if len(words) > min_split_length and random.random() < split_prob:
            split_points = [tok.i - sent.start for tok in sent if tok.dep_ in ("cc", "mark")]
            if split_points:
                split_point = random.choice(split_points)
                tokens = list(sent)
                if 0 < split_point < len(tokens):
                    first = ' '.join([t.text for t in tokens[:split_point]]).strip()
                    second = ' '.join([t.text for t in tokens[split_point+1:]]).strip()
                    if first and second and len(second.split()) > 3:
                        if random.random() < 0.5 and not already_has_connector(second):
                            conn_type = choose_connector_type(first, second)
                            connector = random.choice(connectors[conn_type])
                            second = f"{connector.capitalize()}, {second[0].lower() + second[1:]}"
                        else:
                            # The second clause now starts a sentence, so capitalize it
                            second = second[0].upper() + second[1:]
                        modified.extend([first + '.', second])
                        continue

        # Merge short sentences
        if (modified and len(words) < max_merge_length and
                len(modified[-1].split()) < max_merge_length and random.random() < merge_prob):
            prev_sent = modified[-1]
            if not already_has_connector(sent_text):
                conn_type = choose_connector_type(prev_sent, sent_text)
                connector = random.choice(connectors[conn_type])
                combined = f"{prev_sent.rstrip('.')}; {connector}, {sent_text[0].lower() + sent_text[1:]}"
                modified[-1] = combined
                continue

        modified.append(sent_text)

    return ' '.join(modified)
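For example, forcing a split by raising split_prob and lowering the length threshold (the connector, if one is added, is random):

long_sent = ("The survey collected responses from several hundred participants "
             "and the results were analyzed with standard statistical methods.")
print(vary_sentence_structure(long_sent, split_prob=1.0, merge_prob=0.0,
                              min_split_length=10))
# Likely result: two sentences split at the "and", e.g.
# "The survey collected ... participants. The results were analyzed ..."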
# ============================================================================
# QUALITY CHECK
# ============================================================================
def calculate_similarity(text1: str, text2: str) -> float:
    """Calculate cosine similarity between the sentence embeddings of two texts."""
    try:
        embeddings = similarity_model.encode([text1.strip(), text2.strip()])
        similarity = float(np.dot(embeddings[0], embeddings[1]) / (
            np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1])
        ))
        return similarity
    except Exception as e:
        logger.error(f"Similarity calculation failed: {e}")
        return 0.0
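For instance (scores depend on the embedding model; a close paraphrase like this typically lands well above 0.8 with MiniLM):

print(calculate_similarity("The cat sat on the mat.",
                           "A cat was sitting on the mat."))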
# ============================================================================
# MAIN HUMANIZER FUNCTION
# ============================================================================
def humanize_text(
    input_text: str,
    # Stage toggles
    enable_stage1: bool,
    enable_stage2: bool,
    enable_stage3: bool,
    enable_stage4: bool,
    # Stage 1 parameters
    temperature: float,
    top_p: float,
    num_beams: int,
    max_length: int,
    repetition_penalty: float,
    length_penalty: float,
    # Stage 2 parameters
    synonym_prob: float,
    min_word_length: int,
    max_synonyms: int,
    # Stage 3 parameters
    hedge_prob: float,
    booster_prob: float,
    connector_prob: float,
    starter_prob: float,
    # Stage 4 parameters
    split_prob: float,
    merge_prob: float,
    min_split_length: int,
    max_merge_length: int
):
    """Run the input text through all enabled stages and score the result."""

    if not input_text.strip():
        return "", 0.0, "Please enter some text to humanize."

    try:
        result = input_text
        stages_applied = []

        # Stage 1: Paraphrasing (chunked for texts over 100 words)
        if enable_stage1:
            word_count = len(result.split())
            if word_count > 100:
                result = paraphrase_long_text(result, max_length, num_beams, temperature,
                                              top_p, repetition_penalty, length_penalty)
            else:
                result = paraphrase_text(result, max_length, num_beams, temperature,
                                         top_p, repetition_penalty, length_penalty)
            stages_applied.append("Paraphrasing")

        # Stage 2: Synonym Replacement
        if enable_stage2:
            result = synonym_replace(result, synonym_prob, min_word_length, max_synonyms)
            stages_applied.append("Synonym Replacement")

        # Stage 3: Academic Discourse
        if enable_stage3:
            result = add_academic_discourse(result, hedge_prob, booster_prob,
                                            connector_prob, starter_prob)
            stages_applied.append("Academic Discourse")

        # Stage 4: Sentence Structure
        if enable_stage4:
            result = vary_sentence_structure(result, split_prob, merge_prob,
                                             min_split_length, max_merge_length)
            stages_applied.append("Sentence Structure")

        # Calculate similarity between input and output
        similarity = calculate_similarity(input_text, result)

        # Generate status message
        if not stages_applied:
            status = "⚠️ No stages enabled. Please enable at least one stage."
        else:
            status = f"✅ Successfully applied: {', '.join(stages_applied)}"

        return result, similarity, status

    except Exception as e:
        logger.error(f"Error in humanization: {e}")
        import traceback
        traceback.print_exc()
        return "", 0.0, f"❌ Error: {str(e)}"
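The pipeline can also be exercised outside the UI; a hypothetical direct call mirroring the interface defaults:

out, sim, status = humanize_text(
    "AI systems are increasingly used in education.",
    True, True, True, True,        # stage toggles
    0.7, 0.9, 4, 512, 1.2, 1.0,    # Stage 1: temperature .. length_penalty
    0.3, 3, 3,                     # Stage 2: synonym_prob, min_word_length, max_synonyms
    0.2, 0.15, 0.25, 0.1,          # Stage 3: hedge/booster/connector/starter probabilities
    0.4, 0.3, 20, 10               # Stage 4: split/merge probabilities and length bounds
)
print(status, round(sim, 4))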
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_gradio_interface():
    """Create the Gradio interface"""

    with gr.Blocks(theme=gr.themes.Soft(), title="Text Humanizer Pro") as demo:
        gr.Markdown(
            """
            # 🤖 Text Humanizer Pro
            Transform AI-generated text into more natural, human-like content with full control over the transformation pipeline.
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                input_text = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter your text here to humanize...",
                    lines=10
                )

                with gr.Row():
                    submit_btn = gr.Button("🚀 Transform Text", variant="primary", size="lg")
                    clear_btn = gr.Button("🔄 Clear", size="lg")

                output_text = gr.Textbox(
                    label="Humanized Output",
                    lines=10,
                    interactive=False
                )

                with gr.Row():
                    similarity_output = gr.Number(label="Similarity Score", precision=4)
                    status_output = gr.Textbox(label="Status", interactive=False)

            with gr.Column(scale=1):
                gr.Markdown("## 🎛️ Pipeline Configuration")

                with gr.Accordion("Stage Selection", open=True):
                    enable_stage1 = gr.Checkbox(label="Stage 1: Paraphrasing (T5)", value=True)
                    enable_stage2 = gr.Checkbox(label="Stage 2: Synonym Replacement", value=True)
                    enable_stage3 = gr.Checkbox(label="Stage 3: Academic Discourse", value=True)
                    enable_stage4 = gr.Checkbox(label="Stage 4: Sentence Structure", value=True)

                with gr.Accordion("Stage 1: Paraphrasing Parameters", open=False):
                    temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
                    num_beams = gr.Slider(1, 10, value=4, step=1, label="Num Beams")
                    max_length = gr.Slider(128, 1024, value=512, step=64, label="Max Length")
                    repetition_penalty = gr.Slider(1.0, 2.0, value=1.2, step=0.1, label="Repetition Penalty")
                    length_penalty = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Length Penalty")

                with gr.Accordion("Stage 2: Synonym Replacement Parameters", open=False):
                    synonym_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05, label="Replacement Probability")
                    min_word_length = gr.Slider(2, 8, value=3, step=1, label="Min Word Length")
                    max_synonyms = gr.Slider(1, 10, value=3, step=1, label="Max Synonyms")

                with gr.Accordion("Stage 3: Academic Discourse Parameters", open=False):
                    hedge_prob = gr.Slider(0.0, 0.5, value=0.2, step=0.05, label="Hedge Probability")
                    booster_prob = gr.Slider(0.0, 0.5, value=0.15, step=0.05, label="Booster Probability")
                    connector_prob = gr.Slider(0.0, 0.5, value=0.25, step=0.05, label="Connector Probability")
                    starter_prob = gr.Slider(0.0, 0.3, value=0.1, step=0.05, label="Starter Probability")

                with gr.Accordion("Stage 4: Sentence Structure Parameters", open=False):
                    split_prob = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Split Probability")
                    merge_prob = gr.Slider(0.0, 1.0, value=0.3, step=0.05, label="Merge Probability")
                    min_split_length = gr.Slider(10, 40, value=20, step=5, label="Min Split Length (words)")
                    max_merge_length = gr.Slider(5, 20, value=10, step=1, label="Max Merge Length (words)")

        # Event handlers
        submit_btn.click(
            fn=humanize_text,
            inputs=[
                input_text,
                enable_stage1, enable_stage2, enable_stage3, enable_stage4,
                temperature, top_p, num_beams, max_length, repetition_penalty, length_penalty,
                synonym_prob, min_word_length, max_synonyms,
                hedge_prob, booster_prob, connector_prob, starter_prob,
                split_prob, merge_prob, min_split_length, max_merge_length
            ],
            outputs=[output_text, similarity_output, status_output]
        )

        clear_btn.click(
            fn=lambda: ("", "", 0.0, ""),
            inputs=[],
            outputs=[input_text, output_text, similarity_output, status_output]
        )

        gr.Markdown(
            """
            ### 📊 Similarity Score Guide:
            - **0.90-1.00**: Nearly identical (excellent paraphrase)
            - **0.70-0.89**: Good paraphrase (recommended range)
            - **0.50-0.69**: Moderate similarity
            - **0.00-0.49**: Low similarity (meaning may have changed)
            """
        )

    return demo

# ============================================================================
# LAUNCH
# ============================================================================
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
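One deployment note: on Hugging Face Spaces, share=True is ignored (the Space provides its own public URL), and the explicit host/port matter mainly when running the script on your own machine; inside a Space, a plain launch would suffice:

demo.launch()  # Space defaults apply there; locally this binds to 127.0.0.1:7860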