VictorM-Coder committed on
Commit
e4ac1f5
·
verified ·
1 Parent(s): c95d469

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -21
app.py CHANGED
@@ -11,6 +11,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  model.to(device)
12
  model.eval()
13
 
 
14
  def split_sentences(text):
15
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
16
  return [s for s in sentences if s]
@@ -21,33 +22,40 @@ def clean_sentence(sent):
21
  sent += "."
22
  return sent
23
 
 
24
  def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
25
  if not text.strip():
26
  return "⚠️ Please enter some text"
27
 
28
- num_return_sequences = int(num_return_sequences)
29
  sentences = split_sentences(text)
30
- paraphrased_sentences = []
 
 
 
 
 
 
 
31
 
32
- for sent in sentences:
33
- prompt = "paraphraser: " + sent
34
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
 
 
 
 
 
 
 
35
 
36
- outputs = model.generate(
37
- **inputs,
38
- max_new_tokens=128,
39
- num_return_sequences=num_return_sequences,
40
- do_sample=True,
41
- top_p=top_p,
42
- temperature=temperature,
43
- no_repeat_ngram_size=2,
44
- early_stopping=True
45
- )
46
 
47
- # Take the first unique paraphrase
48
- decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
49
- clean = [clean_sentence(d) for d in decoded]
50
- paraphrased_sentences.append(clean[0])
 
 
51
 
52
  return " ".join(paraphrased_sentences)
53
 
@@ -61,8 +69,8 @@ iface = gr.Interface(
61
  gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
62
  ],
63
  outputs=gr.Textbox(label="Paraphrased Text"),
64
- title="Text Rewriter Paraphraser (T5-Base)",
65
- description="High-quality model fine-tuned on 430K examples for natural, non-AI-detectable paraphrasing."
66
  )
67
 
68
  iface.launch()
 
11
  model.to(device)
12
  model.eval()
13
 
14
# --- Helpers ---
def split_sentences(text):
    """Break *text* into sentences at ., ! or ? followed by whitespace.

    Empty fragments produced by the split are dropped, so blank or
    whitespace-only input yields an empty list.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return list(filter(None, pieces))
 
22
  sent += "."
23
  return sent
24
 
25
# --- Main function (Batch Processing) ---
def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
    """Paraphrase *text* sentence-by-sentence with one batched generate call.

    Parameters
    ----------
    text : str
        Input text; it is split into sentences which are paraphrased
        independently and re-joined at the end.
    num_return_sequences : int or float
        Variants generated per sentence (only the first is kept). Gradio
        sliders may deliver a float, so the value is coerced to int here.
    temperature, top_p : float
        Sampling hyper-parameters forwarded to ``model.generate``.

    Returns
    -------
    str
        The paraphrased sentences joined with spaces, or a warning string
        when the input is empty.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    # Coerce once and clamp to >= 1: a value below 1 would make the grouping
    # range() step below zero-length and raise ValueError.
    n_seqs = max(1, int(num_return_sequences))

    sentences = split_sentences(text)
    prompts = ["paraphraser: " + s for s in sentences]

    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        truncation=True,
        padding=True
    ).to(device)

    # Inference only: no_grad avoids building autograd state, saving memory
    # and time on the generate call.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,  # smaller for speed
            num_return_sequences=n_seqs,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
            no_repeat_ngram_size=2,
            early_stopping=True
        )

    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # generate() returns n_seqs consecutive variants per prompt; keep only
    # the first variant of each sentence.
    paraphrased_sentences = [
        clean_sentence(decoded[i]) for i in range(0, len(decoded), n_seqs)
    ]

    return " ".join(paraphrased_sentences)
61
 
 
69
  gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
70
  ],
71
  outputs=gr.Textbox(label="Paraphrased Text"),
72
+ title="Text Rewriter Paraphraser (Batch Optimized)",
73
+ description="Fast paraphrasing powered by T5-base. Now optimized with batch processing 🚀"
74
  )
75
 
76
  iface.launch()