VictorM-Coder committed on
Commit
dc81ef5
·
verified ·
1 Parent(s): 432272a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -18
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch, gradio as gr
 
3
 
4
  # Load Model (lighter + faster)
5
  model_name = "humarin/chatgpt_paraphraser_on_T5_base"
@@ -10,38 +11,48 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
  model = model.to(device)
11
  model.eval()
12
 
13
- # Paraphrasing Function with Paragraph Splitting
 
 
 
 
 
 
14
  def paraphrase_t5(text, temperature=0.9, top_p=0.92):
15
  if not text.strip():
16
  return "⚠️ Please enter some text"
17
 
18
- # Split by paragraphs
19
  paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
20
  paraphrased_paragraphs = []
21
 
22
  for p in paragraphs:
23
- # Stronger paraphrasing prompt
24
- prompt = f"Paraphrase this in a more natural, human style while keeping meaning:\n{p}"
25
 
26
- inputs = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)
 
27
 
28
- outputs = model.generate(
29
- **inputs,
30
- max_new_tokens=256,
31
- do_sample=True,
32
- top_p=float(top_p),
33
- temperature=float(temperature),
34
- num_return_sequences=1,
35
- no_repeat_ngram_size=3 # avoid repeats like 'inequality, inequality'
36
- )
37
 
38
- paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
39
- paraphrased_paragraphs.append(paraphrased)
 
 
 
 
 
 
 
40
 
41
- return "\n\n".join(paraphrased_paragraphs)
 
42
 
 
 
 
 
43
 
44
- # Gradio UI
45
  iface = gr.Interface(
46
  fn=paraphrase_t5,
47
  inputs=[
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch, gradio as gr
3
+ import re
4
 
5
  # Load Model (lighter + faster)
6
  model_name = "humarin/chatgpt_paraphraser_on_T5_base"
 
11
  model = model.to(device)
12
  model.eval()
13
 
14
+ # --- Helpers ---
15
+ def split_sentences(paragraph):
16
+ # Split into sentences based on punctuation + space
17
+ sentences = re.split(r'(?<=[.!?])\s+', paragraph.strip())
18
+ return [s for s in sentences if s]
19
+
20
# --- Main Paraphrasing Function ---
def paraphrase_t5(text, temperature=0.9, top_p=0.92):
    """Paraphrase *text* paragraph by paragraph, sentence by sentence.

    Each newline-separated paragraph is split into sentences, every
    sentence is rewritten by the T5 paraphraser, and the rewritten
    sentences are stitched back together so the paragraph structure of
    the input is preserved.

    Args:
        text: Input text; may contain several newline-separated paragraphs.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The paraphrased text, or a warning string when the input is blank.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    paraphrased_paragraphs = []

    for p in paragraphs:
        sentences = split_sentences(p)
        if not sentences:
            continue

        # Batch every sentence of the paragraph through a single
        # generate() call instead of one forward pass per sentence —
        # the tokenizer already pads, so this is a free speed-up.
        prompts = [
            f"Paraphrase this in a more natural, human style while keeping meaning:\n{s}"
            for s in sentences
        ]
        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True).to(device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=512,  # allow long outputs
            do_sample=True,
            top_p=float(top_p),
            temperature=float(temperature),
            num_return_sequences=1,
            no_repeat_ngram_size=3,  # avoid repeats like 'inequality, inequality'
        )

        # One decoded string per input sentence, in order.
        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Rejoin sentences into a paragraph
        paraphrased_paragraphs.append(" ".join(d.strip() for d in decoded))

    return "\n\n".join(paraphrased_paragraphs)
54
 
55
+ # --- Gradio UI ---
56
  iface = gr.Interface(
57
  fn=paraphrase_t5,
58
  inputs=[