VictorM-Coder committed on
Commit
c95d469
·
verified ·
1 Parent(s): 4c0b50d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -26
app.py CHANGED
@@ -2,16 +2,15 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch, gradio as gr
3
  import re
4
 
5
- # --- Load Model (Option 1: FLAN-T5-Paraphraser) ---
6
- model_name = "alykassem/FLAN-T5-Paraphraser"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
- model = model.to(device)
12
  model.eval()
13
 
14
- # --- Helpers ---
15
  def split_sentences(text):
16
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
17
  return [s for s in sentences if s]
@@ -22,39 +21,35 @@ def clean_sentence(sent):
22
  sent += "."
23
  return sent
24
 
25
- # --- Main function ---
26
- def paraphrase_fn(text, num_return_sequences=1, temperature=1.0, top_p=0.9):
27
  if not text.strip():
28
  return "⚠️ Please enter some text"
29
 
30
  num_return_sequences = int(num_return_sequences)
31
  sentences = split_sentences(text)
32
- all_outputs = []
33
 
34
  for sent in sentences:
35
- input_text = "paraphrase: " + sent + " </s>"
36
- inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
37
 
38
  outputs = model.generate(
39
  **inputs,
40
  max_new_tokens=128,
41
  num_return_sequences=num_return_sequences,
42
  do_sample=True,
43
- top_p=float(top_p),
44
- temperature=float(temperature),
 
 
45
  )
46
- decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
47
-
48
- seen, unique = set(), []
49
- for d in decoded:
50
- d = clean_sentence(d)
51
- if d not in seen:
52
- unique.append(d)
53
- seen.add(d)
54
 
55
- all_outputs.append(unique[0])
 
 
 
56
 
57
- return " ".join(all_outputs).strip()
58
 
59
  # --- Gradio Interface ---
60
  iface = gr.Interface(
@@ -62,12 +57,12 @@ iface = gr.Interface(
62
  inputs=[
63
  gr.Textbox(lines=8, placeholder="Paste text here..."),
64
  gr.Slider(1, 3, step=1, value=1, label="Variants"),
65
- gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
66
- gr.Slider(0.6, 1.0, step=0.01, value=0.9, label="Top-p"),
67
  ],
68
- outputs=gr.Textbox(label="Output"),
69
- title="📝 Writenix Paraphraser (FLAN-T5)",
70
- description="Paraphrasing powered by FLAN-T5, fine-tuned on high-quality datasets."
71
  )
72
 
73
  iface.launch()
 
2
  import torch, gradio as gr
3
  import re
4
 
5
+ # --- Load Model ---
6
+ model_name = "Ateeqq/Text-Rewriter-Paraphraser"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+ model.to(device)
12
  model.eval()
13
 
 
14
  def split_sentences(text):
15
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
16
  return [s for s in sentences if s]
 
21
  sent += "."
22
  return sent
23
 
24
+ def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
 
25
  if not text.strip():
26
  return "⚠️ Please enter some text"
27
 
28
  num_return_sequences = int(num_return_sequences)
29
  sentences = split_sentences(text)
30
+ paraphrased_sentences = []
31
 
32
  for sent in sentences:
33
+ prompt = "paraphraser: " + sent
34
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
35
 
36
  outputs = model.generate(
37
  **inputs,
38
  max_new_tokens=128,
39
  num_return_sequences=num_return_sequences,
40
  do_sample=True,
41
+ top_p=top_p,
42
+ temperature=temperature,
43
+ no_repeat_ngram_size=2,
44
+ early_stopping=True
45
  )
 
 
 
 
 
 
 
 
46
 
47
+ # Take the first decoded paraphrase (no de-duplication is performed)
48
+ decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
49
+ clean = [clean_sentence(d) for d in decoded]
50
+ paraphrased_sentences.append(clean[0])
51
 
52
+ return " ".join(paraphrased_sentences)
53
 
54
  # --- Gradio Interface ---
55
  iface = gr.Interface(
 
57
  inputs=[
58
  gr.Textbox(lines=8, placeholder="Paste text here..."),
59
  gr.Slider(1, 3, step=1, value=1, label="Variants"),
60
+ gr.Slider(0.1, 1.5, step=0.1, value=0.8, label="Temperature"),
61
+ gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
62
  ],
63
+ outputs=gr.Textbox(label="Paraphrased Text"),
64
+ title="Text Rewriter Paraphraser (T5-Base)",
65
+ description="High-quality model fine-tuned on 430K examples for natural, non-AI-detectable paraphrasing."
66
  )
67
 
68
  iface.launch()