VictorM-Coder committed on
Commit
29fe724
·
verified ·
1 Parent(s): 8e9c524

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -13
app.py CHANGED
@@ -10,23 +10,32 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
  model = model.to(device)
11
  model.eval()
12
 
13
# Paraphrasing helper
def paraphrase_t5(text, temperature=0.9, top_p=0.92):
    """Return a T5-generated paraphrase of *text*.

    Empty/whitespace-only input short-circuits to a warning string.
    `top_p` controls nucleus sampling; `temperature` controls creativity.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    prompt = f"paraphrase: {text}"
    encoded = tokenizer(
        [prompt],
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)

    generated = model.generate(
        **encoded,
        max_new_tokens=256,
        do_sample=True,
        top_p=float(top_p),          # nucleus sampling
        temperature=float(temperature),  # creativity
        num_return_sequences=1,
    )

    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded
 
 
 
 
 
 
 
 
 
30
 
31
  # Gradio UI
32
  iface = gr.Interface(
 
10
  model = model.to(device)
11
  model.eval()
12
 
13
# Paraphrasing Function with Paragraph Splitting
def paraphrase_t5(text, temperature=0.9, top_p=0.92):
    """Paraphrase *text* with T5, one paragraph at a time.

    Parameters
    ----------
    text : str
        Input text; paragraphs are delimited by newlines.
    temperature : float
        Sampling temperature (creativity).
    top_p : float
        Nucleus-sampling cutoff.

    Returns
    -------
    str
        Paraphrased paragraphs joined by blank lines, or a warning
        string when the input is empty/whitespace-only.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    # Split by paragraphs; drop blank lines so empty paragraphs are skipped
    paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
    paraphrased_paragraphs = []

    for p in paragraphs:
        inputs = tokenizer(
            [f"paraphrase: {p}"],
            return_tensors="pt",
            truncation=True,
            padding=True,
        ).to(device)

        # Inference only: no_grad avoids building the autograd graph,
        # cutting memory use during generation (model is in eval mode).
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                top_p=float(top_p),              # nucleus sampling
                temperature=float(temperature),  # creativity
                num_return_sequences=1,
            )

        paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
        paraphrased_paragraphs.append(paraphrased)

    # Join paraphrased paragraphs with blank lines between them
    return "\n\n".join(paraphrased_paragraphs)
39
 
40
  # Gradio UI
41
  iface = gr.Interface(