VictorM-Coder committed on
Commit
c417d0a
·
verified ·
1 Parent(s): 6066278

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -39
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import gradio as gr
2
- from docx import Document
3
- import io
4
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  import re
6
 
@@ -8,20 +6,7 @@ import re
8
  tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
9
  model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
10
 
11
- def fix_punctuation(text):
12
- """
13
- Fix spacing around commas, periods, semicolons, colons, exclamation and question marks.
14
- Removes space before punctuation and ensures exactly one space after punctuation.
15
- """
16
- # Remove space before punctuation
17
- text = re.sub(r'\s+([,.!?;:])', r'\1', text)
18
- # Ensure single space after punctuation if not end of line
19
- text = re.sub(r'([,.!?;:])([^\s\n])', r'\1 \2', text)
20
- # Normalize multiple spaces
21
- text = re.sub(r'\s+', ' ', text)
22
- # Remove spaces at start/end
23
- return text.strip()
24
-
25
  def paraphrase_text(text):
26
  input_text = f"paraphrase: {text} </s>"
27
  input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
@@ -31,43 +16,33 @@ def paraphrase_text(text):
31
  do_sample=True,
32
  top_k=120,
33
  top_p=0.95,
34
- temperature=1.5
35
  )
36
- paraphrased = tokenizer.decode(output_ids[0], skip_special_tokens=True)
37
- return fix_punctuation(paraphrased)
38
 
 
39
  def chunk_text(text, max_sentences=4):
40
  sentences = re.split(r'(?<=[.!?]) +', text.strip())
41
  return [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]
42
 
 
43
  def full_article_paraphrase(text):
44
  chunks = chunk_text(text)
45
  return "\n\n".join(paraphrase_text(chunk.strip()) for chunk in chunks if chunk.strip())
46
 
47
- def extract_text_from_docx(file_obj):
48
- file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
49
- doc = Document(io.BytesIO(file_bytes))
50
- return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
51
-
52
- def full_pipeline(input_text=None, file=None):
53
- if file is not None:
54
- input_text = extract_text_from_docx(file)
55
  if not input_text or len(input_text.strip()) < 10:
56
- return "Please enter or upload valid text."
57
- result = full_article_paraphrase(input_text)
58
- return result
59
 
 
60
  demo = gr.Interface(
61
- fn=full_pipeline,
62
- inputs=[
63
- gr.Textbox(label="Paste Text (optional)", lines=20, placeholder="Or upload a .docx file below..."),
64
- gr.File(label="Upload .docx File (optional)", file_types=[".docx"])
65
- ],
66
- outputs=[
67
- gr.Textbox(label="Paraphrased Output")
68
- ],
69
  title="Smart Paraphraser",
70
- description="Paste or upload your article. Get paraphrased output."
71
  )
72
 
73
  if __name__ == "__main__":
 
1
  import gradio as gr
 
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import re
4
 
 
6
  tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
7
  model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
8
 
9
+ # Function to paraphrase a single chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def paraphrase_text(text):
11
  input_text = f"paraphrase: {text} </s>"
12
  input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
 
16
  do_sample=True,
17
  top_k=120,
18
  top_p=0.95,
19
+ temperature=1.3
20
  )
21
+ return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
22
 
23
+ # Split text into chunks (4 sentences each)
24
  def chunk_text(text, max_sentences=4):
25
  sentences = re.split(r'(?<=[.!?]) +', text.strip())
26
  return [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]
27
 
28
+ # Paraphrase the full text
29
  def full_article_paraphrase(text):
30
  chunks = chunk_text(text)
31
  return "\n\n".join(paraphrase_text(chunk.strip()) for chunk in chunks if chunk.strip())
32
 
33
+ # Gradio pipeline
34
+ def paraphrase_pipeline(input_text):
 
 
 
 
 
 
35
  if not input_text or len(input_text.strip()) < 10:
36
+ return "Please enter valid text."
37
+ return full_article_paraphrase(input_text)
 
38
 
39
+ # Gradio interface
40
  demo = gr.Interface(
41
+ fn=paraphrase_pipeline,
42
+ inputs=gr.Textbox(label="Paste Text Here", lines=20, placeholder="Enter your text..."),
43
+ outputs=gr.Textbox(label="Paraphrased Text"),
 
 
 
 
 
44
  title="Smart Paraphraser",
45
+ description="Paste your text and get paraphrased output instantly."
46
  )
47
 
48
  if __name__ == "__main__":