VictorM-Coder commited on
Commit
718df1d
·
verified ·
1 Parent(s): da07e2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -54
app.py CHANGED
@@ -1,66 +1,64 @@
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
- import torch, gradio as gr, re
3
 
4
- # --- Load Model ---
5
- model_name = "prithivida/parrot_paraphraser_on_T5"
6
- tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
8
 
9
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
- model = model.to(device)
11
- model.eval()
12
-
13
- # --- Helpers ---
14
- def split_paragraphs(text):
15
- return [p.strip() for p in text.split("\n") if p.strip()]
16
-
17
- def split_sentences(text):
18
- return re.split(r'(?<=[.!?])\s+', text.strip())
19
-
20
- def clean_text(text):
21
- return re.sub(r'\s+', ' ', text).strip()
22
-
23
- def paraphrase_chunk(text_chunk):
24
- inputs = tokenizer([text_chunk], return_tensors="pt", truncation=True, padding=True).to(device)
25
- outputs = model.generate(
26
- **inputs,
27
- max_new_tokens=100, # small chunks only
28
- num_beams=4,
29
- do_sample=False
30
  )
31
- return clean_text(tokenizer.decode(outputs[0], skip_special_tokens=True))
32
-
33
- # --- Main Humanizer ---
34
- def humanize_text(text):
35
- if not text.strip():
36
- return "⚠️ Please enter some text"
37
 
38
- paragraphs = split_paragraphs(text)
39
- humanized_paragraphs = []
 
40
 
41
- for para in paragraphs:
42
- sentences = split_sentences(para)
43
- paraphrased_sentences = []
44
 
45
- # Paraphrase each sentence separately for accuracy
46
- for sent in sentences:
47
- paraphrased_sentences.append(paraphrase_chunk("paraphrase: " + sent))
 
 
 
48
 
49
- # Rebuild paragraph and optionally add light filler once
50
- new_para = " ".join(paraphrased_sentences)
51
- if len(paraphrased_sentences) > 2:
52
- new_para = "In fact, " + new_para[0].lower() + new_para[1:]
53
- humanized_paragraphs.append(new_para)
54
 
55
- return "\n\n".join(humanized_paragraphs)
 
 
 
 
 
 
 
56
 
57
- # --- Gradio Interface ---
58
- iface = gr.Interface(
59
- fn=humanize_text,
60
- inputs=gr.Textbox(lines=15, placeholder="Paste text here..."),
61
- outputs=gr.Textbox(label="Humanized Output"),
62
- title="✨ Writenix Humanizer v3",
63
- description="Paraphrases large text paragraph-by-paragraph for natural flow, keeps full content, adds light filler per paragraph."
 
 
 
 
 
64
  )
65
 
66
- iface.launch()
 
 
1
+ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ import re
4
 
5
# --- Model setup ---
# T5 checkpoint fine-tuned on PAWS for paraphrase generation.
MODEL_NAME = "Vamsi/T5_Paraphrase_Paws"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
 
8
 
9
def paraphrase_text(text):
    """Paraphrase one chunk of text with the T5 paraphrase model.

    Args:
        text: A raw text chunk — ideally only a few sentences, so it fits
            inside the model's input window.

    Returns:
        The paraphrased text decoded from the model's sampled output.
        Sampling (``do_sample=True``) makes the result non-deterministic.
    """
    # T5 paraphrase checkpoints expect the "paraphrase:" task prefix.
    # Do NOT append a literal "</s>": the tokenizer adds the EOS token
    # itself, so the textual "</s>" would be tokenized on top of it.
    input_text = f"paraphrase: {text}"
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=256,  # explicit cap instead of relying on model_max_length
    )
    # Inference only — skip building the autograd graph.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=256,
            do_sample=True,
            top_k=120,
            top_p=0.95,
            temperature=1.5,  # NOTE(review): very high — verify output quality
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
 
 
21
 
22
def chunk_text(text, max_sentences=4):
    """Split *text* into chunks of at most ``max_sentences`` sentences.

    Args:
        text: Input text; sentences are assumed to end in '.', '!' or '?'.
        max_sentences: Maximum number of sentences per chunk.

    Returns:
        A list of chunk strings; an empty list for blank input.
    """
    stripped = text.strip()
    if not stripped:
        # Avoid returning [''] for blank input.
        return []
    # Split after sentence-ending punctuation followed by ANY whitespace
    # (space OR newline) — the old ' +' pattern glued newline-separated
    # sentences together.
    sentences = re.split(r'(?<=[.!?])\s+', stripped)
    return [' '.join(sentences[i:i + max_sentences])
            for i in range(0, len(sentences), max_sentences)]
25
 
26
def full_article_paraphrase(text):
    """Paraphrase an entire article chunk-by-chunk.

    Splits the input into sentence chunks, paraphrases each non-blank
    chunk, and joins the rewritten chunks as separate paragraphs.
    """
    rewritten = []
    for chunk in chunk_text(text):
        cleaned = chunk.strip()
        if cleaned:
            rewritten.append(paraphrase_text(cleaned))
    return "\n\n".join(rewritten)
29
 
30
def extract_text_from_docx(file_obj):
    """Extract paragraph text from an uploaded .docx file.

    Args:
        file_obj: Either a filesystem path (str / os.PathLike — what
            Gradio's ``gr.File`` component supplies by default), a
            file-like object with ``.read()``, or raw bytes.

    Returns:
        The document's non-blank paragraphs joined with newlines.
    """
    from docx import Document  # lazy import: only needed for uploads
    import io
    import os

    if isinstance(file_obj, (str, os.PathLike)):
        # Gradio's File component hands over a temp-file path by default;
        # the old bytes-only path raised TypeError on a str here.
        doc = Document(file_obj)
    else:
        # File-like object or raw bytes.
        file_bytes = file_obj.read() if hasattr(file_obj, "read") else file_obj
        doc = Document(io.BytesIO(file_bytes))
    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
36
 
37
def get_ai_score(text):
    """Return a mock AI-detection label based purely on text length.

    NOTE(review): this is a placeholder heuristic, not a real detector —
    anything longer than 100 characters is labelled "Likely Human".
    """
    if len(text) > 100:
        return "AI Detection Score: Likely Human"
    return "AI Detection Score: Short Text"
 
 
 
39
 
40
def full_pipeline(input_text=None, file=None):
    """Gradio entry point: paraphrase pasted text or an uploaded .docx.

    An uploaded file takes precedence over pasted text. Returns a pair
    (paraphrased text, AI-detection label) matching the two output boxes,
    or an error pair when no usable text is provided.
    """
    if file is not None:
        input_text = extract_text_from_docx(file)
    # Reject missing or too-short input (fewer than 10 non-space chars).
    if len((input_text or "").strip()) < 10:
        return "Please enter or upload valid text.", "No text to analyze."
    result = full_article_paraphrase(input_text)
    return result, get_ai_score(result)
48
 
49
# --- Gradio UI wiring ---
# Two optional inputs (pasted text, .docx upload) feed full_pipeline,
# which returns (paraphrased text, AI-detection label) for the two
# output textboxes.
demo = gr.Interface(
    fn=full_pipeline,
    inputs=[
        gr.Textbox(label="Paste Text (optional)", lines=20, placeholder="Or upload a .docx file below..."),
        gr.File(label="Upload .docx File (optional)", file_types=[".docx"])
    ],
    outputs=[
        gr.Textbox(label="Paraphrased Output"),
        gr.Textbox(label="AI Detection Score")
    ],
    title="Smart Paraphraser",
    description="Paste or upload your article. Get paraphrased output and see an AI detection score."
)

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()