tyfsadik committed on
Commit
1903f1b
Β·
verified Β·
1 Parent(s): b536917

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -390
app.py CHANGED
@@ -1,118 +1,118 @@
1
- # app.py β€” TYF Sadik AI Studio v2.0
2
- # Professional Paraphrasing β€’ Grammar Correction β€’ AI Detection
3
- # πŸ”— https://tyfsadik.org | πŸ’Ό MD. Taki Yasir Faraji Sadik | πŸ“ North York, ON, Canada
4
-
5
- import os, re, string, random, time, json
6
  import gradio as gr
7
- import nltk
8
  import spacy
9
  import subprocess
10
- from nltk.corpus import wordnet, stopwords
11
- from nltk.tokenize import word_tokenize, sent_tokenize
 
 
12
  from spellchecker import SpellChecker
13
-
14
- # ─────────────────────────────────────────────────────────────
15
- # 🎨 BRANDING & METADATA β€” MAKE TYF SADIK FAMOUS
16
- # ─────────────────────────────────────────────────────────────
17
- BRANDING = {
18
- "name": "TYF Sadik",
19
- "full_name": "MD. Taki Yasir Faraji Sadik",
20
- "title": "Cybersecurity & IT Professional",
21
- "roles": ["SOC Analyst", "Network Analyst", "Cloud Infrastructure"],
22
- "location": "North York, ON, Canada",
23
- "education": "Diploma in Computer Networking & Cybersecurity β€” Seneca Polytechnic",
24
- "certs": ["CompTIA A+", "Azure Fundamentals", "AWS Cloud Practitioner", "ISC2 CC"],
25
- "website": "https://tyfsadik.org",
26
- "github": "https://github.com/TYFSADIK",
27
- "linkedin": "https://linkedin.com/in/tyfsadik",
28
- "version": "v2.0-PRO",
29
- "tagline": "Empowering Secure, Human-Centric AI Solutions"
30
- }
31
-
32
- # ─────────────────────────────────────────────────────────────
33
- # πŸ“¦ DEPENDENCIES & SETUP
34
- # ─────────────────────────────────────────────────────────────
35
- nltk.download('punkt', quiet=True)
36
- nltk.download('stopwords', quiet=True)
37
- nltk.download('averaged_perceptron_tagger', quiet=True)
38
- nltk.download('wordnet', quiet=True)
39
- nltk.download('omw-1.4', quiet=True)
40
- nltk.download('punkt_tab', quiet=True)
41
-
42
  stop_words = set(stopwords.words("english"))
 
 
43
  exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
44
  exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
45
 
46
- # Spell checker
 
 
 
47
  spell = SpellChecker()
48
 
49
- # SpaCy model
50
  try:
51
  nlp = spacy.load("en_core_web_sm")
52
  except OSError:
53
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], capture_output=True)
54
  nlp = spacy.load("en_core_web_sm")
55
 
56
- # ─────────────────────────────────────────────────────────────
57
- # ✨ CORE FUNCTIONS β€” ENHANCED & OPTIMIZED
58
- # ─────────────────────────────────────────────────────────────
59
  def plagiarism_removal(text):
60
- """Replace words with synonyms while preserving grammar and style"""
61
- def replace_word(word):
62
  if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
63
  return word
64
 
 
65
  synonyms = set()
66
  for syn in wordnet.synsets(word):
67
  for lemma in syn.lemmas():
68
- name = lemma.name()
69
- if "_" not in name and name.isalpha() and name.lower() != word.lower():
70
- synonyms.add(name)
71
-
72
- pos = nltk.pos_tag([word])[0][1]
73
- if pos in exclude_tags:
74
  return word
75
 
76
- filtered = [s for s in synonyms if nltk.pos_tag([s])[0][1] == pos]
77
- if not filtered:
 
78
  return word
79
-
80
- choice = random.choice(filtered)
81
- return choice.title() if word.istitle() else choice
82
 
83
- tokens = word_tokenize(text)
84
- result = [replace_word(t) for t in tokens]
 
 
 
 
 
 
85
 
86
- # Fix punctuation spacing
87
- cleaned = []
88
- for i, token in enumerate(result):
89
- if token in string.punctuation and i > 0 and cleaned:
90
- cleaned[-1] += token
91
  else:
92
- cleaned.append(token)
93
-
94
- return " ".join(cleaned)
 
 
 
 
95
 
96
  def remove_redundant_words(text):
97
- """Remove filler words that weaken writing"""
98
  doc = nlp(text)
99
- fillers = {"actually", "basically", "literally", "really", "very", "just", "simply", "quite"}
100
- return ' '.join([t.text for t in doc if t.text.lower() not in fillers])
 
101
 
102
  def fix_punctuation_spacing(text):
103
- """Ensure proper spacing around punctuation"""
104
- text = re.sub(r'\s+([,.!?;:\'"])', r'\1', text)
105
- text = re.sub(r'([,.!?;:\'"])\s+', r'\1 ', text)
106
- return text.strip()
 
 
 
 
 
 
 
 
107
 
108
  def fix_possessives(text):
109
- """Correct possessive apostrophes"""
110
- return re.sub(r"(\w)\s*'\s*s", r"\1's", text)
111
 
112
- def capitalize_properly(text):
113
- """Capitalize sentences and proper nouns"""
114
  doc = nlp(text)
115
- result = []
 
116
  for sent in doc.sents:
117
  sentence = []
118
  for token in sent:
@@ -122,324 +122,110 @@ def capitalize_properly(text):
122
  sentence.append(token.text.capitalize())
123
  else:
124
  sentence.append(token.text)
125
- result.append(' '.join(sentence))
126
- return ' '.join(result)
127
-
128
- def ensure_proper_sentences(text):
129
- """Ensure every sentence ends with punctuation and starts capitalized"""
130
- sentences = re.split(r'(?<=[.!?])\s+', text.strip())
131
- fixed = []
132
- for s in sentences:
133
- if s:
134
- s = s[0].upper() + s[1:] if len(s) > 1 else s.upper()
135
- if not re.search(r'[.!?]$', s):
136
- s += '.'
137
- fixed.append(s)
138
- return ' '.join(fixed)
139
-
140
- def correct_articles(text):
141
- """Fix a/an usage"""
 
142
  doc = nlp(text)
143
- result = []
144
  for token in doc:
145
- if token.text.lower() in ['a', 'an'] and token.nbor(1, default=None):
146
- next_word = token.nbor(1).text.lower()
147
- if token.text.lower() == "a" and next_word[0] in "aeiou":
148
- result.append("An" if token.text[0].isupper() else "an")
149
- elif token.text.lower() == "an" and next_word[0] not in "aeiou":
150
- result.append("A" if token.text[0].isupper() else "a")
 
 
 
 
 
 
 
 
 
 
 
151
  else:
152
- result.append(token.text)
153
  else:
154
- result.append(token.text)
155
- return ' '.join(result)
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  def correct_spelling(text):
158
- """Basic spell correction using pyspellchecker"""
159
  words = word_tokenize(text)
160
- corrected = []
 
161
  for word in words:
162
- if word in string.punctuation or word.isdigit():
163
- corrected.append(word)
 
164
  else:
165
- candidates = spell.candidates(word)
166
- corrected.append(candidates.pop() if candidates else word)
167
- return ' '.join(corrected)
168
-
169
- def full_pipeline(text):
170
- """Execute complete paraphrasing + correction pipeline"""
171
- if not text or len(text.strip()) < 10:
172
- return "[!] Please enter at least 10 characters of text."
173
-
174
- # Process paragraph by paragraph to preserve structure
175
- paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
176
- results = []
177
-
178
- for para in paragraphs:
179
- cleaned = remove_redundant_words(para)
180
- synonymed = plagiarism_removal(cleaned)
181
- capped = capitalize_properly(synonymed)
182
- sent_fixed = ensure_proper_sentences(capped)
183
- articles_fixed = correct_articles(sent_fixed)
184
- spelling_fixed = correct_spelling(articles_fixed)
185
- possessives_fixed = fix_possessives(spelling_fixed)
186
- punct_fixed = fix_punctuation_spacing(possessives_fixed)
187
- results.append(punct_fixed)
188
-
189
- return '\n\n'.join(results)
190
-
191
- def get_text_stats(text):
192
- """Generate useful metrics about the text"""
193
- if not text:
194
- return {}
195
- words = word_tokenize(text)
196
- sents = sent_tokenize(text)
197
- return {
198
- "words": len([w for w in words if w.isalpha()]),
199
- "sentences": len(sents),
200
- "avg_sentence_length": round(len(words) / max(len(sents), 1), 1),
201
- "unique_words": len(set(w.lower() for w in words if w.isalpha())),
202
- "readability": "Easy" if len(words) < 100 else "Medium" if len(words) < 300 else "Advanced"
203
- }
204
-
205
- # ─────────────────────────────────────────────────────────────
206
- # 🎨 CYBERPUNK TERMINAL UI β€” TYF SADIK STYLE
207
- # ─────────────────────────────────────────────────────────────
208
- TERMINAL_CSS = f"""
209
- @import url('https://fonts.googleapis.com/css2?family=Share+Tech+Mono&family=Orbitron:wght@700;900&display=swap');
210
- *, body, .gradio-container {{
211
- font-family: 'Share Tech Mono', monospace !important;
212
- background: #030803 !important;
213
- color: #00ff41 !important;
214
- }}
215
- .gr-button {{
216
- font-family: 'Orbitron', monospace !important;
217
- text-transform: uppercase !important;
218
- letter-spacing: 2px !important;
219
- font-weight: 700 !important;
220
- border-radius: 2px !important;
221
- transition: all 0.15s ease !important;
222
- }}
223
- .gr-button-primary {{
224
- background: #002200 !important;
225
- border: 1px solid #00ff41 !important;
226
- color: #00ff41 !important;
227
- }}
228
- .gr-button-primary:hover {{
229
- background: #00ff41 !important;
230
- color: #000 !important;
231
- box-shadow: 0 0 22px rgba(0,255,65,0.7) !important;
232
- transform: translateY(-1px) !important;
233
- }}
234
- .gr-button-secondary {{
235
- background: #000 !important;
236
- border: 1px solid #004400 !important;
237
- color: #00aa33 !important;
238
- }}
239
- .gr-button-secondary:hover {{
240
- border-color: #00ff41 !important;
241
- color: #00ff41 !important;
242
- }}
243
- label, .gr-form-label {{
244
- color: #00cc44 !important;
245
- text-transform: uppercase !important;
246
- letter-spacing: 1px !important;
247
- font-size: 0.8em !important;
248
- }}
249
- textarea, input, select {{
250
- background: #000 !important;
251
- color: #00ff41 !important;
252
- border: 1px solid #003300 !important;
253
- border-radius: 2px !important;
254
- font-family: inherit !important;
255
- }}
256
- textarea:focus, input:focus, select:focus {{
257
- border-color: #00ff41 !important;
258
- box-shadow: 0 0 12px rgba(0,255,65,0.25) !important;
259
- }}
260
- .gr-panel, .gr-box, .gr-form {{
261
- background: rgba(0,12,0,0.92) !important;
262
- border: 1px solid #001a00 !important;
263
- }}
264
- .gr-accordion {{
265
- background: rgba(0,8,0,0.95) !important;
266
- border: 1px solid #001a00 !important;
267
- }}
268
- footer {{ display: none !important; }}
269
- ::-webkit-scrollbar {{ width: 5px; background: #000; }}
270
- ::-webkit-scrollbar-thumb {{ background: #003300; border-radius: 2px; }}
271
- ::-webkit-scrollbar-thumb:hover {{ background: #00ff41; }}
272
- .brand-highlight {{ color: #00ff41 !important; font-weight: bold; }}
273
- .cert-badge {{
274
- display: inline-block;
275
- background: #001a00;
276
- border: 1px solid #004400;
277
- padding: 2px 8px;
278
- margin: 2px;
279
- border-radius: 2px;
280
- font-size: 0.75em;
281
- }}
282
- """
283
-
284
- def create_interface():
285
- with gr.Blocks(title=f"{BRANDING['name']} AI Studio β€’ {BRANDING['version']}", css=TERMINAL_CSS) as demo:
286
-
287
- # ─── HEADER: PERSONAL BRANDING ───
288
- gr.HTML(f"""
289
- <div style="text-align:center; padding:24px 0 16px 0; border-bottom:1px solid #002200; margin-bottom:16px;">
290
- <div style="font-family:'Orbitron',monospace; font-size:2.4em; font-weight:900;
291
- color:#00ff41; letter-spacing:4px; text-shadow:0 0 12px rgba(0,255,65,0.5);">
292
- {BRANDING['name'].upper()}
293
- </div>
294
- <div style="font-family:'Orbitron',monospace; font-size:1.1em; color:#00cc44;
295
- margin-top:4px; letter-spacing:3px;">{BRANDING['version']} β€’ {BRANDING['tagline']}</div>
296
- <div style="color:#005522; font-size:0.82em; margin-top:12px; line-height:1.9;">
297
- <span class="brand-highlight">{BRANDING['full_name']}</span><br>
298
- {BRANDING['title']} β€’ {' | '.join(BRANDING['roles'])}<br>
299
- πŸŽ“ {BRANDING['education']}<br>
300
- {' '.join(f'<span class="cert-badge">{c}</span>' for c in BRANDING['certs'])}<br>
301
- πŸ“ {BRANDING['location']}<br><br>
302
- πŸ”— <a href="{BRANDING['website']}" target="_blank" style="color:#00aa44; text-decoration:none;">{BRANDING['website']}</a>
303
- &nbsp;&nbsp;
304
- πŸ’» <a href="{BRANDING['github']}" target="_blank" style="color:#00aa44; text-decoration:none;">{BRANDING['github']}</a>
305
- &nbsp;&nbsp;
306
- πŸ”— <a href="{BRANDING['linkedin']}" target="_blank" style="color:#00aa44; text-decoration:none;">LinkedIn</a>
307
- </div>
308
- </div>""")
309
-
310
- # ─── TAB 1: PARAPHRASING & GRAMMAR CORRECTION ───
311
- with gr.Tab("✍️ Paraphrase & Correct"):
312
- gr.HTML("<div style='color:#005522; font-size:0.78em; letter-spacing:2px; text-transform:uppercase; margin-bottom:8px;'>[INPUT] Your Text</div>")
313
- input_text = gr.Textbox(
314
- label="",
315
- placeholder="Paste your text here...\n\nβ€’ Minimum 10 characters\nβ€’ Supports paragraphs\nβ€’ Academic, professional, or casual content",
316
- lines=12,
317
- elem_classes="terminal-input"
318
- )
319
-
320
- with gr.Row():
321
- process_btn = gr.Button("β–Ά EXECUTE PARAPHRASE", variant="primary", size="lg")
322
- clear_btn = gr.Button("πŸ—‘ Clear", variant="secondary", size="sm")
323
-
324
- gr.HTML("<div style='color:#005522; font-size:0.78em; letter-spacing:2px; text-transform:uppercase; margin:16px 0 8px 0;'>[OUTPUT] Enhanced Text</div>")
325
- output_text = gr.Textbox(label="", lines=12, interactive=False, elem_classes="terminal-output")
326
-
327
- with gr.Row():
328
- copy_btn = gr.Button("πŸ“‹ Copy Result", variant="secondary", size="sm")
329
- stats_btn = gr.Button("πŸ“Š View Stats", variant="secondary", size="sm")
330
-
331
- stats_output = gr.JSON(label="Text Statistics", visible=False)
332
-
333
- # Examples to showcase quality
334
- with gr.Accordion("✨ Example Inputs", open=False):
335
- gr.Examples(
336
- examples=[
337
- "I am excited to apply for the Support Engineer position. As a cybersecurity professional, I bring hands-on experience in troubleshooting and customer support.",
338
- "Furthermore, it is important to note that the implementation of this strategy will facilitate substantial improvements in operational efficiency.",
339
- "The company utilized advanced algorithms to leverage big data analytics, consequently enhancing their decision-making processes."
340
- ],
341
- inputs=[input_text],
342
- label="Click to load example"
343
- )
344
-
345
- # ─── TAB 2: AI DETECTION (Placeholder for future expansion) ───
346
- with gr.Tab("πŸ€– AI Detection"):
347
- gr.HTML("""
348
- <div style="text-align:center; padding:40px; color:#006633;">
349
- <div style="font-size:1.2em; margin-bottom:12px;">πŸ”’ Advanced AI Detection Module</div>
350
- <div style="margin-bottom:20px;">Coming soon in v2.1 β€” Powered by custom ensemble models</div>
351
- <div style="font-size:0.85em; color:#004422;">
352
- Follow <a href="https://github.com/TYFSADIK" target="_blank" style="color:#00aa44;">@TYFSADIK</a>
353
- for updates on new features!
354
- </div>
355
- </div>""")
356
- detect_input = gr.Textbox(label="Text to Analyze", lines=5, interactive=False)
357
- detect_btn = gr.Button("πŸ” Analyze (Coming Soon)", interactive=False)
358
- detect_output = gr.Textbox(label="Result", interactive=False)
359
-
360
- # ─── FOOTER: CALL TO ACTION & CREDITS ───
361
- gr.HTML(f"""
362
- <div style="background:#000800; border:1px solid #001a00; border-radius:2px; padding:20px; margin:24px 0; text-align:center; font-family:'Share Tech Mono',monospace;">
363
- <div style="color:#004422; font-family:'Orbitron',monospace; font-size:0.85em; margin-bottom:12px; letter-spacing:2px;">
364
- πŸ”— CONNECT WITH TYF SADIK
365
- </div>
366
- <div style="display:flex; flex-wrap:wrap; justify-content:center; gap:12px; margin-bottom:16px;">
367
- <a href="{BRANDING['website']}" target="_blank" style="background:#000; color:#00ff41; border:1px solid #004400; border-radius:2px; padding:8px 16px; text-decoration:none; font-weight:bold;">🌐 {BRANDING['website']}</a>
368
- <a href="{BRANDING['github']}" target="_blank" style="background:#000; color:#00ff41; border:1px solid #004400; border-radius:2px; padding:8px 16px; text-decoration:none; font-weight:bold;">πŸ’» GitHub</a>
369
- <a href="{BRANDING['linkedin']}" target="_blank" style="background:#000; color:#00ff41; border:1px solid #004400; border-radius:2px; padding:8px 16px; text-decoration:none; font-weight:bold;">πŸ”— LinkedIn</a>
370
- </div>
371
- <div style="color:#003311; font-size:0.80em; line-height:1.8;">
372
- {BRANDING['full_name']} ({BRANDING['name']}) β€’ {BRANDING['location']}<br>
373
- Cybersecurity & IT Professional β€’ SOC/Network Analyst β€’ Cloud Infrastructure<br>
374
- <span style="color:#002208;">Educational Tool β€’ Open Source β€’ Use Responsibly</span><br>
375
- <span style="color:#004422; font-size:0.85em;">⭐ If you found this useful, star the repo on GitHub!</span>
376
- </div>
377
- </div>""")
378
-
379
- # ─── EVENT HANDLERS ───
380
- def process_with_stats(text):
381
- result = full_pipeline(text)
382
- stats = get_text_stats(result)
383
- return result, stats, gr.update(visible=True)
384
-
385
- process_btn.click(
386
- fn=process_with_stats,
387
- inputs=[input_text],
388
- outputs=[output_text, stats_output, stats_output]
389
- )
390
-
391
- input_text.submit(
392
- fn=process_with_stats,
393
- inputs=[input_text],
394
- outputs=[output_text, stats_output, stats_output]
395
- )
396
-
397
- clear_btn.click(
398
- fn=lambda: ("", None, gr.update(visible=False)),
399
- inputs=[],
400
- outputs=[output_text, stats_output, stats_output]
401
- )
402
-
403
- copy_btn.click(
404
- fn=None,
405
- inputs=[output_text],
406
- outputs=[],
407
- js="""
408
- async (text) => {
409
- if (!text) return ['βœ— Nothing to copy'];
410
- try {
411
- await navigator.clipboard.writeText(text);
412
- return ['βœ“ Copied to clipboard!'];
413
- } catch (err) {
414
- return ['βœ— Copy failed'];
415
- }
416
- }
417
- """
418
- )
419
-
420
- stats_btn.click(
421
- fn=lambda t: (get_text_stats(t), gr.update(visible=True)) if t else (None, gr.update(visible=False)),
422
- inputs=[output_text],
423
- outputs=[stats_output, stats_output]
424
- )
425
-
426
- return demo
427
-
428
- # ─────────────────────────────────────────────────────────────
429
- # πŸš€ LAUNCH
430
- # ─────────────────────────────────────────────────────────────
431
- if __name__ == "__main__":
432
- print(f"\n{'='*60}")
433
- print(f" {BRANDING['name']} AI Studio β€’ {BRANDING['version']}")
434
- print(f" {BRANDING['tagline']}")
435
- print(f" πŸ”— {BRANDING['website']}")
436
- print(f"{'='*60}\n")
437
-
438
- app = create_interface()
439
- app.launch(
440
- server_name="0.0.0.0",
441
- server_port=7860,
442
- show_error=True,
443
- quiet=True,
444
- share=False # Set to True for public link
445
- )
 
1
+ import os
 
 
 
 
2
  import gradio as gr
3
+ from transformers import pipeline
4
  import spacy
5
  import subprocess
6
+ import nltk
7
+ from nltk.corpus import wordnet
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
  from spellchecker import SpellChecker
11
+ import re
12
+ import string
13
+ import random
14
+
15
# Download necessary NLTK data once at startup.
# quiet=True keeps the Space logs readable (each download otherwise prints
# a progress banner on every boot).
for _corpus in (
    'punkt',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
    'punkt_tab',
):
    nltk.download(_corpus, quiet=True)

# Initialize stopwords (skipped entirely by the synonym replacer).
stop_words = set(stopwords.words("english"))

# Words we don't want to replace: pronoun/verb/function-word POS tags and
# auxiliary verb forms whose substitution would break the grammar.
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}

# Initialize the English text classification pipeline for AI detection.
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Initialize the spell checker
spell = SpellChecker()

# Ensure the SpaCy model is installed; download it on first run.
# check=True makes a failed download raise here, instead of surfacing later
# as a confusing OSError from the second spacy.load call.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
43
 
 
 
 
44
def plagiarism_removal(text):
    """Paraphrase *text* by swapping eligible words for random WordNet synonyms.

    Stopwords, protected auxiliaries, punctuation, and excluded POS
    categories are kept unchanged; replacements preserve title-case.
    Output is non-deterministic (random.choice over an unordered set).
    """
    def plagiarism_remover(word):
        # Never touch stopwords, protected words, or punctuation tokens.
        if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
            return word

        # Check the POS first so excluded categories are rejected before
        # doing the comparatively expensive WordNet synset scan.
        pos_tag_word = nltk.pos_tag([word])[0]
        if pos_tag_word[1] in exclude_tags:
            return word

        # Collect single-token alphabetic synonyms that differ from the word.
        synonyms = set()
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                name = lemma.name()
                if "_" not in name and name.isalpha() and name.lower() != word.lower():
                    synonyms.add(name)

        # Keep only synonyms whose POS tag matches the original word's.
        filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
        if not filtered_synonyms:
            return word

        synonym_choice = random.choice(filtered_synonyms)
        # Preserve title-casing (e.g. sentence-initial words).
        return synonym_choice.title() if word.istitle() else synonym_choice

    para_split = word_tokenize(text)
    final_text = [plagiarism_remover(word) for word in para_split]

    # Re-attach punctuation to the preceding token so the final space-join
    # doesn't produce "word ," artifacts.
    corrected_text = []
    for i, token in enumerate(final_text):
        if token in string.punctuation and i > 0:
            corrected_text[-1] += token
        else:
            corrected_text.append(token)

    return " ".join(corrected_text)
83
+
84
def predict_en(text):
    """Classify *text* with the ChatGPT-detector model.

    Returns:
        tuple: (predicted label, confidence score) of the top prediction.
    """
    top_prediction = pipeline_en(text)[0]
    return top_prediction['label'], top_prediction['score']
87
 
88
def remove_redundant_words(text):
    """Strip common filler words that add no meaning (e.g. "really", "very")."""
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    doc = nlp(text)
    return ' '.join(tok.text for tok in doc if tok.text.lower() not in fillers)
93
 
94
def fix_punctuation_spacing(text):
    """Glue stray punctuation back onto the preceding word.

    A whitespace-separated token that begins with punctuation is merged into
    the token before it, then any remaining "space + mark" pairs are removed.
    """
    marks = {',', '.', "'", '!', '?', ':'}

    merged = []
    for token in text.split(' '):
        # A non-empty token starting with punctuation belongs to the
        # previous word (if there is one).
        if merged and token and token[0] in marks:
            merged[-1] += token
        else:
            merged.append(token)

    result = ' '.join(merged)
    # Sweep up any leftover space-before-punctuation pairs, in the same
    # order as the original replace chain.
    for mark in (',', '.', "'", '!', '?', ':'):
        result = result.replace(' ' + mark, mark)
    return result
107
 
108
def fix_possessives(text):
    """Rejoin spaced-out possessives such as "dog ' s" or "dog 's" into "dog's".

    ``\\s+`` before the apostrophe and ``\\s*`` after cover one-or-more
    spaces on either side; the trailing ``\\b`` requires the ``s`` to be a
    whole suffix, so words that merely start with "s" after a quote
    (e.g. ``the dog 'sign'``) are no longer mangled.
    """
    return re.sub(r"(\w)\s+'\s*s\b", r"\1's", text)
111
 
112
+ def capitalize_sentences_and_nouns(text):
 
113
  doc = nlp(text)
114
+ corrected_text = []
115
+
116
  for sent in doc.sents:
117
  sentence = []
118
  for token in sent:
 
122
  sentence.append(token.text.capitalize())
123
  else:
124
  sentence.append(token.text)
125
+ corrected_text.append(' '.join(sentence))
126
+
127
+ return ' '.join(corrected_text)
128
+
129
def force_first_letter_capital(text):
    """Uppercase each sentence's first character and guarantee terminal punctuation.

    Sentences are split on whitespace that follows a word character plus
    ``.``, ``!`` or ``?``; any sentence lacking a terminal mark gets a
    period appended.
    """
    chunks = re.split(r'(?<=\w[.!?])\s+', text)

    repaired = []
    for chunk in chunks:
        if not chunk:
            continue  # re.split can yield empty strings; drop them
        chunk = chunk[0].capitalize() + chunk[1:]
        if not re.search(r'[.!?]$', chunk):
            chunk += '.'
        repaired.append(chunk)

    return " ".join(repaired)
141
+
142
def correct_tense_errors(text):
    """Normalize auxiliary verbs to their base form via WordNet's morphy.

    NOTE(review): despite the name, this only touches tokens spaCy tags as
    auxiliary VERBs (dep aux/auxpass); other tense issues are left as-is.
    """
    doc = nlp(text)
    normalized = [
        (wordnet.morphy(token.text, wordnet.VERB) or token.text)
        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}
        else token.text
        for token in doc
    ]
    return ' '.join(normalized)
152
+
153
def correct_article_errors(text):
    """Fix "a" vs "an" based on the first letter of the following word.

    Fixes over the previous version: a trailing "a"/"an" no longer raises
    IndexError (``token.nbor(1)`` fails on the last token), and capitalized
    articles ("A"/"An") are now corrected with their case preserved.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        lowered = token.text.lower()
        # Only consider an article that actually has a following token.
        if lowered in ('a', 'an') and token.i + 1 < len(doc):
            next_char = doc[token.i + 1].text[0].lower()
            if lowered == 'a' and next_char in "aeiou":
                replacement = 'an'
            elif lowered == 'an' and next_char not in "aeiou":
                replacement = 'a'
            else:
                replacement = None

            if replacement is None:
                corrected_text.append(token.text)
            else:
                # Keep the original capitalization ("A apple" -> "An apple").
                corrected_text.append(replacement.capitalize() if token.text[0].isupper() else replacement)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
168
+
169
def ensure_subject_verb_agreement(text):
    """Make verbs agree in number with their spaCy-detected subjects.

    Bug fixed: the previous version appended a corrected verb *before* the
    subject while also keeping the original verb, producing duplicated and
    reordered output ("The dogs runs" -> "The run dogs runs"). The verb is
    now replaced in place at its own token index, preserving word order.
    """
    doc = nlp(text)
    # Start from the original token sequence and patch verbs in place.
    words = [token.text for token in doc]
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                # Singular subject, non-3rd-person-singular verb: add -s.
                words[token.head.i] = token.head.lemma_ + "s"
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                # Plural subject with a 3rd-person-singular verb: use lemma.
                words[token.head.i] = token.head.lemma_
    return ' '.join(words)
180
 
181
def correct_spelling(text):
    """Spell-check each token and return the corrected, space-joined text.

    Punctuation and digit tokens are kept untouched. For other words an
    arbitrary candidate from the spell checker is used (``set.pop()`` has
    no defined order). Note: output loses original capitalization, since
    pyspellchecker candidates are lowercase.
    """
    words = word_tokenize(text)
    corrected_words = []

    for word in words:
        # Punctuation and numbers are never "misspelled"; keep them as-is.
        if word in string.punctuation or word.isdigit():
            corrected_words.append(word)
            continue

        # Compute candidates once (the old code called this twice per word);
        # pyspellchecker returns None when it has no suggestion.
        candidates = spell.candidates(word)
        if candidates:
            corrected_words.append(candidates.pop())  # arbitrary candidate
        else:
            corrected_words.append(word)

    return ' '.join(corrected_words)
193
+
194
def paraphrase_and_correct(text):
    """Run the full paraphrase + grammar-correction pipeline over *text*.

    Paragraphs (separated by blank lines) are processed independently so
    the original paragraph structure survives the rewrite.
    """
    # Pipeline order matters: filler removal and synonym substitution first,
    # then the grammar fix-ups, with spelling and punctuation cleanup last.
    steps = (
        remove_redundant_words,
        plagiarism_removal,
        capitalize_sentences_and_nouns,
        force_first_letter_capital,
        correct_article_errors,
        correct_tense_errors,
        ensure_subject_verb_agreement,
        fix_possessives,
        correct_spelling,
        fix_punctuation_spacing,
    )

    processed_paragraphs = []
    for paragraph in text.split("\n\n"):
        for step in steps:
            paragraph = step(paragraph)
        processed_paragraphs.append(paragraph)

    # Reassemble the text with its paragraph breaks restored.
    return "\n\n".join(processed_paragraphs)
213
+
214
+ # Gradio app setup
215
+ with gr.Blocks() as demo:
216
+ with gr.Tab("AI Detection"):
217
+ t1 = gr.Textbox(lines=5, label='Text')
218
+ button1 = gr.Button("πŸ€– Predict!")
219
+ label1 = gr.Textbox(lines=1, label='Predicted Label πŸŽƒ')
220
+ score1 = gr.Textbox(lines=1, label='Prob')
221
+
222
+ button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])
223
+
224
+ with gr.Tab("Paraphrasing & Grammar Correction"):
225
+ t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
226
+ button2 = gr.Button("πŸ”„ Paraphrase and Correct")
227
+ result2 = gr.Textbox(lines=5, label='Corrected Text')
228
+
229
+ button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)
230
+
231
+ demo.launch(share=True)