Eaz123 committed on
Commit
1e32c00
·
verified ·
1 Parent(s): ed54ab4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -129
app.py CHANGED
@@ -1,17 +1,31 @@
1
  import gradio as gr
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer
3
  import pandas as pd
 
 
 
 
4
 
5
  # Load the model
6
  model_name = "ramsrigouthamg/t5_paraphraser"
7
  tokenizer = T5Tokenizer.from_pretrained(model_name)
8
  model = T5ForConditionalGeneration.from_pretrained(model_name)
9
 
 
 
 
 
 
 
 
 
 
 
 
10
  def paraphrase_text(text, creativity_level=3, tone="neutral"):
11
  if not text.strip():
12
- return "", "", 0, 0
13
 
14
- # Adjust generation parameters
15
  num_beams = 3 + creativity_level
16
  temperature = 0.7 + (creativity_level * 0.15)
17
 
@@ -37,161 +51,187 @@ def paraphrase_text(text, creativity_level=3, tone="neutral"):
37
 
38
  paraphrased_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
39
 
40
- # Calculate word counts
41
  original_count = len(text.split())
42
  paraphrased_count = len(paraphrased_text.split())
 
43
 
44
- return paraphrased_text, original_count, paraphrased_count
 
 
 
 
 
45
 
46
- def create_comparison(original, paraphrased, orig_count, para_count):
47
- comparison = pd.DataFrame({
48
- "Metric": ["Text", "Word Count"],
49
- "Original": [original, orig_count],
50
- "Paraphrased": [paraphrased, para_count]
51
  })
52
- return comparison
53
 
54
- # Custom HTML with Tailwind CSS
55
- custom_html = """
56
- <script src="https://cdn.tailwindcss.com"></script>
57
- <style>
58
- .gradio-container {
 
 
 
59
  font-family: 'Inter', sans-serif;
60
- max-width: 100% !important;
61
- }
62
- .gradio-interface {
63
- padding: 0 !important;
64
- }
65
- .gradio-input, .gradio-output {
66
- border: 1px solid #e5e7eb !important;
67
- border-radius: 0.5rem !important;
68
- }
69
- .gradio-input textarea, .gradio-output textarea {
70
- padding: 1rem !important;
71
- }
72
- .gradio-button {
73
- background-color: #4f46e5 !important;
74
- color: white !important;
75
- border-radius: 0.375rem !important;
76
- padding: 0.5rem 1rem !important;
77
- font-weight: 500 !important;
78
- }
79
- .gradio-button:hover {
80
- background-color: #4338ca !important;
81
- }
82
- .gradio-slider .gradio-slider-handle {
83
- background: #4f46e5 !important;
84
- }
85
- .gradio-slider .gradio-slider-bar {
86
- background: #c7d2fe !important;
87
- }
88
- </style>
 
 
 
89
  """
90
 
91
- with gr.Blocks(theme=gr.themes.Base(), css=custom_html) as demo:
92
- # Header Section
93
- gr.HTML("""
94
- <div class="bg-indigo-600 text-white py-6 px-4 rounded-t-lg">
95
- <div class="max-w-4xl mx-auto text-center">
96
- <h1 class="text-3xl font-bold mb-2">Professional AI Paraphraser</h1>
97
- <p class="text-indigo-100">Transform your text while preserving meaning with advanced T5 AI</p>
98
- </div>
99
- </div>
100
- """)
101
 
102
  # Main Content
103
- with gr.Row().style(equal_height=True):
 
104
  with gr.Column(scale=1):
105
- # Input Section
106
- gr.HTML("""<div class="p-4 bg-white rounded-lg shadow-sm">""")
107
- input_text = gr.Textbox(
108
- lines=8,
109
- placeholder="Enter text to paraphrase...",
110
- label="Original Text",
111
- elem_classes=["border", "rounded-lg", "p-4", "w-full"]
112
- ).style(container=False)
113
-
114
- with gr.Row():
115
- creativity = gr.Slider(
116
- 1, 5, value=3,
117
- label="Creativity Level",
118
- info="1 = Conservative, 5 = Highly Creative",
119
- interactive=True
120
  )
121
- tone = gr.Dropdown(
122
- ["neutral", "formal", "casual", "academic"],
123
- value="neutral",
124
- label="Output Tone",
125
- interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  )
127
-
128
- submit_btn = gr.Button(
129
- "Paraphrase",
130
- variant="primary"
131
- ).style(full_width=True)
132
- gr.HTML("""</div>""")
133
 
 
134
  with gr.Column(scale=1):
135
- # Output Section
136
- gr.HTML("""<div class="p-4 bg-white rounded-lg shadow-sm h-full">""")
137
- output_text = gr.Textbox(
138
- lines=8,
139
- label="Paraphrased Text",
140
- interactive=False,
141
- elem_classes=["border", "rounded-lg", "p-4", "w-full"]
142
- ).style(container=False)
143
-
144
- # Stats Section
145
- gr.HTML("""
146
- <div class="mt-4 p-4 bg-gray-50 rounded-lg">
147
- <h3 class="font-medium text-gray-900 mb-3">Statistics</h3>
148
- <div class="grid grid-cols-3 gap-4">
149
- """)
150
-
151
- with gr.Row():
152
- orig_count = gr.Number(
153
- label="Original Words",
154
- interactive=False
155
- ).style(container=False)
156
- para_count = gr.Number(
157
- label="Paraphrased Words",
158
- interactive=False
159
- ).style(container=False)
160
- similarity = gr.Number(
161
- label="Similarity Score",
 
 
162
  interactive=False,
163
- visible=False # Placeholder for future feature
164
- ).style(container=False)
165
 
166
- gr.HTML("""</div></div></div>""")
167
-
168
- # Comparison Section
169
- gr.HTML("""<div class="mt-4 p-4 bg-white rounded-lg shadow-sm">""")
170
- with gr.Accordion("Detailed Comparison", open=False):
171
- comparison_table = gr.DataFrame(
172
- headers=["Metric", "Original", "Paraphrased"],
173
- datatype=["str", "str", "str"],
174
- interactive=False,
175
- elem_classes=["w-full"]
176
- )
177
- gr.HTML("""</div>""")
178
 
179
  # Footer
180
  gr.HTML("""
181
- <div class="mt-6 py-4 text-center text-gray-500 text-sm">
182
- <p>Powered by T5 Transformer | Embeddable in any website</p>
183
  </div>
184
  """)
185
 
186
- # Event handlers
187
  submit_btn.click(
188
- fn=paraphrase_text,
189
- inputs=[input_text, creativity, tone],
190
- outputs=[output_text, orig_count, para_count]
191
  ).then(
192
  fn=create_comparison,
193
- inputs=[input_text, output_text, orig_count, para_count],
194
  outputs=comparison_table
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  )
196
 
197
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from transformers import T5ForConditionalGeneration, T5Tokenizer
3
  import pandas as pd
4
+ import pdfplumber
5
+ from io import BytesIO
6
+ import difflib
7
+ import time
8
 
9
  # Load the model
10
  model_name = "ramsrigouthamg/t5_paraphraser"
11
  tokenizer = T5Tokenizer.from_pretrained(model_name)
12
  model = T5ForConditionalGeneration.from_pretrained(model_name)
13
 
14
def extract_text_from_file(file):
    """Return the plain text contained in an uploaded document.

    Args:
        file: Either an object exposing the document's filesystem path as
            ``.name`` or the path itself as a string.
            NOTE(review): which of the two the upload widget passes depends
            on the installed Gradio version -- confirm.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; an unsupported extension yields ``""``.

    Raises:
        Errors from opening or parsing the file (``OSError``,
        ``UnicodeDecodeError``, ``zipfile.BadZipFile``, ``KeyError`` for a
        DOCX without ``word/document.xml``) are not caught here.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import zipfile
    import xml.etree.ElementTree as ET

    path = str(getattr(file, "name", file))
    lowered = path.lower()  # match ".PDF" / ".Txt" as well

    if lowered.endswith(".pdf"):
        with pdfplumber.open(path) as pdf:
            # Guard against a page yielding no text (None) so join() cannot
            # fail on image-only or empty pages.
            return "\n".join(page.extract_text() or "" for page in pdf.pages)

    if lowered.endswith(".txt"):
        # Context manager closes the handle, which the bare open() did not.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    if lowered.endswith(".docx"):
        # A .docx is a zip archive, not UTF-8 text: the body lives in
        # word/document.xml as WordprocessingML paragraphs (w:p) made of
        # text runs (w:t).
        # NOTE(review): the XML comes from an untrusted upload; stdlib
        # parsers rely on the underlying expat build for protection
        # against entity-expansion attacks.
        ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
        with zipfile.ZipFile(path) as archive:
            root = ET.fromstring(archive.read("word/document.xml"))
        return "\n".join(
            "".join(run.text or "" for run in para.iter(ns + "t"))
            for para in root.iter(ns + "p")
        )

    return ""
21
+
22
def calculate_similarity(original, paraphrased):
    """Return how alike two texts are as a whole-number percentage (0-100).

    The score is ``difflib.SequenceMatcher.ratio()`` computed over the
    characters of both strings, scaled to 100 and rounded.

    Args:
        original: The source text.
        paraphrased: The rewritten text.

    Returns:
        An ``int`` between 0 and 100; two empty strings score 100.
    """
    # autojunk must be off: with the default heuristic, once the second
    # sequence reaches 200 items every element making up more than 1% of it
    # is ignored when seeding matches. For character-level prose that is
    # nearly every letter, so long near-identical texts could score 0.
    # Trade-off: matching cost grows faster on very long inputs.
    matcher = difflib.SequenceMatcher(None, original, paraphrased, autojunk=False)
    return round(matcher.ratio() * 100)
24
+
25
  def paraphrase_text(text, creativity_level=3, tone="neutral"):
26
  if not text.strip():
27
+ return "", "", 0, 0, 0
28
 
 
29
  num_beams = 3 + creativity_level
30
  temperature = 0.7 + (creativity_level * 0.15)
31
 
 
51
 
52
  paraphrased_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
53
 
 
54
  original_count = len(text.split())
55
  paraphrased_count = len(paraphrased_text.split())
56
+ similarity = calculate_similarity(text, paraphrased_text)
57
 
58
+ return paraphrased_text, original_count, paraphrased_count, similarity
59
+
60
def process_input(text, file, creativity, tone):
    """Paraphrase either the typed text or the contents of an uploaded file.

    When ``file`` is not ``None`` its extracted text replaces ``text``;
    the chosen text is then handed to ``paraphrase_text`` together with
    the creativity level and tone, and that call's result is returned
    unchanged.
    """
    source_text = text if file is None else extract_text_from_file(file)
    return paraphrase_text(source_text, creativity, tone)
64
 
65
def create_comparison(original, paraphrased, orig_count, para_count, similarity):
    """Build the three-row table comparing the original and paraphrased text.

    Rows are "Text", "Word Count" and "Similarity"; the similarity value is
    rendered as ``"<similarity>%"`` in both the "Original" and
    "Paraphrased" columns.

    Returns:
        A ``pandas.DataFrame`` with columns "Metric", "Original",
        "Paraphrased".
    """
    similarity_label = f"{similarity}%"
    rows = [
        ("Text", original, paraphrased),
        ("Word Count", orig_count, para_count),
        ("Similarity", similarity_label, similarity_label),
    ]
    return pd.DataFrame(rows, columns=["Metric", "Original", "Paraphrased"])
 
71
 
72
+ custom_css = """
73
+ :root {
74
+ --primary: #4f46e5;
75
+ --primary-dark: #4338ca;
76
+ --text: #1f2937;
77
+ --light-bg: #f9fafb;
78
+ }
79
+ .gradio-container {
80
  font-family: 'Inter', sans-serif;
81
+ max-width: 1200px !important;
82
+ margin: 0 auto !important;
83
+ }
84
+ .header {
85
+ background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
86
+ border-radius: 12px 12px 0 0;
87
+ }
88
+ .card {
89
+ background: white;
90
+ border-radius: 12px;
91
+ box-shadow: 0 4px 24px rgba(0,0,0,0.08);
92
+ padding: 24px;
93
+ margin-bottom: 24px;
94
+ }
95
+ .stats-card {
96
+ background: var(--light-bg);
97
+ border-radius: 12px;
98
+ padding: 16px;
99
+ }
100
+ .gradio-button {
101
+ background: var(--primary) !important;
102
+ transition: all 0.2s ease !important;
103
+ }
104
+ .gradio-button:hover {
105
+ background: var(--primary-dark) !important;
106
+ transform: translateY(-1px);
107
+ }
108
+ .file-upload {
109
+ border: 2px dashed #d1d5db !important;
110
+ border-radius: 8px !important;
111
+ padding: 20px !important;
112
+ }
113
  """
114
 
115
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css) as demo:
116
+ # Header
117
+ with gr.Column(elem_classes=["header"]):
118
+ gr.HTML("""
119
+ <div class="text-center py-8 text-white">
120
+ <h1 class="text-3xl font-bold mb-2">Advanced AI Paraphraser Pro</h1>
121
+ <p class="text-indigo-100 opacity-90">Enterprise-grade text transformation with semantic analysis</p>
122
+ </div>
123
+ """)
 
124
 
125
  # Main Content
126
+ with gr.Row():
127
+ # Input Panel
128
  with gr.Column(scale=1):
129
+ with gr.Column(elem_classes=["card"]):
130
+ gr.Markdown("### Input Text")
131
+ input_text = gr.Textbox(
132
+ lines=8,
133
+ placeholder="Paste your content here...",
134
+ label="Original Text",
135
+ elem_id="input-text"
 
 
 
 
 
 
 
 
136
  )
137
+
138
+ # File Upload
139
+ file_upload = gr.File(
140
+ label="Or upload document (PDF/TXT)",
141
+ file_types=[".pdf", ".txt", ".docx"],
142
+ elem_classes=["file-upload"]
143
+ )
144
+
145
+ with gr.Row():
146
+ creativity = gr.Slider(
147
+ 1, 5, value=3,
148
+ label="Creativity Level",
149
+ interactive=True,
150
+ info="Higher values produce more creative rewrites"
151
+ )
152
+ tone = gr.Dropdown(
153
+ ["neutral", "formal", "casual", "academic", "professional"],
154
+ value="professional",
155
+ label="Output Tone",
156
+ interactive=True
157
+ )
158
+
159
+ submit_btn = gr.Button(
160
+ "Paraphrase Content",
161
+ variant="primary",
162
+ size="lg"
163
  )
 
 
 
 
 
 
164
 
165
+ # Output Panel
166
  with gr.Column(scale=1):
167
+ with gr.Column(elem_classes=["card"]):
168
+ gr.Markdown("### Paraphrased Output")
169
+ output_text = gr.Textbox(
170
+ lines=8,
171
+ label="Rewritten Text",
172
+ interactive=True,
173
+ elem_id="output-text"
174
+ )
175
+
176
+ # Action Buttons
177
+ with gr.Row():
178
+ copy_btn = gr.Button("Copy to Clipboard", size="sm")
179
+ download_btn = gr.Button("Download Result", size="sm")
180
+
181
+ # Stats Card
182
+ with gr.Column(elem_classes=["stats-card"]):
183
+ gr.Markdown("**Text Analysis**")
184
+ with gr.Row():
185
+ orig_count = gr.Number(label="Original Words", precision=0)
186
+ para_count = gr.Number(label="Paraphrased Words", precision=0)
187
+ similarity = gr.Number(label="Similarity Score", suffix="%")
188
+
189
+ # Advanced Analysis
190
+ with gr.Column(elem_classes=["card"]):
191
+ with gr.Tabs():
192
+ with gr.TabItem("Detailed Comparison"):
193
+ comparison_table = gr.DataFrame(
194
+ headers=["Metric", "Original", "Paraphrased"],
195
+ datatype=["str", "str", "str"],
196
  interactive=False,
197
+ wrap=True
198
+ )
199
 
200
+ with gr.TabItem("Text Diff"):
201
+ diff_display = gr.HTML()
 
 
 
 
 
 
 
 
 
 
202
 
203
  # Footer
204
  gr.HTML("""
205
+ <div class="text-center py-4 text-gray-500 text-sm">
206
+ <p>© 2024 AI Paraphraser Pro | Scholar Writer</p>
207
  </div>
208
  """)
209
 
210
+ # Event Handlers
211
  submit_btn.click(
212
+ fn=process_input,
213
+ inputs=[input_text, file_upload, creativity, tone],
214
+ outputs=[output_text, orig_count, para_count, similarity]
215
  ).then(
216
  fn=create_comparison,
217
+ inputs=[input_text, output_text, orig_count, para_count, similarity],
218
  outputs=comparison_table
219
+ ).then(
220
+ fn=lambda o, p: difflib.HtmlDiff().make_table(o.splitlines(), p.splitlines()),
221
+ inputs=[input_text, output_text],
222
+ outputs=diff_display
223
+ )
224
+
225
+ copy_btn.click(
226
+ fn=None,
227
+ inputs=output_text,
228
+ js="(text) => { navigator.clipboard.writeText(text); return 'Copied!'; }"
229
+ )
230
+
231
+ download_btn.click(
232
+ fn=lambda t: (t, "paraphrased_result.txt"),
233
+ inputs=output_text,
234
+ outputs=gr.File(label="Downloading...", visible=False)
235
  )
236
 
237
  if __name__ == "__main__":