Eaz123 committed
Commit d31a082 · verified · Parent(s): 87542ab

Update app.py

Files changed (1): app.py (+117 -53)
app.py CHANGED
@@ -6,96 +6,149 @@ import tempfile
 from pathlib import Path
 import difflib
 import time
-from typing import Optional
+from typing import Optional, Tuple
+import logging
+from concurrent.futures import ThreadPoolExecutor
+
+# ========== LOGGING SETUP ==========
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
 
 # ========== MODEL SETUP ==========
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_name = "ramsrigouthamg/t5_paraphraser"
-tokenizer = T5Tokenizer.from_pretrained(model_name)
-model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
-model.eval()
+def load_model() -> Tuple[T5ForConditionalGeneration, T5Tokenizer]:
+    """Load model with error handling and progress tracking"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model_name = "ramsrigouthamg/t5_paraphraser"
+
+    try:
+        logger.info("Loading tokenizer...")
+        tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
+
+        logger.info("Loading model...")
+        model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
+        model.eval()
+
+        logger.info("Model loaded successfully")
+        return model, tokenizer
+    except Exception as e:
+        logger.error(f"Model loading failed: {str(e)}")
+        raise gr.Error("Failed to initialize the AI model. Please try again later.")
+
+model, tokenizer = load_model()
+device = next(model.parameters()).device
 
 # ========== UTILITIES ==========
-def cleanup_file(file_path: Optional[str]):
-    """Securely delete temporary files"""
+def cleanup_file(file_path: Optional[str]) -> None:
+    """Securely delete temporary files with error handling"""
     if file_path and Path(file_path).exists():
         try:
             Path(file_path).unlink()
+            logger.info(f"Cleaned up temporary file: {file_path}")
         except Exception as e:
-            print(f"Cleanup error: {e}")
+            logger.warning(f"File cleanup error: {e}")
 
-def extract_text(file_obj) -> tuple[str, Optional[str]]:
-    """Handle file uploads with automatic cleanup"""
+def extract_text(file_obj) -> Tuple[str, Optional[str]]:
+    """Handle file uploads with comprehensive error handling"""
     temp_path = None
     try:
         if file_obj.name.endswith('.pdf'):
-            # Create temp file (auto-deleted later)
-            temp_path = Path(tempfile.mktemp(suffix='.pdf'))
-            temp_path.write_bytes(file_obj.read())
+            # Create temp file with secure permissions
+            with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp:
+                temp_path = tmp.name
+                tmp.write(file_obj.read())
 
             with pdfplumber.open(temp_path) as pdf:
-                text = "\n".join(page.extract_text() or "" for page in pdf.pages[:3])  # Limit to 3 pages
-                return text[:5000], str(temp_path)  # Limit to 5000 chars
+                text = "\n".join(
+                    page.extract_text() or ""
+                    for page in pdf.pages[:3]  # Limit to 3 pages for performance
+                )
+                return text[:5000], temp_path  # Limit to 5000 chars
 
         # Handle text files
-        return file_obj.read().decode('utf-8')[:5000], None
+        text = file_obj.read().decode('utf-8')[:5000]
+        return text, None
 
     except Exception as e:
+        logger.error(f"File processing error: {str(e)}")
         if temp_path:
             cleanup_file(temp_path)
-        raise gr.Error(f"File processing error: {str(e)}")
+        raise gr.Error(f"File processing failed: {str(e)}")
 
 # ========== CORE FUNCTION ==========
-def process_request(file_obj, text_input, creativity=3, tone="professional"):
-    """Main processing pipeline with progress tracking"""
+def process_request(
+    file_obj,
+    text_input: str,
+    creativity: int = 3,
+    tone: str = "professional"
+) -> Tuple[str, int, int, int, list]:
+    """Main processing pipeline with enhanced error handling"""
     start_time = time.time()
     temp_file = None
     progress = []
 
     try:
+        # Input validation
+        if not (file_obj or text_input):
+            raise gr.Error("Please provide either text or a file")
+
         # Process input
         if file_obj:
             text, temp_file = extract_text(file_obj)
-            progress.append("📄 File processed")
+            progress.append("📄 File processed successfully")
         else:
-            text = text_input[:5000]  # Character limit
-            progress.append("📝 Text received")
+            text = text_input[:5000]
+            progress.append("📝 Text input received")
 
         if not text.strip():
            return "", 0, 0, 0, progress
 
-        # Chunk processing
+        # Chunk processing with parallelization
         chunks = [text[i:i+400] for i in range(0, len(text), 400)]
         outputs = []
 
-        with torch.no_grad():
-            for i, chunk in enumerate(chunks):
-                inputs = tokenizer(
-                    f"paraphrase: {chunk} </s>",
-                    max_length=256,
-                    padding="max_length",
-                    return_tensors="pt",
-                    truncation=True
-                ).to(device)
-
-                outputs.append(tokenizer.decode(
-                    model.generate(
-                        **inputs,
-                        max_length=256,
-                        num_beams=3 + creativity,
-                        temperature=0.7 + (creativity * 0.15),
-                        early_stopping=True
-                    )[0],
-                    skip_special_tokens=True
-                ))
-                progress.append(f"✍️ Processed chunk {i+1}/{len(chunks)}")
+        def process_chunk(chunk: str) -> str:
+            """Process a single text chunk"""
+            inputs = tokenizer(
+                f"paraphrase: {chunk} </s>",
+                max_length=256,
+                padding="max_length",
+                return_tensors="pt",
+                truncation=True
+            ).to(device)
+
+            outputs = model.generate(
+                **inputs,
+                max_length=256,
+                num_beams=3 + creativity,
+                temperature=0.7 + (creativity * 0.15),
+                early_stopping=True,
+                num_return_sequences=1
+            )
+            return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Process chunks in parallel (limited threads)
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            outputs = list(executor.map(process_chunk, chunks))
+        progress.extend(f"✍️ Processed chunk {i+1}/{len(chunks)}"
+                        for i in range(len(chunks)))
 
         result = " ".join(outputs)
         similarity = int(difflib.SequenceMatcher(None, text, result).ratio() * 100)
-        progress.append(f"✅ Completed in {time.time()-start_time:.1f}s")
+        elapsed = time.time() - start_time
+
+        progress.append(f"✅ Completed in {elapsed:.1f} seconds")
+        logger.info(f"Processed {len(text.split())} words in {elapsed:.2f}s")
 
         return result, len(text.split()), len(result.split()), similarity, progress
 
+    except Exception as e:
+        logger.error(f"Processing error: {str(e)}")
+        progress.append(f"❌ Error: {str(e)}")
+        raise gr.Error(f"Processing failed: {str(e)}")
+
     finally:
         if temp_file:
             cleanup_file(temp_file)
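
Two review notes on the hunk above. First, replacing `tempfile.mktemp` with `NamedTemporaryFile(delete=False)` closes a real race: `mktemp` only invents a name, so another process can claim the path between name generation and the first write. The cost is that `delete=False` makes the manual `cleanup_file` path load-bearing. A sketch of an alternative that keeps automatic deletion (the helper name `extract_pdf_text` is invented for illustration, not part of this commit):

```python
import tempfile

import pdfplumber

def extract_pdf_text(data: bytes, max_pages: int = 3, max_chars: int = 5000) -> str:
    # The default delete=True removes the file when the context exits, even if
    # pdfplumber raises, so no separate cleanup path is needed. Caveat: on
    # Windows the open handle blocks re-opening the file, which is one reason
    # to prefer the commit's delete=False approach there.
    with tempfile.NamedTemporaryFile(suffix=".pdf") as tmp:
        tmp.write(data)
        tmp.flush()
        with pdfplumber.open(tmp.name) as pdf:
            text = "\n".join(page.extract_text() or "" for page in pdf.pages[:max_pages])
    return text[:max_chars]
```

Second, the parallel rewrite drops the old loop's explicit `torch.no_grad()` and keeps passing `temperature` while beam search is active; in transformers, `temperature` only takes effect when `do_sample=True`, so the creativity setting currently only changes the beam count. It also emits all "Processed chunk" log lines after the pool finishes, so the log is no longer real-time. A more conservative worker, as a sketch (it mirrors the closure inside `process_request`, so `tokenizer`, `model`, `device`, and `creativity` come from the surrounding code; `do_sample=True` is an assumption about the intended behavior, not something this commit sets):

```python
import torch

def process_chunk(chunk: str) -> str:
    inputs = tokenizer(
        f"paraphrase: {chunk} </s>",
        max_length=256,
        return_tensors="pt",
        truncation=True,
    ).to(device)
    # Restore the explicit no-grad context the old loop had; recent
    # transformers versions apply it inside generate(), but being explicit
    # is free and guards older versions.
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=256,
            num_beams=3 + creativity,
            do_sample=True,  # without this, the temperature below is ignored
            temperature=0.7 + (creativity * 0.15),
            early_stopping=True,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
```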
@@ -117,6 +170,7 @@ custom_css = """
     background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
     border-radius: 12px 12px 0 0;
     padding: 2rem 1rem;
+    color: white;
 }
 .card {
     background: white;
@@ -130,19 +184,28 @@ custom_css = """
     color: #64748b;
     max-height: 120px;
     overflow-y: auto;
+    background: #f8fafc;
+    padding: 0.75rem;
+    border-radius: 8px;
 }
 .file-upload {
     border: 2px dashed #e2e8f0 !important;
     border-radius: 8px !important;
     padding: 1.5rem !important;
 }
+footer {
+    text-align: center;
+    padding: 1rem;
+    color: #64748b;
+    font-size: 0.9em;
+}
 """
 
 with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro") as demo:
     # ========== HEADER ==========
     with gr.Column(elem_classes=["header"]):
         gr.Markdown("""
-        <div style="text-align: center; color: white">
+        <div style="text-align: center">
            <h1 style="font-weight: 700; margin-bottom: 0.5rem">AI Paraphraser Pro</h1>
            <p style="opacity: 0.9">Enterprise-grade text transformation with semantic preservation</p>
        </div>
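
One caution on the new `footer` rule, which targets the `<footer>` markup added in the hunk below: Gradio injects its own "Built with Gradio" footer into the page, so a bare element selector may restyle that as well (an assumption about Gradio's default page chrome; worth a quick visual check). Scoping the rule to a class sidesteps the question; the `app-footer` name below is invented for illustration:

```python
# In custom_css, use ".app-footer { ... }" instead of "footer { ... }",
# then tag only the app's own footer with that class:
gr.HTML("""
<footer class="app-footer">
    <p>© 2024 AI Paraphraser Pro | Secure Processing | Files Never Stored</p>
</footer>
""")
```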
@@ -212,16 +275,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro
     with gr.Row():
         input_words = gr.Number(label="Original Words", precision=0)
         output_words = gr.Number(label="New Words", precision=0)
-        similarity_score = gr.Number(label="Similarity", suffix="%")
+        similarity_score = gr.Number(label="Similarity (%)", precision=0)
 
     with gr.Accordion("Processing Log", open=False):
         progress_log = gr.HTML(elem_classes=["progress-log"])
 
     # ========== FOOTER ==========
     gr.HTML("""
-    <div style="text-align: center; padding: 1rem; color: #64748b; font-size: 0.9em">
+    <footer>
         <p>© 2024 AI Paraphraser Pro | Secure Processing | Files Never Stored</p>
-    </div>
+    </footer>
     """)
 
     # ========== EVENT HANDLERS ==========
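
For context on the similarity figure (and the dropped `suffix="%"`: `gr.Number` has no such parameter as far as current Gradio goes, which is presumably why the unit moved into the label), the score comes from `difflib.SequenceMatcher.ratio()`, which measures matching character runs rather than semantic closeness, so a paraphrase scores high when it reuses wording, not when it preserves meaning. A minimal worked example:

```python
import difflib

a = "The quick brown fox jumps over the lazy dog."
b = "A quick brown fox leaps over a lazy dog."
# ratio() = 2*M / (len(a) + len(b)), where M is the number of matching
# characters found by the matcher; it rewards shared wording, not meaning.
score = int(difflib.SequenceMatcher(None, a, b).ratio() * 100)
print(f"{score}%")
```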
@@ -236,7 +299,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro
         None,
         [output_text],
         None,
-        js="(text) => { navigator.clipboard.writeText(text); }"
+        js="(text) => { navigator.clipboard.writeText(text); alert('Copied to clipboard!'); }"
     )
 
     download_btn.click(
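
On the copy handler: `navigator.clipboard.writeText` returns a Promise, so the added `alert` fires even if the write is rejected (for example, outside a secure context). A sketch that only reports real success; the variable name `copy_btn` is assumed, since this hunk starts mid-call:

```python
copy_btn.click(
    None,
    [output_text],
    None,
    # Alert from .then() so the message only appears once the write resolves.
    js="(text) => navigator.clipboard.writeText(text)"
       ".then(() => alert('Copied to clipboard!'))",
)
```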
@@ -247,8 +310,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro
 
 # ========== LAUNCH SETTINGS ==========
 if __name__ == "__main__":
-    demo.queue(concurrency_count=3).launch(
+    demo.queue(concurrency_count=2).launch(
         server_name="0.0.0.0",
         server_port=7860,
-        show_api=False
+        show_api=False,
+        favicon_path="favicon.ico"
     )
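
Two notes on the launch block. `favicon_path="favicon.ico"` assumes that file exists in the repo root; this diff doesn't add it. And `queue(concurrency_count=...)` is the Gradio 3.x spelling: Gradio 4 removed that parameter in favor of `default_concurrency_limit`, so if the Space's requirements pin Gradio 4 (not visible in this diff), the equivalent would be:

```python
if __name__ == "__main__":
    # Gradio 4.x spelling of the same two-worker queue; on 3.x keep
    # concurrency_count=2 as committed.
    demo.queue(default_concurrency_limit=2).launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=False,
        favicon_path="favicon.ico",
    )
```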
 