Update app.py
app.py
@@ -6,96 +6,149 @@ import tempfile
 from pathlib import Path
 import difflib
 import time
-from typing import Optional
+from typing import Optional, Tuple
+import logging
+from concurrent.futures import ThreadPoolExecutor
+
+# ========== LOGGING SETUP ==========
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)

 # ========== MODEL SETUP ==========
-
-
-
-
-
+def load_model() -> Tuple[T5ForConditionalGeneration, T5Tokenizer]:
+    """Load model with error handling and progress tracking"""
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model_name = "ramsrigouthamg/t5_paraphraser"
+
+    try:
+        logger.info("Loading tokenizer...")
+        tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
+
+        logger.info("Loading model...")
+        model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
+        model.eval()
+
+        logger.info("Model loaded successfully")
+        return model, tokenizer
+    except Exception as e:
+        logger.error(f"Model loading failed: {str(e)}")
+        raise gr.Error("Failed to initialize the AI model. Please try again later.")
+
+model, tokenizer = load_model()
+device = next(model.parameters()).device

 # ========== UTILITIES ==========
-def cleanup_file(file_path: Optional[str]):
-    """Securely delete temporary files"""
+def cleanup_file(file_path: Optional[str]) -> None:
+    """Securely delete temporary files with error handling"""
     if file_path and Path(file_path).exists():
         try:
             Path(file_path).unlink()
+            logger.info(f"Cleaned up temporary file: {file_path}")
         except Exception as e:
-
+            logger.warning(f"File cleanup error: {e}")

-def extract_text(file_obj) ->
-    """Handle file uploads with
+def extract_text(file_obj) -> Tuple[str, Optional[str]]:
+    """Handle file uploads with comprehensive error handling"""
     temp_path = None
     try:
         if file_obj.name.endswith('.pdf'):
-            # Create temp file
-
-
+            # Create temp file with secure permissions
+            with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp:
+                temp_path = tmp.name
+                tmp.write(file_obj.read())

             with pdfplumber.open(temp_path) as pdf:
-                text = "\n".join(
-
+                text = "\n".join(
+                    page.extract_text() or ""
+                    for page in pdf.pages[:3]  # Limit to 3 pages for performance
+                )
+            return text[:5000], temp_path  # Limit to 5000 chars

         # Handle text files
-
+        text = file_obj.read().decode('utf-8')[:5000]
+        return text, None

     except Exception as e:
+        logger.error(f"File processing error: {str(e)}")
         if temp_path:
             cleanup_file(temp_path)
-        raise gr.Error(f"File processing
+        raise gr.Error(f"File processing failed: {str(e)}")

 # ========== CORE FUNCTION ==========
-def process_request(
-
+def process_request(
+    file_obj,
+    text_input: str,
+    creativity: int = 3,
+    tone: str = "professional"
+) -> Tuple[str, int, int, int, list]:
+    """Main processing pipeline with enhanced error handling"""
     start_time = time.time()
     temp_file = None
     progress = []

     try:
+        # Input validation
+        if not (file_obj or text_input):
+            raise gr.Error("Please provide either text or a file")
+
         # Process input
         if file_obj:
             text, temp_file = extract_text(file_obj)
-            progress.append("📄 File processed")
+            progress.append("📄 File processed successfully")
         else:
-            text = text_input[:5000]
-            progress.append("📝 Text received")
+            text = text_input[:5000]
+            progress.append("📝 Text input received")

         if not text.strip():
             return "", 0, 0, 0, progress

-        # Chunk processing
+        # Chunk processing with parallelization
         chunks = [text[i:i+400] for i in range(0, len(text), 400)]
         outputs = []

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        def process_chunk(chunk: str) -> str:
+            """Process a single text chunk"""
+            inputs = tokenizer(
+                f"paraphrase: {chunk} </s>",
+                max_length=256,
+                padding="max_length",
+                return_tensors="pt",
+                truncation=True
+            ).to(device)
+
+            outputs = model.generate(
+                **inputs,
+                max_length=256,
+                num_beams=3 + creativity,
+                temperature=0.7 + (creativity * 0.15),
+                early_stopping=True,
+                num_return_sequences=1
+            )
+            return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Process chunks in parallel (limited threads)
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            outputs = list(executor.map(process_chunk, chunks))
+            progress.extend(f"✍️ Processed chunk {i+1}/{len(chunks)}"
+                            for i in range(len(chunks)))

         result = " ".join(outputs)
         similarity = int(difflib.SequenceMatcher(None, text, result).ratio() * 100)
-
+        elapsed = time.time() - start_time
+
+        progress.append(f"✅ Completed in {elapsed:.1f} seconds")
+        logger.info(f"Processed {len(text.split())} words in {elapsed:.2f}s")

         return result, len(text.split()), len(result.split()), similarity, progress

+    except Exception as e:
+        logger.error(f"Processing error: {str(e)}")
+        progress.append(f"❌ Error: {str(e)}")
+        raise gr.Error(f"Processing failed: {str(e)}")
+
     finally:
         if temp_file:
             cleanup_file(temp_file)
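Note on the chunking step above: the slicing is a fixed 400-character window that ignores word and sentence boundaries, and the last chunk simply holds the remainder. A minimal sketch (not part of the commit) of what `chunks` contains:

# Sketch: the fixed-width chunking used in process_request
text = "x" * 1000
chunks = [text[i:i+400] for i in range(0, len(text), 400)]
print([len(c) for c in chunks])  # prints [400, 400, 200]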
@@ -117,6 +170,7 @@ custom_css = """
     background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%);
     border-radius: 12px 12px 0 0;
     padding: 2rem 1rem;
+    color: white;
 }
 .card {
     background: white;
@@ -130,19 +184,28 @@ custom_css = """
|
|
| 130 |
color: #64748b;
|
| 131 |
max-height: 120px;
|
| 132 |
overflow-y: auto;
|
|
|
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
.file-upload {
|
| 135 |
border: 2px dashed #e2e8f0 !important;
|
| 136 |
border-radius: 8px !important;
|
| 137 |
padding: 1.5rem !important;
|
| 138 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
"""
|
| 140 |
|
| 141 |
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro") as demo:
|
| 142 |
# ========== HEADER ==========
|
| 143 |
with gr.Column(elem_classes=["header"]):
|
| 144 |
gr.Markdown("""
|
| 145 |
-
<div style="text-align: center
|
| 146 |
<h1 style="font-weight: 700; margin-bottom: 0.5rem">AI Paraphraser Pro</h1>
|
| 147 |
<p style="opacity: 0.9">Enterprise-grade text transformation with semantic preservation</p>
|
| 148 |
</div>
|
|
@@ -212,16 +275,16 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro
         with gr.Row():
             input_words = gr.Number(label="Original Words", precision=0)
             output_words = gr.Number(label="New Words", precision=0)
-            similarity_score = gr.Number(label="Similarity",
+            similarity_score = gr.Number(label="Similarity (%)", precision=0)

     with gr.Accordion("Processing Log", open=False):
         progress_log = gr.HTML(elem_classes=["progress-log"])

     # ========== FOOTER ==========
     gr.HTML("""
-    <
+    <footer>
         <p>© 2024 AI Paraphraser Pro | Secure Processing | Files Never Stored</p>
-    </
+    </footer>
     """)

     # ========== EVENT HANDLERS ==========
@@ -236,7 +299,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro
         None,
         [output_text],
         None,
-        js="(text) => { navigator.clipboard.writeText(text); }"
+        js="(text) => { navigator.clipboard.writeText(text); alert('Copied to clipboard!'); }"
     )

     download_btn.click(
@@ -247,8 +310,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Paraphraser Pro

 # ========== LAUNCH SETTINGS ==========
 if __name__ == "__main__":
-    demo.queue(concurrency_count=
+    demo.queue(concurrency_count=2).launch(
         server_name="0.0.0.0",
         server_port=7860,
-        show_api=False
+        show_api=False,
+        favicon_path="favicon.ico"
     )
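For reference, a minimal way to exercise the new pipeline end to end, assuming the file above is importable as `app` and the model download succeeds; the import path and sample text are illustrative, not from the commit:

# Hypothetical smoke test for the updated process_request
from app import process_request

result, n_in, n_out, similarity, log = process_request(
    file_obj=None,
    text_input="Large language models can rewrite text while preserving its meaning.",
    creativity=3,         # process_chunk maps this to num_beams=6, temperature=1.15
    tone="professional",  # accepted by the signature but not referenced in the generation code
)
print(n_in, n_out, similarity)
print(result)

One caveat: with pure beam search (no do_sample=True), recent transformers releases warn that temperature is ignored, so the creativity setting may only be changing num_beams in practice.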
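The Similarity (%) figure is plain difflib, not an embedding-based metric; a self-contained sketch of the same computation used in process_request:

# Sketch: the similarity metric behind the Similarity (%) field
import difflib

a = "The quick brown fox jumps over the lazy dog."
b = "A quick brown fox leaps over a lazy dog."
# SequenceMatcher.ratio() is a character-level score in [0, 1];
# process_request scales it to an integer percentage.
print(int(difflib.SequenceMatcher(None, a, b).ratio() * 100))  # high for light edits, lower for real rewrites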
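Compatibility note on the launch block: queue(concurrency_count=...) is the Gradio 3.x signature. If the Space is pinned to Gradio 4.x, the rough equivalent (an assumption to verify against the installed version) would be:

# Gradio 4.x variant of the launch block; default_concurrency_limit is the 4.x rename
demo.queue(default_concurrency_limit=2).launch(
    server_name="0.0.0.0",
    server_port=7860,
    show_api=False,
    favicon_path="favicon.ico"  # resolves only if favicon.ico ships with the Space
)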