Spaces:

izhan001
/

Smart-Doc-Processor

Build error

App Files Files Community

izhan001 committed on Nov 7, 2024

Commit

840069e

verified ·

1 Parent(s): 1418689

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -25

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from io import BytesIO
 import tempfile
 # Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # Specify the model
 rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
 sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
@@ -36,11 +36,11 @@ def read_file(file, file_type):
 # Function to process the file and generate outputs
 def process_file(file, file_type, language="en"):
     content = read_file(file, file_type)
     # Check if content is not empty
     if not content.strip():
         return "Error: The document is empty or unsupported format.", None, None, None, None, None
     # Summarize the content
     try:
         summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
@@ -51,7 +51,7 @@ def process_file(file, file_type, language="en"):
     # Rephrase the entire content in manageable chunks
     rephrased_text = ""
     try:
-        chunk_size = 500  # Adjust this size based on model and resource limits
         content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
         for chunk in content_chunks:
             rephrased = rephraser(chunk)
@@ -61,13 +61,13 @@ def process_file(file, file_type, language="en"):
     # Sentiment analysis
     try:
-        sentiment = sentiment_analyzer(content[:512])  # Limiting to 512 tokens for sentiment analysis
         sentiment_text = sentiment[0]['label']
     except Exception as e:
         sentiment_text = f"Sentiment Analysis Error: {str(e)}"
     # Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
-    keywords = ' '.join([word for word in content.split()[:10]])  # Sample, first 10 words as keywords
     # Saving processed file (for download link)
     try:
@@ -79,24 +79,48 @@ def process_file(file, file_type, language="en"):
     return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
-# Set up Gradio interface
-iface = gr.Interface(
-    fn=process_file,
-    inputs=[
-        gr.File(label="Upload Document (PDF, DOCX, TXT, PPTX)"),
-        gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type"),
-    ],
-    outputs=[
-        gr.Textbox(label="Original Content"),
-        gr.Textbox(label="Rephrased Content"),
-        gr.Textbox(label="Summary"),
-        gr.Textbox(label="Sentiment Analysis"),
-        gr.Textbox(label="Keywords"),
-        gr.File(label="Download Processed Document")
-    ],
-    title="Enhanced Document Processor",
-    description="Upload a document to rephrase, summarize, analyze sentiment, extract keywords, and highlight key information. Supports PDF, DOCX, TXT, PPTX."
-)
-iface.launch()

 import tempfile
 # Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
 sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
 # Function to process the file and generate outputs
 def process_file(file, file_type, language="en"):
     content = read_file(file, file_type)
     # Check if content is not empty
     if not content.strip():
         return "Error: The document is empty or unsupported format.", None, None, None, None, None
     # Summarize the content
     try:
         summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
     # Rephrase the entire content in manageable chunks
     rephrased_text = ""
     try:
+        chunk_size = 500
         content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
         for chunk in content_chunks:
             rephrased = rephraser(chunk)
     # Sentiment analysis
     try:
+        sentiment = sentiment_analyzer(content[:512])
         sentiment_text = sentiment[0]['label']
     except Exception as e:
         sentiment_text = f"Sentiment Analysis Error: {str(e)}"
     # Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
+    keywords = ' '.join([word for word in content.split()[:10]])
     # Saving processed file (for download link)
     try:
     return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
+# Define the functions for the different pages
+def home_page():
+    """Assemble the landing page and return its gr.Blocks container."""
+    with gr.Blocks() as landing:
+        # Page heading
+        gr.Markdown("## Original Content Rephrased Content")
+        # Navigation row; both buttons are display-only (no handlers are registered here)
+        with gr.Row():
+            gr.Button("Home")
+            gr.Button("Full Analysis", variant="primary")
+        # Welcome copy shown under the navigation row
+        gr.Markdown("Welcome to the Document Processor!")
+        gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")
+    return landing
+def detailed_page():
+    """Build the "Full Analysis" page: upload controls, a trigger button, and result fields.
+
+    The page collects a document and its file type, runs ``process_file`` when the
+    "Process Document" button is clicked, and shows the six values it returns.
+
+    Returns:
+        The ``gr.Blocks`` container holding the page.
+    """
+    with gr.Blocks() as detailed:
+        # Header
+        gr.Markdown("## Detailed Analysis Page")
+        # Menu bar as buttons (display-only: no click handlers are attached to them)
+        with gr.Row():
+            gr.Button("Home", variant="primary")
+            gr.Button("Full Analysis")
+        # Inputs
+        file_input = gr.File(label="Upload Document")
+        file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
+        process_btn = gr.Button("Process Document")
+        # Outputs, declared in the same order as process_file's return tuple
+        content_output = gr.Textbox(label="Original Content")
+        rephrased_output = gr.Textbox(label="Rephrased Content")
+        summary_output = gr.Textbox(label="Summary")
+        sentiment_output = gr.Textbox(label="Sentiment Analysis")
+        keywords_output = gr.Textbox(label="Keywords")
+        download_link = gr.File(label="Download Processed Document")
+        # Sample output or content for the detailed analysis page
+        gr.Markdown("Here you will see detailed analysis outputs after document upload.")
+        # Without an event listener the components above are never connected to
+        # process_file, so uploading a document would produce no output at all.
+        process_btn.click(
+            fn=process_file,
+            inputs=[file_input, file_type],
+            outputs=[
+                content_output,
+                rephrased_output,
+                summary_output,
+                sentiment_output,
+                keywords_output,
+                download_link,
+            ],
+        )
+    return detailed
+# Build each page once, pair it with its tab label, and serve the tabbed app
+pages = [home_page(), detailed_page()]
+tab_labels = ["Home", "Full Analysis"]
+iface = gr.TabbedInterface(pages, tab_labels)
+iface.launch()