Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from io import BytesIO
|
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
# Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
|
| 11 |
-
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 12 |
rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
|
| 13 |
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
|
| 14 |
|
|
@@ -36,11 +36,11 @@ def read_file(file, file_type):
|
|
| 36 |
# Function to process the file and generate outputs
|
| 37 |
def process_file(file, file_type, language="en"):
|
| 38 |
content = read_file(file, file_type)
|
| 39 |
-
|
| 40 |
# Check if content is not empty
|
| 41 |
if not content.strip():
|
| 42 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
| 43 |
-
|
| 44 |
# Summarize the content
|
| 45 |
try:
|
| 46 |
summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
|
|
@@ -51,7 +51,7 @@ def process_file(file, file_type, language="en"):
|
|
| 51 |
# Rephrase the entire content in manageable chunks
|
| 52 |
rephrased_text = ""
|
| 53 |
try:
|
| 54 |
-
chunk_size = 500
|
| 55 |
content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
|
| 56 |
for chunk in content_chunks:
|
| 57 |
rephrased = rephraser(chunk)
|
|
@@ -61,13 +61,13 @@ def process_file(file, file_type, language="en"):
|
|
| 61 |
|
| 62 |
# Sentiment analysis
|
| 63 |
try:
|
| 64 |
-
sentiment = sentiment_analyzer(content[:512])
|
| 65 |
sentiment_text = sentiment[0]['label']
|
| 66 |
except Exception as e:
|
| 67 |
sentiment_text = f"Sentiment Analysis Error: {str(e)}"
|
| 68 |
|
| 69 |
# Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
|
| 70 |
-
keywords = ' '.join([word for word in content.split()[:10]])
|
| 71 |
|
| 72 |
# Saving processed file (for download link)
|
| 73 |
try:
|
|
@@ -79,24 +79,48 @@ def process_file(file, file_type, language="en"):
|
|
| 79 |
|
| 80 |
return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
gr.
|
| 87 |
-
gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type"),
|
| 88 |
-
],
|
| 89 |
-
outputs=[
|
| 90 |
-
gr.Textbox(label="Original Content"),
|
| 91 |
-
gr.Textbox(label="Rephrased Content"),
|
| 92 |
-
gr.Textbox(label="Summary"),
|
| 93 |
-
gr.Textbox(label="Sentiment Analysis"),
|
| 94 |
-
gr.Textbox(label="Keywords"),
|
| 95 |
-
gr.File(label="Download Processed Document")
|
| 96 |
-
],
|
| 97 |
-
title="Enhanced Document Processor",
|
| 98 |
-
description="Upload a document to rephrase, summarize, analyze sentiment, extract keywords, and highlight key information. Supports PDF, DOCX, TXT, PPTX."
|
| 99 |
-
)
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
# Load the three Hugging Face pipelines once, at import time, so every
# request reuses the same summarization, paraphrasing, and sentiment models.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
rephraser = pipeline(
    task="text2text-generation",
    model="Vamsi/T5_Paraphrase_Paws",
    max_length=512,
    truncation=True,
)
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
|
| 14 |
|
|
|
|
| 36 |
# Function to process the file and generate outputs
|
| 37 |
def process_file(file, file_type, language="en"):
|
| 38 |
content = read_file(file, file_type)
|
| 39 |
+
|
| 40 |
# Check if content is not empty
|
| 41 |
if not content.strip():
|
| 42 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
| 43 |
+
|
| 44 |
# Summarize the content
|
| 45 |
try:
|
| 46 |
summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
|
|
|
|
| 51 |
# Rephrase the entire content in manageable chunks
|
| 52 |
rephrased_text = ""
|
| 53 |
try:
|
| 54 |
+
chunk_size = 500
|
| 55 |
content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
|
| 56 |
for chunk in content_chunks:
|
| 57 |
rephrased = rephraser(chunk)
|
|
|
|
| 61 |
|
| 62 |
# Sentiment analysis
|
| 63 |
try:
|
| 64 |
+
sentiment = sentiment_analyzer(content[:512])
|
| 65 |
sentiment_text = sentiment[0]['label']
|
| 66 |
except Exception as e:
|
| 67 |
sentiment_text = f"Sentiment Analysis Error: {str(e)}"
|
| 68 |
|
| 69 |
# Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
|
| 70 |
+
keywords = ' '.join([word for word in content.split()[:10]])
|
| 71 |
|
| 72 |
# Saving processed file (for download link)
|
| 73 |
try:
|
|
|
|
| 79 |
|
| 80 |
return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
|
| 81 |
|
| 82 |
+
# Define the functions for the different pages
|
| 83 |
+
def home_page():
    """Build the landing page of the app.

    The page shows a heading, a row with two menu buttons ("Home" and
    "Full Analysis"), and two lines of introductory text. No event
    handlers are attached to the buttons in this function.

    Returns:
        The ``gr.Blocks`` instance containing the page.
    """
    intro_lines = (
        "Welcome to the Document Processor!",
        "Upload your document here and click to view details on the 'Full Analysis' page.",
    )

    with gr.Blocks() as page:
        # Page heading
        gr.Markdown("## Original Content Rephrased Content")

        # Menu bar rendered as a row of buttons
        with gr.Row():
            gr.Button("Home")
            gr.Button("Full Analysis", variant="primary")

        # Introductory text, one Markdown component per line
        for line in intro_lines:
            gr.Markdown(line)

    return page
|
| 98 |
|
| 99 |
+
def detailed_page():
    """Build the "Full Analysis" page and wire it to ``process_file``.

    The page holds a file upload, a file-type dropdown, and six output
    components. Clicking "Process Document" calls ``process_file`` with the
    uploaded file and the selected file type, and fills the outputs in the
    order that function returns them: original content, rephrased content,
    summary, sentiment, keywords, processed-file path.

    Returns:
        The ``gr.Blocks`` instance containing the page.
    """
    with gr.Blocks() as detailed:
        # Header
        gr.Markdown("## Detailed Analysis Page")

        # Menu bar as buttons (display only; no click handlers are attached)
        with gr.Row():
            gr.Button("Home", variant="primary")
            gr.Button("Full Analysis")

        # Inputs
        file_input = gr.File(label="Upload Document")
        file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
        process_btn = gr.Button("Process Document", variant="primary")

        # Outputs, declared in the same order as process_file's return tuple
        content_output = gr.Textbox(label="Original Content")
        rephrased_output = gr.Textbox(label="Rephrased Content")
        summary_output = gr.Textbox(label="Summary")
        sentiment_output = gr.Textbox(label="Sentiment Analysis")
        keywords_output = gr.Textbox(label="Keywords")
        download_link = gr.File(label="Download Processed Document")

        gr.Markdown("Here you will see detailed analysis outputs after document upload.")

        # Without this listener the components above are never populated:
        # nothing else on the page invokes process_file.
        process_btn.click(
            fn=process_file,
            inputs=[file_input, file_type],
            outputs=[
                content_output,
                rephrased_output,
                summary_output,
                sentiment_output,
                keywords_output,
                download_link,
            ],
        )

    return detailed
|
| 123 |
+
|
| 124 |
+
# Assemble both pages into one tabbed app and start serving it.
iface = gr.TabbedInterface(
    interface_list=[home_page(), detailed_page()],
    tab_names=["Home", "Full Analysis"],
)
iface.launch()
|