Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,22 +15,26 @@ sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncas
|
|
| 15 |
# Function to read content from different file types
|
| 16 |
def read_file(file, file_type):
|
| 17 |
content = ""
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
for
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
return content
|
| 35 |
|
| 36 |
# Function to process the file and generate outputs
|
|
@@ -38,7 +42,7 @@ def process_file(file, file_type, language="en"):
|
|
| 38 |
content = read_file(file, file_type)
|
| 39 |
|
| 40 |
# Check if content is not empty
|
| 41 |
-
if not content.strip():
|
| 42 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
| 43 |
|
| 44 |
# Summarize the content
|
|
@@ -83,7 +87,7 @@ def process_file(file, file_type, language="en"):
|
|
| 83 |
def home_page():
|
| 84 |
with gr.Blocks() as home:
|
| 85 |
# Header
|
| 86 |
-
gr.Markdown("##
|
| 87 |
|
| 88 |
# Menu bar as buttons
|
| 89 |
with gr.Row():
|
|
@@ -93,6 +97,20 @@ def home_page():
|
|
| 93 |
# Display content on home page
|
| 94 |
gr.Markdown("Welcome to the Document Processor!")
|
| 95 |
gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
return home
|
| 98 |
|
|
@@ -109,21 +127,18 @@ def detailed_page():
|
|
| 109 |
# File upload and processing components
|
| 110 |
file_input = gr.File(label="Upload Document")
|
| 111 |
file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
|
| 112 |
-
content_output = gr.Textbox(label="Original Content")
|
| 113 |
-
rephrased_output = gr.Textbox(label="Rephrased Content")
|
| 114 |
-
summary_output = gr.Textbox(label="Summary")
|
| 115 |
-
sentiment_output = gr.Textbox(label="Sentiment Analysis")
|
| 116 |
keywords_output = gr.Textbox(label="Keywords")
|
|
|
|
| 117 |
download_link = gr.File(label="Download Processed Document")
|
| 118 |
|
| 119 |
def on_file_upload(file, file_type):
|
| 120 |
if not file:
|
| 121 |
-
return "No file uploaded.", None, None, None
|
| 122 |
-
|
| 123 |
-
return
|
| 124 |
|
| 125 |
# Process file on upload
|
| 126 |
-
file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[
|
| 127 |
|
| 128 |
# Sample output or content for the detailed analysis page
|
| 129 |
gr.Markdown("Here you will see detailed analysis outputs after document upload.")
|
|
|
|
| 15 |
# Function to read content from different file types
|
| 16 |
def read_file(file, file_type):
    """Extract the text of an uploaded document as a single string.

    Args:
        file: The uploaded document. For ``txt`` it may be an object with a
            ``read()`` method (returning bytes or str) or anything ``open()``
            accepts as a path. For the other formats it is handed straight
            to the corresponding parser.
        file_type: One of ``"docx"``, ``"txt"``, ``"pdf"`` or ``"pptx"``,
            matched case-insensitively. Any other value (including ``None``)
            yields an empty string.

    Returns:
        The extracted text, with a newline appended after every paragraph,
        page or shape for the structured formats. If reading fails for any
        reason, a message of the form ``"Error reading the file: <details>"``
        is returned instead of raising.
    """
    # Normalise the selector so "PDF" / " txt " behave like "pdf" / "txt";
    # non-string values (e.g. None) simply match no branch below.
    kind = file_type.strip().lower() if isinstance(file_type, str) else file_type

    content = ""
    try:
        if kind == "docx":
            doc = Document(file)
            content = "".join(para.text + "\n" for para in doc.paragraphs)
        elif kind == "txt":
            if hasattr(file, "read"):
                raw = file.read()
            else:
                # No read() method: treat the value as a filesystem path.
                with open(file, "rb") as handle:
                    raw = handle.read()
            # A handle opened in text mode already yields str; only decode bytes.
            content = raw if isinstance(raw, str) else raw.decode("utf-8")
        elif kind == "pdf":
            pdf_reader = PyPDF2.PdfReader(file)
            # `or ""` keeps a page with no extractable text from breaking
            # the concatenation.
            content = "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
        elif kind == "pptx":
            prs = Presentation(file)
            content = "".join(
                shape.text + "\n"
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")  # skip shapes without a text attribute
            )
    except Exception as e:
        # Deliberate boundary: failures are reported to the caller as text
        # rather than raised.
        content = f"Error reading the file: {str(e)}"

    return content
|
| 39 |
|
| 40 |
# Function to process the file and generate outputs
|
|
|
|
| 42 |
content = read_file(file, file_type)
|
| 43 |
|
| 44 |
# Check if content is not empty
|
| 45 |
+
if not content.strip() or "Error" in content:
|
| 46 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
| 47 |
|
| 48 |
# Summarize the content
|
|
|
|
| 87 |
def home_page():
|
| 88 |
with gr.Blocks() as home:
|
| 89 |
# Header
|
| 90 |
+
gr.Markdown("## Upload a Document to Process")
|
| 91 |
|
| 92 |
# Menu bar as buttons
|
| 93 |
with gr.Row():
|
|
|
|
| 97 |
# Display content on home page
|
| 98 |
gr.Markdown("Welcome to the Document Processor!")
|
| 99 |
gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")
|
| 100 |
+
|
| 101 |
+
# File upload and content output
|
| 102 |
+
file_input = gr.File(label="Upload Document")
|
| 103 |
+
content_output = gr.Textbox(label="Original Content")
|
| 104 |
+
rephrased_output = gr.Textbox(label="Rephrased Content")
|
| 105 |
+
|
| 106 |
+
def on_file_upload(file):
    """Process the uploaded document and return its original and rephrased text.

    Args:
        file: Value delivered by the upload component; falsy when nothing
            has been uploaded.

    Returns:
        A ``(content, rephrased)`` pair taken from the first two items of
        ``process_file``'s result. When no file is present the pair is
        ``("No file uploaded.", None)``.
    """
    import os  # local import: only needed to inspect the file extension

    if not file:
        return "No file uploaded.", None

    # There is no file-type selector next to this upload, so derive the type
    # from the file name instead of treating every upload as a .docx.
    # NOTE(review): assumes the upload is either a path string or an object
    # exposing its path as `.name` -- confirm for the installed Gradio version.
    path = file if isinstance(file, str) else getattr(file, "name", "")
    extension = os.path.splitext(str(path))[1].lstrip(".").lower()
    # Unknown or missing extensions fall back to "docx".
    file_type = extension if extension in ("pdf", "docx", "txt", "pptx") else "docx"

    content, rephrased, _, _, _, _ = process_file(file, file_type=file_type)
    return content, rephrased
|
| 111 |
+
|
| 112 |
+
# Process file on upload
|
| 113 |
+
file_input.change(on_file_upload, inputs=file_input, outputs=[content_output, rephrased_output])
|
| 114 |
|
| 115 |
return home
|
| 116 |
|
|
|
|
| 127 |
# File upload and processing components
|
| 128 |
file_input = gr.File(label="Upload Document")
|
| 129 |
file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
keywords_output = gr.Textbox(label="Keywords")
|
| 131 |
+
sentiment_output = gr.Textbox(label="Sentiment Analysis")
|
| 132 |
download_link = gr.File(label="Download Processed Document")
|
| 133 |
|
| 134 |
def on_file_upload(file, file_type):
    """Run the full analysis on an uploaded document.

    Args:
        file: Value delivered by the upload component; falsy when nothing
            has been uploaded.
        file_type: The selected document type, forwarded to ``process_file``.

    Returns:
        A 3-tuple ``(keywords, sentiment, download_path)``. When no file is
        present the tuple is ``("No file uploaded.", None, None)``.
    """
    if not file:
        # Same arity as the success path, so both branches return exactly
        # three values.
        return "No file uploaded.", None, None

    _, _, _, sentiment, keywords, download_path = process_file(file, file_type)
    return keywords, sentiment, download_path
|
| 139 |
|
| 140 |
# Process file on upload
|
| 141 |
+
file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[keywords_output, sentiment_output, download_link])
|
| 142 |
|
| 143 |
# Sample output or content for the detailed analysis page
|
| 144 |
gr.Markdown("Here you will see detailed analysis outputs after document upload.")
|