izhan001 committed on
Commit
840069e
·
verified ·
1 Parent(s): 1418689

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -25
app.py CHANGED
@@ -8,7 +8,7 @@ from io import BytesIO
8
  import tempfile
9
 
10
  # Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
11
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn") # Specify the model
12
  rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
13
  sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
14
 
@@ -36,11 +36,11 @@ def read_file(file, file_type):
36
  # Function to process the file and generate outputs
37
  def process_file(file, file_type, language="en"):
38
  content = read_file(file, file_type)
39
-
40
  # Check if content is not empty
41
  if not content.strip():
42
  return "Error: The document is empty or unsupported format.", None, None, None, None, None
43
-
44
  # Summarize the content
45
  try:
46
  summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
@@ -51,7 +51,7 @@ def process_file(file, file_type, language="en"):
51
  # Rephrase the entire content in manageable chunks
52
  rephrased_text = ""
53
  try:
54
- chunk_size = 500 # Adjust this size based on model and resource limits
55
  content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
56
  for chunk in content_chunks:
57
  rephrased = rephraser(chunk)
@@ -61,13 +61,13 @@ def process_file(file, file_type, language="en"):
61
 
62
  # Sentiment analysis
63
  try:
64
- sentiment = sentiment_analyzer(content[:512]) # Limiting to 512 tokens for sentiment analysis
65
  sentiment_text = sentiment[0]['label']
66
  except Exception as e:
67
  sentiment_text = f"Sentiment Analysis Error: {str(e)}"
68
 
69
  # Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
70
- keywords = ' '.join([word for word in content.split()[:10]]) # Sample, first 10 words as keywords
71
 
72
  # Saving processed file (for download link)
73
  try:
@@ -79,24 +79,48 @@ def process_file(file, file_type, language="en"):
79
 
80
  return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
81
 
82
- # Set up Gradio interface
83
- iface = gr.Interface(
84
- fn=process_file,
85
- inputs=[
86
- gr.File(label="Upload Document (PDF, DOCX, TXT, PPTX)"),
87
- gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type"),
88
- ],
89
- outputs=[
90
- gr.Textbox(label="Original Content"),
91
- gr.Textbox(label="Rephrased Content"),
92
- gr.Textbox(label="Summary"),
93
- gr.Textbox(label="Sentiment Analysis"),
94
- gr.Textbox(label="Keywords"),
95
- gr.File(label="Download Processed Document")
96
- ],
97
- title="Enhanced Document Processor",
98
- description="Upload a document to rephrase, summarize, analyze sentiment, extract keywords, and highlight key information. Supports PDF, DOCX, TXT, PPTX."
99
- )
100
 
101
- iface.launch()
 
 
 
 
 
 
 
 
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import tempfile
9
 
10
# Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis
# Summarizer: BART fine-tuned for news summarization; process_file calls it
# with max_length=150 / min_length=50 and do_sample=False.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Paraphraser: T5 paraphrase model; inputs longer than 512 tokens are truncated.
rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True)
# Sentiment classifier: DistilBERT fine-tuned on SST-2; process_file feeds it
# only the first 512 characters of the document.
sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
14
 
 
36
  # Function to process the file and generate outputs
37
  def process_file(file, file_type, language="en"):
38
  content = read_file(file, file_type)
39
+
40
  # Check if content is not empty
41
  if not content.strip():
42
  return "Error: The document is empty or unsupported format.", None, None, None, None, None
43
+
44
  # Summarize the content
45
  try:
46
  summary = summarizer(content, max_length=150, min_length=50, do_sample=False)
 
51
  # Rephrase the entire content in manageable chunks
52
  rephrased_text = ""
53
  try:
54
+ chunk_size = 500
55
  content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)]
56
  for chunk in content_chunks:
57
  rephrased = rephraser(chunk)
 
61
 
62
  # Sentiment analysis
63
  try:
64
+ sentiment = sentiment_analyzer(content[:512])
65
  sentiment_text = sentiment[0]['label']
66
  except Exception as e:
67
  sentiment_text = f"Sentiment Analysis Error: {str(e)}"
68
 
69
  # Extract keywords (for simplicity, extracting words here, but you can replace this with a better method)
70
+ keywords = ' '.join([word for word in content.split()[:10]])
71
 
72
  # Saving processed file (for download link)
73
  try:
 
79
 
80
  return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path
81
 
82
# Define the functions for the different pages
def home_page():
    """Build the landing page of the app.

    Returns:
        gr.Blocks: the assembled home page, suitable for embedding in a
        gr.TabbedInterface.
    """
    with gr.Blocks() as home:
        # Header — NOTE(review): this title text looks garbled; confirm the
        # intended wording before changing it.
        gr.Markdown("## Original Content Rephrased Content")

        # Menu bar as buttons. FIX: the originals were bound to local names
        # (home_btn / full_analysis_btn) that were never used and had no click
        # handlers; the buttons are purely decorative — actual navigation is
        # handled by the enclosing TabbedInterface tabs.
        with gr.Row():
            gr.Button("Home")
            gr.Button("Full Analysis", variant="primary")

        # Display content on home page
        gr.Markdown("Welcome to the Document Processor!")
        gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")

    return home
98
 
99
def detailed_page():
    """Build the 'Full Analysis' page and wire the upload form to process_file.

    Returns:
        gr.Blocks: the assembled analysis page, suitable for embedding in a
        gr.TabbedInterface.
    """
    with gr.Blocks() as detailed:
        # Header
        gr.Markdown("## Detailed Analysis Page")

        # Menu bar as buttons (decorative; tab switching is done by the
        # enclosing TabbedInterface).
        with gr.Row():
            gr.Button("Home", variant="primary")
            gr.Button("Full Analysis")

        # File upload and processing components
        file_input = gr.File(label="Upload Document")
        file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
        content_output = gr.Textbox(label="Original Content")
        rephrased_output = gr.Textbox(label="Rephrased Content")
        summary_output = gr.Textbox(label="Summary")
        sentiment_output = gr.Textbox(label="Sentiment Analysis")
        keywords_output = gr.Textbox(label="Keywords")
        download_link = gr.File(label="Download Processed Document")

        # FIX: the original page created all widgets but never connected them
        # to process_file, so an uploaded document was never analyzed. A
        # submit button now drives the pipeline; the six outputs line up with
        # the six values process_file returns (content, rephrased, summary,
        # sentiment, keywords, file path).
        process_btn = gr.Button("Run Analysis", variant="primary")
        process_btn.click(
            fn=process_file,
            inputs=[file_input, file_type],
            outputs=[
                content_output,
                rephrased_output,
                summary_output,
                sentiment_output,
                keywords_output,
                download_link,
            ],
        )

        # Sample output or content for the detailed analysis page
        gr.Markdown("Here you will see detailed analysis outputs after document upload.")

    return detailed
123
+
124
# Main application interface with tabbed navigation: each tab hosts one of
# the Blocks pages built above.
iface = gr.TabbedInterface([home_page(), detailed_page()], ["Home", "Full Analysis"])

# FIX: guard the launch so importing this module (e.g. from tests or a
# deployment wrapper) does not start the web server as an import side effect.
if __name__ == "__main__":
    iface.launch()