# NOTE(review): the three lines below code were banner text ("Spaces:" /
# "Build error") pasted from the Hugging Face Spaces UI, not Python source;
# kept here as a comment so the module parses.
# Standard library
import tempfile
from io import BytesIO

# Third-party
import docx
import gradio as gr
import PyPDF2
from docx import Document
from pptx import Presentation
from transformers import pipeline
| # Initialize Hugging Face models for summarization, rephrasing, and sentiment analysis | |
| summarizer = pipeline("summarization", model="facebook/bart-large-cnn") | |
| rephraser = pipeline("text2text-generation", model="Vamsi/T5_Paraphrase_Paws", max_length=512, truncation=True) | |
| sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english") | |
| # Function to read content from different file types | |
| def read_file(file, file_type): | |
| content = "" | |
| try: | |
| if file_type == "docx": | |
| doc = Document(file) | |
| for para in doc.paragraphs: | |
| content += para.text + "\n" | |
| elif file_type == "txt": | |
| content = file.read().decode("utf-8") | |
| elif file_type == "pdf": | |
| pdf_reader = PyPDF2.PdfReader(file) | |
| for page in pdf_reader.pages: | |
| content += page.extract_text() + "\n" | |
| elif file_type == "pptx": | |
| prs = Presentation(file) | |
| for slide in prs.slides: | |
| for shape in slide.shapes: | |
| if hasattr(shape, "text"): | |
| content += shape.text + "\n" | |
| except Exception as e: | |
| content = f"Error reading the file: {str(e)}" | |
| return content | |
| # Function to process the file and generate outputs | |
| def process_file(file, file_type, language="en"): | |
| content = read_file(file, file_type) | |
| # Check if content is not empty | |
| if not content.strip() or "Error" in content: | |
| return "Error: The document is empty or unsupported format.", None, None, None, None, None | |
| # Summarize the content | |
| try: | |
| summary = summarizer(content, max_length=150, min_length=50, do_sample=False) | |
| summary_text = summary[0]['summary_text'] | |
| except Exception as e: | |
| summary_text = f"Summary Error: {str(e)}" | |
| # Rephrase the entire content in manageable chunks | |
| rephrased_text = "" | |
| try: | |
| chunk_size = 500 | |
| content_chunks = [content[i:i + chunk_size] for i in range(0, len(content), chunk_size)] | |
| for chunk in content_chunks: | |
| rephrased = rephraser(chunk) | |
| rephrased_text += rephrased[0]['generated_text'] + " " | |
| except Exception as e: | |
| rephrased_text = f"Rephrase Error: {str(e)}" | |
| # Sentiment analysis | |
| try: | |
| sentiment = sentiment_analyzer(content[:512]) | |
| sentiment_text = sentiment[0]['label'] | |
| except Exception as e: | |
| sentiment_text = f"Sentiment Analysis Error: {str(e)}" | |
| # Extract keywords (for simplicity, extracting words here, but you can replace this with a better method) | |
| keywords = ' '.join([word for word in content.split()[:10]]) | |
| # Saving processed file (for download link) | |
| try: | |
| with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file: | |
| temp_file.write(content.encode('utf-8')) | |
| processed_file_path = temp_file.name | |
| except Exception as e: | |
| processed_file_path = f"Error saving processed document: {str(e)}" | |
| return content, rephrased_text.strip(), summary_text, sentiment_text, keywords, processed_file_path | |
| # Define the functions for the different pages | |
| def home_page(): | |
| with gr.Blocks() as home: | |
| # Header | |
| gr.Markdown("## Upload a Document to Process") | |
| # Menu bar as buttons | |
| with gr.Row(): | |
| home_btn = gr.Button("Home") | |
| full_analysis_btn = gr.Button("Full Analysis", variant="primary") | |
| # Display content on home page | |
| gr.Markdown("Welcome to the Document Processor!") | |
| gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.") | |
| # File upload and content output | |
| file_input = gr.File(label="Upload Document") | |
| content_output = gr.Textbox(label="Original Content") | |
| rephrased_output = gr.Textbox(label="Rephrased Content") | |
| def on_file_upload(file): | |
| if not file: | |
| return "No file uploaded.", None | |
| content, rephrased, _, _, _, _ = process_file(file, file_type="docx") | |
| return content, rephrased | |
| # Process file on upload | |
| file_input.change(on_file_upload, inputs=file_input, outputs=[content_output, rephrased_output]) | |
| return home | |
| def detailed_page(): | |
| with gr.Blocks() as detailed: | |
| # Header | |
| gr.Markdown("## Detailed Analysis Page") | |
| # Menu bar as buttons | |
| with gr.Row(): | |
| home_btn = gr.Button("Home", variant="primary") | |
| full_analysis_btn = gr.Button("Full Analysis") | |
| # File upload and processing components | |
| file_input = gr.File(label="Upload Document") | |
| file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type") | |
| keywords_output = gr.Textbox(label="Keywords") | |
| sentiment_output = gr.Textbox(label="Sentiment Analysis") | |
| download_link = gr.File(label="Download Processed Document") | |
| def on_file_upload(file, file_type): | |
| if not file: | |
| return "No file uploaded.", None, None, None | |
| _, _, _, sentiment, keywords, download_path = process_file(file, file_type) | |
| return keywords, sentiment, download_path | |
| # Process file on upload | |
| file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[keywords_output, sentiment_output, download_link]) | |
| # Sample output or content for the detailed analysis page | |
| gr.Markdown("Here you will see detailed analysis outputs after document upload.") | |
| return detailed | |
| # Main application interface with tabbed navigation | |
| iface = gr.TabbedInterface([home_page(), detailed_page()], ["Home", "Full Analysis"]) | |
| iface.launch() | |