"""Gradio app that summarizes English, Hindi, or Marathi documents.

Text is taken from a textbox or an uploaded .txt/.pdf/.docx file, translated
to English when necessary, summarized with BART, optionally translated into
the requested output language, and offered for download as DOCX and PDF.
"""

import os
import tempfile
import textwrap
from io import BytesIO

import docx2txt
import gradio as gr
import PyPDF2
from docx import Document
from fpdf import FPDF
from transformers import pipeline

# Load models
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
translator_hi_en = pipeline("translation", model="Helsinki-NLP/opus-mt-hi-en")
translator_mr_en = pipeline("translation", model="Helsinki-NLP/opus-mt-mr-en")
translator_en_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
translator_en_mr = pipeline("translation", model="Helsinki-NLP/opus-mt-en-mr")

# Message returned by extract_text() for file types it cannot read.
UNSUPPORTED_FILE_MESSAGE = (
    "Unsupported file type. Please upload a .pdf, .docx, or .txt file."
)

# Language name (as shown in the UI) -> translation pipeline.
_TO_ENGLISH = {"Hindi": translator_hi_en, "Marathi": translator_mr_en}
_FROM_ENGLISH = {"Hindi": translator_en_hi, "Marathi": translator_en_mr}

# UI label -> (min_length, max_length) passed to the summarizer.
_SUMMARY_LENGTHS = {
    "Short (1–2 sentences)": (20, 60),
    "Detailed (paragraph)": (80, 200),
}
_DEFAULT_SUMMARY_LENGTH = (40, 130)


# Extract text based on file type
def extract_text(file):
    """Return the text content of an uploaded .txt, .pdf, or .docx file.

    Args:
        file: A filesystem path (``str`` / ``os.PathLike``) or an object with
            a ``name`` attribute holding the path. ``None`` means no upload.

    Returns:
        The extracted text, ``""`` when ``file`` is ``None``, or
        ``UNSUPPORTED_FILE_MESSAGE`` for any other extension.
    """
    if file is None:
        return ""

    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    # Prefer re-opening the file by path: the handle we were given may be
    # unreadable or positioned at EOF. Fall back to the object itself when
    # the path does not exist on disk (e.g. an in-memory stream).
    source = path if os.path.isfile(path) else file

    ext = os.path.splitext(path)[1].lstrip(".").lower()
    if ext == "txt":
        if isinstance(source, str):
            with open(source, "rb") as handle:
                raw = handle.read()
        else:
            raw = source.read()
        # utf-8-sig drops a leading BOM; "replace" keeps going on stray bytes.
        return raw.decode("utf-8-sig", errors="replace")
    if ext == "pdf":
        reader = PyPDF2.PdfReader(source)
        # extract_text() is comparatively expensive, so call it once per page.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)
    if ext == "docx":
        return docx2txt.process(source)
    return UNSUPPORTED_FILE_MESSAGE


# Chunk long text for translation and summarization
def chunk_text(text, max_length=1000):
    """Split ``text`` into newline-delimited chunks of about ``max_length`` chars.

    Consecutive paragraphs are packed into one chunk while they fit. A single
    paragraph longer than ``max_length`` is wrapped on whitespace into smaller
    pieces so no chunk grows without bound. Empty chunks are never returned.

    Args:
        text: The text to split.
        max_length: Soft upper bound on the characters per chunk.

    Returns:
        A list of non-empty, stripped chunk strings (possibly empty).
    """
    chunks = []
    current_chunk = ""

    for para in text.split("\n"):
        if max_length > 0 and len(para) >= max_length:
            # An oversized paragraph would otherwise become one huge chunk.
            pieces = textwrap.wrap(
                para,
                width=max_length,
                break_long_words=True,
                break_on_hyphens=False,
            )
        else:
            pieces = [para]

        for piece in pieces:
            if len(current_chunk) + len(piece) < max_length:
                current_chunk += piece + "\n"
                continue
            finished = current_chunk.strip()
            if finished:
                chunks.append(finished)
            current_chunk = piece + "\n"

    leftover = current_chunk.strip()
    if leftover:
        chunks.append(leftover)
    return chunks


def _translate_chunks(translator, text):
    """Translate ``text`` in 500-character chunks and join them with spaces."""
    return " ".join(
        translator(chunk)[0]["translation_text"]
        for chunk in chunk_text(text, 500)
    )


# Translate to English from selected language
def translate_to_english(text, lang):
    """Translate Hindi or Marathi ``text`` to English; return others unchanged."""
    translator = _TO_ENGLISH.get(lang)
    if translator is None:
        return text
    return _translate_chunks(translator, text)


# Translate from English to selected output language
def translate_from_english(text, lang):
    """Translate English ``text`` to Hindi or Marathi; return others unchanged."""
    translator = _FROM_ENGLISH.get(lang)
    if translator is None:
        return text
    return _translate_chunks(translator, text)


# Save summary to DOCX
def generate_docx(text):
    """Build a DOCX document with a "Summary" heading and return it as BytesIO."""
    doc = Document()
    doc.add_heading("Summary", 0)
    doc.add_paragraph(text)
    buffer = BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer


# Save summary to PDF
def generate_pdf(text):
    """Render ``text`` into a PDF and return it as a BytesIO positioned at 0.

    The built-in "Arial" font only covers Latin-1, so characters outside that
    range (for example Devanagari) are replaced with "?" instead of raising.
    Rendering them properly requires registering a Unicode TTF font.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in text.split("\n"):
        printable = line.encode("latin-1", "replace").decode("latin-1")
        if not printable.strip():
            pdf.ln(10)  # keep blank lines as vertical spacing
            continue
        # Newer fpdf2 leaves the cursor at the right edge after multi_cell();
        # reset it so the next cell has the full page width available.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, printable)

    # dest="S" returns the document in memory: a latin-1 str under legacy
    # PyFPDF, a bytearray under fpdf2.
    rendered = pdf.output(dest="S")
    if isinstance(rendered, str):
        data = rendered.encode("latin-1")
    else:
        data = bytes(rendered)
    return BytesIO(data)


def _save_download(buffer, filename):
    """Write ``buffer`` to ``filename`` in a fresh temp directory; return the path."""
    directory = tempfile.mkdtemp(prefix="summary_")
    path = os.path.join(directory, filename)
    with open(path, "wb") as handle:
        handle.write(buffer.getvalue())
    return path


# Main summarization function
def summarize_input(text, file, length, input_lang, output_lang):
    """Summarize pasted text or an uploaded file.

    Args:
        text: Text typed into the textbox; takes priority over ``file``.
        file: Uploaded file (path or object with ``name``), or ``None``.
        length: One of the summary-length labels offered by the UI.
        input_lang: Language of the source document.
        output_lang: Language the summary should be returned in.

    Returns:
        A ``(summary, docx_path, pdf_path)`` tuple. Both paths are ``None``
        when there is nothing to summarize or the file type is unsupported
        (in which case ``summary`` carries the explanatory message).
    """
    source_text = (text or "").strip()
    if not source_text:
        source_text = extract_text(file)
        if source_text == UNSUPPORTED_FILE_MESSAGE:
            # Show the message to the user instead of summarizing it.
            return source_text, None, None
        source_text = (source_text or "").strip()
    if not source_text:
        return "", None, None

    # Translate to English if needed
    if input_lang != "English":
        source_text = translate_to_english(source_text, input_lang)

    # Set summary length
    min_len, max_len = _SUMMARY_LENGTHS.get(length, _DEFAULT_SUMMARY_LENGTH)

    chunks = chunk_text(source_text)
    if not chunks:
        return "", None, None

    summaries = [
        summarizer(
            chunk,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            truncation=True,  # never exceed the model's input window
        )[0]["summary_text"]
        for chunk in chunks
    ]
    final_summary = "\n\n".join(summaries)

    # Translate from English to output language
    if output_lang != "English":
        final_summary = translate_from_english(final_summary, output_lang)

    # gr.File outputs expect file paths, not in-memory buffers.
    docx_path = _save_download(generate_docx(final_summary), "summary.docx")
    pdf_path = _save_download(generate_pdf(final_summary), "summary.pdf")
    return final_summary, docx_path, pdf_path


# Gradio interface
iface = gr.Interface(
    fn=summarize_input,
    inputs=[
        gr.Textbox(lines=8, label="Enter text (optional)"),
        gr.File(
            label="Upload file (.txt, .pdf, .docx)",
            file_types=[".pdf", ".docx", ".txt"],
        ),
        gr.Radio(
            [
                "Short (1–2 sentences)",
                "Medium (3–5 sentences)",
                "Detailed (paragraph)",
            ],
            label="Summary length",
            value="Medium (3–5 sentences)",
        ),
        gr.Dropdown(
            ["English", "Hindi", "Marathi"],
            label="Document Language",
            value="English",
        ),
        gr.Dropdown(
            ["English", "Hindi", "Marathi"],
            label="Summary Output Language",
            value="English",
        ),
    ],
    outputs=[
        gr.Textbox(label="Summary"),
        gr.File(label="Download as DOCX"),
        gr.File(label="Download as PDF"),
    ],
    title="🌍 Multilingual Document Summarizer",
    description="Upload or paste a document in English, Hindi, or Marathi. App will translate if needed and summarize it into your chosen output language.",
)

iface.launch()