Spaces:
Build error
Build error
import datetime
import html
import os

import docx
import gradio as gr
import pdfplumber
from transformers import pipeline
# Load open-source LLMs
# Both pipelines are built once at import time and shared by every request.
summary_llm = pipeline(
    task="summarization",
    model="google/pegasus-xsum",
    tokenizer="google/pegasus-xsum",
)
text_llm = pipeline(
    task="text2text-generation",
    model="MBZUAI/LaMini-T5-738M",
    tokenizer="MBZUAI/LaMini-T5-738M",
)
# Extract text from files
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX, or TXT file.

    Args:
        file: The upload, either an object exposing its on-disk location as
            ``.name`` or a plain path string.

    Returns:
        The extracted text, or the literal string
        ``"Unsupported file format."`` when the extension is not ``.pdf``,
        ``.docx`` or ``.txt`` (compared case-insensitively).

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    # Always work from the on-disk path. Reading through the upload object
    # itself only works when it is file-like, which is not guaranteed.
    path = getattr(file, "name", file)
    lowered = str(path).lower()

    if lowered.endswith(".pdf"):
        with pdfplumber.open(path) as pdf:
            # Extract every page exactly once, then drop pages that
            # produced no text (falsy result).
            page_texts = [page.extract_text() for page in pdf.pages]
        return "\n".join(chunk for chunk in page_texts if chunk)

    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "\n".join(para.text for para in document.paragraphs)

    if lowered.endswith(".txt"):
        # Read bytes and decode explicitly so line endings are kept as-is.
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")

    return "Unsupported file format."
# Format glossary visually
def format_glossary_html(glossary_text):
    """Render glossary text as HTML, one ``<br>``-terminated row per line.

    A line containing ``:`` is split at the first colon; the part before it
    is shown in bold blue as the term and the remainder as its description.
    Any other line is emitted unchanged apart from escaping.

    Args:
        glossary_text: Newline-separated glossary text.

    Returns:
        An HTML fragment. All text taken from ``glossary_text`` is
        HTML-escaped so markup in it is displayed literally instead of
        being interpreted by the browser.
    """
    rows = []
    for line in glossary_text.split("\n"):
        if ":" in line:
            term, desc = line.split(":", 1)
            rows.append(
                f"<b style='color:#1e3a8a'>{html.escape(term.strip())}</b>: "
                f"{html.escape(desc.strip())}<br>"
            )
        else:
            rows.append(f"{html.escape(line)}<br>")
    return "".join(rows)
# Generate summary
def generate_summary(text):
    """Summarize the first 1024 characters of ``text`` with ``summary_llm``.

    Args:
        text: Document text; only ``text[:1024]`` is sent to the model.

    Returns:
        The ``"summary_text"`` field of the first pipeline result.
    """
    excerpt = text[:1024]
    results = summary_llm(
        excerpt,
        max_length=250,
        min_length=80,
        do_sample=False,
    )
    first_result = results[0]
    return first_result["summary_text"]
# Generate text (glossary/verdict/custom)
def generate_text_response(prompt, max_len=512):
    """Run ``prompt`` through ``text_llm`` and return the generated text.

    Args:
        prompt: Full prompt string passed to the pipeline.
        max_len: Forwarded to the pipeline as ``max_length``.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
        Sampling is enabled, so repeated calls may return different text.
    """
    outputs = text_llm(prompt, max_length=max_len, do_sample=True)
    best = outputs[0]
    return best["generated_text"]
# Main document analyzer
def analyze_document(file):
    """Extract an uploaded document, run the models, and write a report.

    Args:
        file: The uploaded file object (its path is read from ``.name``),
            or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple ``(text, summary, glossary, glossary_html, verdict,
        report_path, short_text)``. When there is nothing to analyze
        (no upload, unsupported format, or empty text) the first element
        is a short message, the text fields are empty strings and
        ``report_path`` is ``None``.
    """
    # Clicking the button without uploading passes None; bail out before
    # touching ``file.name``.
    if file is None:
        return "No file uploaded.", "", "", "", "", None, ""

    filename = os.path.basename(file.name)
    text = extract_text(file)
    if not text or not text.strip():
        return "No content found in file.", "", "", "", "", None, ""
    if text == "Unsupported file format.":
        # extract_text reports an unknown extension with this sentinel;
        # show it to the user instead of summarizing the message itself.
        return text, "", "", "", "", None, ""

    # Only the first 3000 characters are given to the models.
    short_text = text[:3000]

    # Enhanced prompts
    glossary_prompt = (
        "\nExtract and explain all legal terms, laws, or references. Format:\n"
        "Term: ...\n"
        "Explanation: ...\n"
        "Document:\n"
        f"{short_text}\n"
    )
    verdict_prompt = (
        "\nBased on the document, predict the likely verdict in 2–3 sentences "
        "using standard legal reasoning.\n"
        "Document:\n"
        f"{short_text}\n"
    )

    # Run LLMs
    summary = generate_summary(short_text)
    glossary = generate_text_response(glossary_prompt)
    verdict = generate_text_response(verdict_prompt)
    glossary_html = format_glossary_html(glossary)

    # Save report
    # NOTE(review): the emoji in these headings were restored from
    # mis-encoded text; confirm them against the intended originals.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_filename = f"LegalSummary_{timestamp}.txt"
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(f"📄 File: {filename}\n🕒 Time: {timestamp}\n\n")
        f.write("=== 📝 Summary ===\n" + summary + "\n\n")
        f.write("=== 📘 Glossary ===\n" + glossary + "\n\n")
        f.write("=== ⚖️ Verdict ===\n" + verdict + "\n")

    return text, summary, glossary, glossary_html, verdict, output_filename, short_text
# Custom prompt answer
def custom_prompt_response(doc_text, user_prompt):
    """Answer a user question about the document with the text model.

    Args:
        doc_text: Document text to answer from; may be ``None`` or blank.
        user_prompt: The user's question; may be ``None`` or blank.

    Returns:
        The generated answer, or a warning message when either input is
        missing or contains only whitespace.
    """
    # Treat None like an empty string so a missing input produces the
    # warning instead of an AttributeError.
    document = (doc_text or "").strip()
    question = (user_prompt or "").strip()
    if not document or not question:
        return "⚠️ Please provide both a document and a prompt."

    prompt = (
        "\nYou are a legal expert. Answer the question below using only "
        "the document provided.\n"
        "Question:\n"
        f"{question}\n"
        "Document:\n"
        f"{document}\n"
    )
    return generate_text_response(prompt)
# Gradio UI
# NOTE(review): the layout nesting below was reconstructed from a copy of
# this file that had lost its indentation, and the emoji in labels were
# restored from mis-encoded text; confirm both against the intended design.
with gr.Blocks(css="body { background-color: #f9f9f9; font-family: 'Segoe UI'; }") as demo:
    with gr.Row():
        # Left column: title, upload control, action button, report download.
        with gr.Column(scale=3):
            gr.Markdown(
                "\n"
                "<div style='text-align: center; font-size: 28px; font-weight: bold; color: #1e3a8a; margin-bottom: 10px;'>\n"
                "🧾 Legal Document Summarizer Using LLMs\n"
                "</div>\n"
                "<div style='text-align: center; font-size: 16px; color: #444444; margin-bottom: 25px;'>\n"
                "Upload legal documents in PDF, DOCX, or TXT format to receive structured summaries, legal term glossaries, and AI-inferred verdicts using open-source language models.\n"
                "</div>\n"
            )
            file_input = gr.File(label="📂 Upload Legal Document")
            submit_btn = gr.Button("🔍 Analyze Document")
            download_btn = gr.File(label="⬇️ Download Report")
        # Right column: static feature list.
        with gr.Column(scale=1):
            gr.Markdown("### 💡 Features")
            gr.Markdown(
                "\n"
                "- 📝 AI-generated legal summaries\n"
                "- 📘 Glossary of legal terms\n"
                "- ⚖️ Inferred legal verdict\n"
                "- ❓ Custom Q&A based on the document\n"
            )

    # Analysis outputs, filled by analyze_document.
    extracted = gr.Textbox(label="📄 Extracted Text", lines=10, interactive=False)
    summary = gr.Textbox(label="📝 Summary", lines=6, interactive=False)
    # Hidden holder for the raw glossary text; only the HTML version is shown.
    glossary_raw = gr.Textbox(visible=False)
    glossary_html = gr.HTML(label="📘 Glossary of Legal Terms")
    final_verdict = gr.Textbox(label="⚖️ Verdict (AI Inferred)", lines=3, interactive=False)

    # Custom question-and-answer section.
    with gr.Row():
        gr.Markdown("### ❓ Ask a Question About the Document")
    user_prompt = gr.Textbox(label="Your Question", placeholder="e.g., What is the legal issue?")
    custom_response = gr.Textbox(label="🤖 AI Answer", lines=4)
    custom_btn = gr.Button("🧠 Get Answer")
    # Hidden copy of the truncated document text, passed to the Q&A handler.
    hidden_doc_text = gr.Textbox(visible=False)

    # The outputs list must stay in the order analyze_document returns them.
    submit_btn.click(
        fn=analyze_document,
        inputs=[file_input],
        outputs=[
            extracted,
            summary,
            glossary_raw,
            glossary_html,
            final_verdict,
            download_btn,
            hidden_doc_text,
        ],
    )
    custom_btn.click(
        fn=custom_prompt_response,
        inputs=[hidden_doc_text, user_prompt],
        outputs=custom_response,
    )

demo.launch()