Spaces:

ShreeDeepthi
/

Legal-Document-Summariser

Sleeping

App Files Files Community

ShreeDeepthi committed on Jan 24, 2025

Commit

fcfffa3

verified ·

1 Parent(s): 8b3188c

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -56

app.py CHANGED Viewed

@@ -1,19 +1,18 @@
 import streamlit as st
 from pdfminer.high_level import extract_text
-import smtplib
 from email.message import EmailMessage
 from email_validator import validate_email, EmailNotValidError
-import spacy
 from collections import Counter
 import heapq
 from fpdf import FPDF
-import pandas as pd
 import matplotlib.pyplot as plt
 import requests
 import subprocess
 import sys
-# Install spaCy and download the 'en-core-web-sm' model if not already installed
 try:
     import spacy
 except ImportError:
@@ -33,38 +32,40 @@ RISK_WORDS = [
 ]
 HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
-SENDER_EMAIL = "shreedeepthi2005@gmail.com"
-SENDER_PASSWORD = "qntm oher jqfz oflt"
 def extract_text_from_pdf(uploaded_file):
     return extract_text(uploaded_file)
 def extract_key_clauses(text):
     doc = nlp(text)
     sentences = list(doc.sents)
     clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
     return clauses[:10]
 def summarize_text(text, num_sentences=5):
     doc = nlp(text)
     sentences = list(doc.sents)
     word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
     sentence_scores = {}
     for sent in sentences:
-        sentence_score = 0
-        for word in sent:
-            if word.text.lower() in word_frequencies:
-                sentence_score += word_frequencies[word.text.lower()]
         sentence_scores[sent] = sentence_score
     summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
     summary = ' '.join([str(sentence) for sentence in summarized_sentences])
     return summary
 def detect_risks(text):
     doc = nlp(text.lower())
     detected_risks = [token.text for token in doc if token.text in RISK_WORDS]
     return list(set(detected_risks))
 def get_regulatory_updates():
     predefined_updates = [
         {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
@@ -74,11 +75,12 @@ def get_regulatory_updates():
     try:
         response = requests.get(url, headers=HEADERS)
         response.raise_for_status()
-        updates = []
         return updates if updates else predefined_updates
     except requests.exceptions.RequestException:
         return predefined_updates
 def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pdf"):
     pdf = FPDF()
     pdf.set_auto_page_break(auto=True, margin=15)
@@ -115,6 +117,7 @@ def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pd
     pdf.output(pdf_path)
 def send_email(pdf_path, recipient_email):
     msg = EmailMessage()
     msg["Subject"] = "Legal Document Analysis Results"
@@ -131,6 +134,7 @@ def send_email(pdf_path, recipient_email):
         server.login(SENDER_EMAIL, SENDER_PASSWORD)
         server.send_message(msg)
 def plot_word_frequencies(text):
     doc = nlp(text)
     word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
@@ -143,6 +147,7 @@ def plot_word_frequencies(text):
     plt.ylabel("Frequency")
     st.pyplot(plt)
 def main():
     st.title("Interactive Legal Document Analysis Dashboard")
@@ -154,52 +159,49 @@ def main():
     recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
     if st.button("Submit"):
-        if not recipient_email:
-            st.error("Please enter an email address to receive the analysis.")
         else:
-            st.success(f"Analysis will be sent to {recipient_email}.")
-    if uploaded_file is not None:
-        text = extract_text_from_pdf(uploaded_file)
-        summary, clauses, risks, updates = "", [], [], []
-        if "Summary" in features:
-            summary = summarize_text(text)
-            st.subheader("Summary")
-            st.write(summary)
-        if "Key Clauses" in features:
-            clauses = extract_key_clauses(text)
-            st.subheader("Key Clauses")
-            for i, clause in enumerate(clauses, 1):
-                st.write(f"{i}. {clause}")
-        if "Risk Detection" in features:
-            risks = detect_risks(text)
-            st.subheader("Detected Risks")
-            st.write(", ".join(risks) if risks else "No risks detected.")
-        if "Regulatory Updates" in features:
-            updates = get_regulatory_updates()
-            st.subheader("Regulatory Updates")
-            for update in updates:
-                st.write(f"- **{update.get('title')}**: {update.get('summary')}")
-        if "Data Visualization" in features:
-            st.subheader("Word Frequency Visualization")
-            plot_word_frequencies(text)
-        pdf_path = "Analysis_Results.pdf"
-        generate_pdf(summary, clauses, risks, updates, pdf_path)
-        if recipient_email:
-            try:
-                validate_email(recipient_email)
-                send_email(pdf_path, recipient_email)
-                st.success("Analysis PDF has been sent to your email.")
-            except EmailNotValidError:
-                st.error("Invalid email address. Please enter a valid email.")
 if __name__ == "__main__":
     main()

 import streamlit as st
 from pdfminer.high_level import extract_text
 from email.message import EmailMessage
 from email_validator import validate_email, EmailNotValidError
+import smtplib
 from collections import Counter
 import heapq
 from fpdf import FPDF
 import matplotlib.pyplot as plt
+import spacy
 import requests
 import subprocess
 import sys
+# Ensure spaCy is installed and 'en_core_web_sm' is loaded
 try:
     import spacy
 except ImportError:
 ]
 # Request headers passed to requests.get() in get_regulatory_updates; the
 # User-Agent string mimics a desktop browser.
 HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
 # Credentials that send_email hands to server.login(). The values below are
 # placeholders and must be replaced before e-mail delivery can work.
 # NOTE(review): avoid committing real credentials here -- consider loading
 # them from the environment or the hosting platform's secret store instead.
+SENDER_EMAIL = "your_email@gmail.com"  # Replace with your email
+SENDER_PASSWORD = "your_app_password"  # Replace with your app password
+# Extract text from PDF
 def extract_text_from_pdf(uploaded_file):
     """Return the text pdfminer extracts from the uploaded PDF.
 
     Args:
         uploaded_file: The PDF to read; it is handed unchanged to
             pdfminer's ``extract_text``.
 
     Returns:
         The value produced by ``extract_text`` for that file.
     """
     pdf_text = extract_text(uploaded_file)
     return pdf_text
+# Extract key clauses
 def extract_key_clauses(text, max_clauses=10, min_tokens=10):
     """Return the first sufficiently long sentences of *text* as clauses.
 
     The text is run through the module-level ``nlp`` pipeline and its
     sentences are scanned in document order.
 
     Args:
         text: Document text to analyse.
         max_clauses: Upper bound on the number of clauses returned
             (default 10, the previously hard-coded limit).
         min_tokens: A sentence is kept only when ``len(sentence)`` is
             strictly greater than this value (default 10, the previously
             hard-coded threshold). Assuming ``nlp`` is a spaCy pipeline,
             that length is a token count, not a character count.
 
     Returns:
         A list of at most ``max_clauses`` whitespace-stripped sentence
         strings, in the order they appear in the document.
     """
     doc = nlp(text)
     clauses = []
     for sentence in doc.sents:
         # Stop as soon as the cap is reached instead of collecting every
         # sentence of a long document and slicing afterwards.
         if len(clauses) >= max_clauses:
             break
         if len(sentence) > min_tokens:
             clauses.append(str(sentence).strip())
     return clauses
+# Summarize text
 def summarize_text(text, num_sentences=5):
     """Build an extractive summary from the highest-scoring sentences.
 
     Args:
         text: Document text to summarise.
         num_sentences: Maximum number of sentences to include (default 5).
 
     Returns:
         The selected sentences joined with single spaces. Sentences appear
         in descending score order (the order ``heapq.nlargest`` yields),
         not in their original document order.
     """
     doc = nlp(text)
     sentences = list(doc.sents)
     # Frequency table over lower-cased alphabetic tokens, stop words excluded.
     word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
     sentence_scores = {}
     for sent in sentences:
         # A sentence's score is the sum of the frequencies of its tokens;
         # tokens missing from the table (stop words, punctuation) add 0.
+        sentence_score = sum(word_frequencies.get(word.text.lower(), 0) for word in sent)
         sentence_scores[sent] = sentence_score
     # Iterating the dict yields its keys (the sentences); rank them by score.
     summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
     summary = ' '.join([str(sentence) for sentence in summarized_sentences])
     return summary
+# Detect risks
 def detect_risks(text):
     """Return the distinct RISK_WORDS entries that occur as tokens in *text*.
 
     The text is lower-cased before tokenisation, so only lower-case
     entries of ``RISK_WORDS`` can match. Matching is per token.
     NOTE(review): multi-word entries, if RISK_WORDS has any, would
     presumably never match a single token -- confirm against the list.
 
     Args:
         text: Document text to scan.
 
     Returns:
         An alphabetically sorted list of the matched terms without
         duplicates. Sorting keeps the displayed result stable between
         runs; converting a set straight to a list gives no such guarantee.
     """
     # Build the lookup set once so each token check is O(1) rather than a
     # scan of the list (assumes RISK_WORDS holds strings -- TODO confirm).
     risk_terms = set(RISK_WORDS)
     doc = nlp(text.lower())
     found = {token.text for token in doc if token.text in risk_terms}
     return sorted(found)
+# Fetch regulatory updates
 def get_regulatory_updates():
     predefined_updates = [
         {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
     try:
         response = requests.get(url, headers=HEADERS)
         response.raise_for_status()
+        updates = []  # Placeholder for scraped updates
         return updates if updates else predefined_updates
     except requests.exceptions.RequestException:
         return predefined_updates
+# Generate PDF
 def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pdf"):
     pdf = FPDF()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.output(pdf_path)
+# Send email
 def send_email(pdf_path, recipient_email):
     msg = EmailMessage()
     msg["Subject"] = "Legal Document Analysis Results"
         server.login(SENDER_EMAIL, SENDER_PASSWORD)
         server.send_message(msg)
+# Plot word frequencies
 def plot_word_frequencies(text):
     doc = nlp(text)
     word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
     plt.ylabel("Frequency")
     st.pyplot(plt)
+# Main function
 def main():
     st.title("Interactive Legal Document Analysis Dashboard")
     recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
     if st.button("Submit"):
+        if uploaded_file is not None:
+            text = extract_text_from_pdf(uploaded_file)
+            summary, clauses, risks, updates = "", [], [], []
+            if "Summary" in features:
+                summary = summarize_text(text)
+                st.subheader("Summary")
+                st.write(summary)
+            if "Key Clauses" in features:
+                clauses = extract_key_clauses(text)
+                st.subheader("Key Clauses")
+                for i, clause in enumerate(clauses, 1):
+                    st.write(f"{i}. {clause}")
+            if "Risk Detection" in features:
+                risks = detect_risks(text)
+                st.subheader("Detected Risks")
+                st.write(", ".join(risks) if risks else "No risks detected.")
+            if "Regulatory Updates" in features:
+                updates = get_regulatory_updates()
+                st.subheader("Regulatory Updates")
+                for update in updates:
+                    st.write(f"- **{update.get('title')}**: {update.get('summary')}")
+            if "Data Visualization" in features:
+                st.subheader("Word Frequency Visualization")
+                plot_word_frequencies(text)
+            pdf_path = "Analysis_Results.pdf"
+            generate_pdf(summary, clauses, risks, updates, pdf_path)
+            if recipient_email:
+                try:
+                    validate_email(recipient_email)
+                    send_email(pdf_path, recipient_email)
+                    st.success("Analysis PDF has been sent to your email.")
+                except EmailNotValidError:
+                    st.error("Invalid email address. Please enter a valid email.")
         else:
+            st.error("Please upload a PDF document for analysis.")
 if __name__ == "__main__":
     main()