ShreeDeepthi's picture
Update app.py
ddb7bb4 verified
import streamlit as st
from pdfminer.high_level import extract_text
import smtplib
from email.message import EmailMessage
from email_validator import validate_email, EmailNotValidError
import spacy
from collections import Counter
import heapq
from fpdf import FPDF
import pandas as pd
import matplotlib.pyplot as plt
import requests
import subprocess
import sys
# Install and load spaCy
try:
import spacy
except ImportError:
subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
try:
spacy.load("en_core_web_sm")
except OSError:
subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
nlp = spacy.load("en_core_web_sm")
# Predefined risk-related words
RISK_WORDS = [
"fraud", "penalty", "violation", "risk", "lawsuit", "breach",
"noncompliance", "litigation", "regulatory", "fine"
]
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
SENDER_EMAIL = "shreedeepthi2005@gmail.com"
SENDER_PASSWORD = "qntm oher jqfz oflt"
def extract_text_from_pdf(uploaded_file):
return extract_text(uploaded_file)
def extract_key_clauses(text):
doc = nlp(text)
sentences = list(doc.sents)
clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
return clauses[:10]
def summarize_text(text, num_sentences=5):
doc = nlp(text)
sentences = list(doc.sents)
word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
sentence_scores = {sent: sum(word_frequencies.get(word.text.lower(), 0) for word in sent) for sent in sentences}
summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
return ' '.join([str(sentence) for sentence in summarized_sentences])
def detect_risks(text):
doc = nlp(text.lower())
return list(set(token.text for token in doc if token.text in RISK_WORDS))
def get_regulatory_updates():
predefined_updates = [
{"title": "πŸ“œ New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
{"title": "βš–οΈ Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."},
]
return predefined_updates
def visualize_key_clauses_frequency(clauses):
clause_counts = Counter(clauses)
common_clauses = clause_counts.most_common()
if common_clauses:
labels, values = zip(*common_clauses)
plt.figure(figsize=(10, 6))
plt.barh(labels, values, color='skyblue')
plt.xlabel('Frequency')
plt.title('πŸ“Š Key Clauses Frequency')
st.pyplot(plt)
else:
st.write("🚫 No key clauses to visualize.")
def main():
st.title("πŸ“‘ Interactive Legal Document Analysis Dashboard")
st.sidebar.title("βš™οΈ Options")
features = st.sidebar.multiselect("πŸ” Select Features",
["πŸ“Š Data Visualization", "πŸ“œ Summary", "πŸ”‘ Key Clauses", "⚠️ Risk Detection", "βš–οΈ Regulatory Updates"])
uploaded_file = st.file_uploader("πŸ“‚ Upload a legal document (PDF)", type="pdf")
recipient_email = st.text_input("πŸ“§ Enter your email to receive the analysis results (optional)")
if uploaded_file is not None:
try:
text = extract_text_from_pdf(uploaded_file)
st.success("βœ… Text extracted successfully!")
except Exception as e:
st.error(f"❌ Error extracting text from PDF: {e}")
return
summary, clauses, risks, updates = "", [], [], []
if "πŸ“œ Summary" in features:
summary = summarize_text(text)
st.subheader("πŸ“œ Summary")
st.write(summary)
if "πŸ”‘ Key Clauses" in features:
clauses = extract_key_clauses(text)
st.subheader("πŸ”‘ Key Clauses")
for i, clause in enumerate(clauses, 1):
st.write(f"{i}. {clause}")
if "πŸ“Š Data Visualization" in features:
visualize_key_clauses_frequency(clauses)
if "⚠️ Risk Detection" in features:
risks = detect_risks(text)
st.subheader("⚠️ Detected Risks")
st.write(", ".join(risks) if risks else "βœ… No risks detected.")
if "βš–οΈ Regulatory Updates" in features:
updates = get_regulatory_updates()
st.subheader("βš–οΈ Regulatory Updates")
for update in updates:
st.write(f"- **{update.get('title')}**: {update.get('summary')}")
if st.button("πŸ“„ Generate PDF Report"):
pdf_path = "Analysis_Results.pdf"
st.success("πŸ“₯ PDF Report Ready! Download Below")
with open(pdf_path, "rb") as file:
st.download_button("πŸ“₯ Download PDF Report", file, file_name="Analysis_Results.pdf", mime="application/pdf")
if recipient_email:
try:
validate_email(recipient_email)
st.success(f"πŸ“§ PDF sent to {recipient_email} successfully!")
except EmailNotValidError:
st.error("❌ Invalid email address. Please enter a valid one.")
if __name__ == "__main__":
main()