File size: 5,274 Bytes
84cab31 005ea1c 84cab31 005ea1c 84cab31 005ea1c 84cab31 9a24c5c ddb7bb4 9a24c5c 84cab31 005ea1c 84cab31 ddb7bb4 84cab31 ddb7bb4 84cab31 ddb7bb4 84cab31 ddb7bb4 84cab31 ddb7bb4 84cab31 ddb7bb4 84cab31 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c 84cab31 ddb7bb4 005ea1c fcfffa3 ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c ddb7bb4 005ea1c fcfffa3 ddb7bb4 005ea1c ddb7bb4 fcfffa3 ddb7bb4 005ea1c ddb7bb4 005ea1c fcfffa3 ddb7bb4 fcfffa3 ddb7bb4 005ea1c ddb7bb4 fcfffa3 ddb7bb4 fcfffa3 ddb7bb4 84cab31 8b3188c ddb7bb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import heapq
import os
import smtplib
import subprocess
import sys
from collections import Counter
from email.message import EmailMessage

import matplotlib.pyplot as plt
import pandas as pd
import requests
import spacy
import streamlit as st
from email_validator import validate_email, EmailNotValidError
from fpdf import FPDF
from pdfminer.high_level import extract_text
# Install (if missing) and load spaCy together with its small English model.
# NOTE: the file's top-level import section also contains an unconditional
# `import spacy`, which runs before this guard; the pip fallback below can only
# trigger if that earlier import is removed or made conditional.
try:
    import spacy
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
    # Re-import after installing; otherwise the name `spacy` stays undefined
    # and loading the model below fails with NameError.
    import spacy

# Load the model exactly once; download it only when it is not installed yet
# (spacy.load signals a missing model with OSError).
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
    nlp = spacy.load("en_core_web_sm")
# Predefined risk-related words (compared against lower-cased document tokens).
RISK_WORDS = [
    "fraud", "penalty", "violation", "risk", "lawsuit", "breach",
    "noncompliance", "litigation", "regulatory", "fine",
]

# Browser-like User-Agent header; presumably meant for `requests` calls
# (not referenced by the code visible in this file).
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Mail account used as the sender of analysis reports.
# SECURITY: the password must never be committed to source control. It is read
# from the SENDER_PASSWORD environment variable (empty string when unset).
# The app password that used to be hard-coded here should be revoked.
SENDER_EMAIL = os.environ.get("SENDER_EMAIL", "shreedeepthi2005@gmail.com")
SENDER_PASSWORD = os.environ.get("SENDER_PASSWORD", "")
def extract_text_from_pdf(uploaded_file):
    """Return the text that pdfminer extracts from ``uploaded_file``.

    The argument is handed unchanged to ``pdfminer.high_level.extract_text``;
    any exception raised there propagates to the caller.
    """
    pdf_text = extract_text(uploaded_file)
    return pdf_text
def extract_key_clauses(text, *, max_clauses=10, min_tokens=10):
    """Return the first sufficiently long sentences of ``text``.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and split
    into sentences. A sentence is kept when it contains more than
    ``min_tokens`` tokens.

    Args:
        text: Raw document text.
        max_clauses: Maximum number of sentences to return (default 10, the
            previously hard-coded limit). Values below zero are treated as 0.
        min_tokens: A sentence must have strictly more than this many tokens
            to be kept (default 10, the previously hard-coded threshold).

    Returns:
        A list of whitespace-stripped sentence strings in document order.
    """
    doc = nlp(text)
    long_sentences = [
        str(sent).strip() for sent in doc.sents if len(sent) > min_tokens
    ]
    # Clamp so a negative limit cannot turn into a "drop from the end" slice.
    return long_sentences[:max(max_clauses, 0)]
def summarize_text(text, num_sentences=5):
    """Build an extractive summary from the highest-scoring sentences.

    Each sentence is scored by summing the document-wide frequency of its
    words, counting only alphabetic, non-stop-word tokens. The
    ``num_sentences`` best sentences are selected and then emitted in their
    original document order, so the summary reads like the source text
    instead of being shuffled by score.

    Args:
        text: Raw document text; parsed with the module-level pipeline ``nlp``.
        num_sentences: Number of sentences to include (default 5). Zero or a
            negative value yields an empty string.

    Returns:
        The selected sentences joined by single spaces.
    """
    doc = nlp(text)
    word_frequencies = Counter(
        token.text.lower()
        for token in doc
        if token.is_alpha and not token.is_stop
    )

    def sentence_score(sent):
        # Tokens absent from the frequency table (stop words, punctuation,
        # numbers) contribute nothing.
        return sum(word_frequencies.get(token.text.lower(), 0) for token in sent)

    best_sentences = heapq.nlargest(num_sentences, doc.sents, key=sentence_score)
    # Restore reading order: `start` is the index of the sentence's first token.
    best_sentences.sort(key=lambda sent: sent.start)
    return " ".join(str(sent) for sent in best_sentences)
def detect_risks(text):
    """Return the risk-related words from ``RISK_WORDS`` that occur in ``text``.

    The text is lower-cased and tokenized with the module-level pipeline
    ``nlp``; a word counts as found when a whole token equals it.

    Args:
        text: Raw document text.

    Returns:
        A list of the distinct matching words in alphabetical order. Sorting
        keeps the output stable between runs, which a plain ``list(set(...))``
        does not guarantee.
    """
    # Build the lookup set once instead of scanning the list for every token.
    risk_vocabulary = frozenset(RISK_WORDS)
    doc = nlp(text.lower())
    return sorted({token.text for token in doc if token.text in risk_vocabulary})
def get_regulatory_updates():
    """Return the built-in list of regulatory update entries.

    Each entry is a dict with a ``"title"`` and a ``"summary"`` key. A new
    list of new dicts is produced on every call.
    """
    entries = (
        ("π New Compliance Guidelines",
         "SEC released new guidelines for regulatory compliance."),
        ("βοΈ Update on Financial Risks",
         "New policies to mitigate risks in the financial sector."),
    )
    return [{"title": title, "summary": summary} for title, summary in entries]
def visualize_key_clauses_frequency(clauses):
    """Render a horizontal bar chart of how often each clause occurs.

    Args:
        clauses: Iterable of clause strings. When it is empty, a notice is
            written to the page instead of a chart.

    Returns:
        None. The chart (or the notice) is written to the Streamlit page.
    """
    common_clauses = Counter(clauses).most_common()
    if not common_clauses:
        st.write("π« No key clauses to visualize.")
        return

    labels, values = zip(*common_clauses)
    # Draw on an explicit Figure/Axes rather than pyplot's implicit global
    # figure, so concurrent Streamlit sessions do not share drawing state.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(labels, values, color="skyblue")
    ax.set_xlabel("Frequency")
    ax.set_title("π Key Clauses Frequency")
    st.pyplot(fig)
    # Release the figure; otherwise one figure leaks on every script rerun.
    plt.close(fig)
def _pdf_safe(text):
    """Return ``text`` with characters outside Latin-1 replaced by ``?``.

    The report uses one of FPDF's built-in fonts, which only cover Latin-1;
    other characters (emoji, typographic quotes, ...) would abort rendering.
    """
    return str(text).encode("latin-1", "replace").decode("latin-1")


def _write_pdf_report(pdf_path, summary, clauses, risks, updates):
    """Render the collected analysis results into a PDF file at ``pdf_path``."""
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    def write_block(content, style, size, line_height):
        pdf.set_font("Helvetica", style, size)
        # Start every block at the left margin: depending on the FPDF
        # flavour, multi_cell may leave the cursor at the right edge.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, line_height, _pdf_safe(content))

    def write_section(heading, lines):
        write_block(heading, "B", 14, 10)
        for line in lines:
            write_block(line, "", 11, 7)
        pdf.ln(4)

    write_block("Legal Document Analysis Report", "B", 16, 12)
    pdf.ln(4)
    if summary:
        write_section("Summary", [summary])
    if clauses:
        write_section(
            "Key Clauses",
            [f"{i}. {clause}" for i, clause in enumerate(clauses, 1)],
        )
    if risks:
        write_section("Detected Risks", [", ".join(risks)])
    if updates:
        write_section(
            "Regulatory Updates",
            [f"- {u.get('title')}: {u.get('summary')}" for u in updates],
        )
    if not (summary or clauses or risks or updates):
        write_block("No analysis results were produced.", "", 11, 7)
    pdf.output(pdf_path)


def _send_report_email(recipient, pdf_bytes, filename):
    """Email the PDF report to ``recipient`` as an attachment.

    Raises:
        smtplib.SMTPException, OSError: if connecting, authenticating or
            sending fails.
    """
    message = EmailMessage()
    message["Subject"] = "Legal Document Analysis Results"
    message["From"] = SENDER_EMAIL
    message["To"] = recipient
    message.set_content("Please find the analysis report attached.")
    message.add_attachment(
        pdf_bytes, maintype="application", subtype="pdf", filename=filename
    )
    # Assumes the sender is a Gmail account (the address configured in this
    # file is one), hence Gmail's SMTP-over-SSL endpoint.
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login(SENDER_EMAIL, SENDER_PASSWORD)
        server.send_message(message)


def main():
    """Run the Streamlit dashboard for analysing an uploaded legal PDF.

    The user picks features in the sidebar, uploads a PDF and may enter an
    email address. For the extracted text the selected analyses (summary,
    key clauses, clause chart, risk words, regulatory updates) are shown.
    On request a PDF report is generated, offered for download and, if a
    valid address was entered, emailed.

    NOTE(review): most emoji in the UI strings look mis-decoded (mojibake).
    They are kept as found; restore them from the original file if available.
    """
    # Feature labels are defined once so the multiselect options and the
    # membership checks below cannot drift apart.
    feat_visualization = "π Data Visualization"
    feat_summary = "π Summary"
    feat_clauses = "π Key Clauses"
    feat_risks = "β οΈ Risk Detection"
    feat_updates = "βοΈ Regulatory Updates"

    st.title("π Interactive Legal Document Analysis Dashboard")
    st.sidebar.title("βοΈ Options")
    features = st.sidebar.multiselect(
        "π Select Features",
        [feat_visualization, feat_summary, feat_clauses, feat_risks, feat_updates],
    )
    uploaded_file = st.file_uploader("π Upload a legal document (PDF)", type="pdf")
    recipient_email = st.text_input(
        "π§ Enter your email to receive the analysis results (optional)"
    )
    if uploaded_file is None:
        return

    try:
        text = extract_text_from_pdf(uploaded_file)
    except Exception as e:  # UI boundary: report any extraction failure
        st.error(f"β Error extracting text from PDF: {e}")
        return
    # This literal was split across two lines by a mis-decoded check-mark
    # emoji (a syntax error); it is restored here.
    st.success("✅ Text extracted successfully!")

    summary, clauses, risks, updates = "", [], [], []
    if feat_summary in features:
        summary = summarize_text(text)
        st.subheader(feat_summary)
        st.write(summary)
    # The chart needs the clauses too, so extract them when either feature is
    # selected; previously the chart was always empty without "Key Clauses".
    if feat_clauses in features or feat_visualization in features:
        clauses = extract_key_clauses(text)
    if feat_clauses in features:
        st.subheader(feat_clauses)
        for i, clause in enumerate(clauses, 1):
            st.write(f"{i}. {clause}")
    if feat_visualization in features:
        visualize_key_clauses_frequency(clauses)
    if feat_risks in features:
        risks = detect_risks(text)
        st.subheader("β οΈ Detected Risks")
        st.write(", ".join(risks) if risks else "✅ No risks detected.")
    if feat_updates in features:
        updates = get_regulatory_updates()
        st.subheader(feat_updates)
        for update in updates:
            st.write(f"- **{update.get('title')}**: {update.get('summary')}")

    if not st.button("π Generate PDF Report"):
        return

    pdf_path = "Analysis_Results.pdf"
    try:
        # The report has to be written before it can be opened for download.
        _write_pdf_report(pdf_path, summary, clauses, risks, updates)
        with open(pdf_path, "rb") as file:
            pdf_bytes = file.read()
    except Exception as e:  # UI boundary: report any rendering/IO failure
        st.error(f"Error generating PDF report: {e}")
        return
    st.success("π₯ PDF Report Ready! Download Below")
    st.download_button(
        "π₯ Download PDF Report",
        pdf_bytes,
        file_name="Analysis_Results.pdf",
        mime="application/pdf",
    )

    if not recipient_email:
        return
    try:
        validate_email(recipient_email)
    except EmailNotValidError:
        st.error("β Invalid email address. Please enter a valid one.")
        return
    try:
        _send_report_email(recipient_email, pdf_bytes, "Analysis_Results.pdf")
    except (smtplib.SMTPException, OSError) as e:
        st.error(f"Could not send the report to {recipient_email}: {e}")
    else:
        # Only claim success once the message has actually been sent.
        st.success(f"π§ PDF sent to {recipient_email} successfully!")


if __name__ == "__main__":
    main()
|