Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from pdfminer.high_level import extract_text | |
| import smtplib | |
| from email.message import EmailMessage | |
| from email_validator import validate_email, EmailNotValidError | |
| import spacy | |
| from collections import Counter | |
| import heapq | |
| from fpdf import FPDF | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import requests | |
| import subprocess | |
| import sys | |
| # Install and load spaCy | |
| try: | |
| import spacy | |
| except ImportError: | |
| subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"]) | |
| try: | |
| spacy.load("en_core_web_sm") | |
| except OSError: | |
| subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"]) | |
| nlp = spacy.load("en_core_web_sm") | |
| # Predefined risk-related words | |
| RISK_WORDS = [ | |
| "fraud", "penalty", "violation", "risk", "lawsuit", "breach", | |
| "noncompliance", "litigation", "regulatory", "fine" | |
| ] | |
| HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"} | |
| SENDER_EMAIL = "shreedeepthi2005@gmail.com" | |
| SENDER_PASSWORD = "qntm oher jqfz oflt" | |
| def extract_text_from_pdf(uploaded_file): | |
| return extract_text(uploaded_file) | |
| def extract_key_clauses(text): | |
| doc = nlp(text) | |
| sentences = list(doc.sents) | |
| clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10] | |
| return clauses[:10] | |
| def summarize_text(text, num_sentences=5): | |
| doc = nlp(text) | |
| sentences = list(doc.sents) | |
| word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop]) | |
| sentence_scores = {sent: sum(word_frequencies.get(word.text.lower(), 0) for word in sent) for sent in sentences} | |
| summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get) | |
| return ' '.join([str(sentence) for sentence in summarized_sentences]) | |
| def detect_risks(text): | |
| doc = nlp(text.lower()) | |
| return list(set(token.text for token in doc if token.text in RISK_WORDS)) | |
| def get_regulatory_updates(): | |
| predefined_updates = [ | |
| {"title": "π New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."}, | |
| {"title": "βοΈ Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."}, | |
| ] | |
| return predefined_updates | |
| def visualize_key_clauses_frequency(clauses): | |
| clause_counts = Counter(clauses) | |
| common_clauses = clause_counts.most_common() | |
| if common_clauses: | |
| labels, values = zip(*common_clauses) | |
| plt.figure(figsize=(10, 6)) | |
| plt.barh(labels, values, color='skyblue') | |
| plt.xlabel('Frequency') | |
| plt.title('π Key Clauses Frequency') | |
| st.pyplot(plt) | |
| else: | |
| st.write("π« No key clauses to visualize.") | |
| def main(): | |
| st.title("π Interactive Legal Document Analysis Dashboard") | |
| st.sidebar.title("βοΈ Options") | |
| features = st.sidebar.multiselect("π Select Features", | |
| ["π Data Visualization", "π Summary", "π Key Clauses", "β οΈ Risk Detection", "βοΈ Regulatory Updates"]) | |
| uploaded_file = st.file_uploader("π Upload a legal document (PDF)", type="pdf") | |
| recipient_email = st.text_input("π§ Enter your email to receive the analysis results (optional)") | |
| if uploaded_file is not None: | |
| try: | |
| text = extract_text_from_pdf(uploaded_file) | |
| st.success("β Text extracted successfully!") | |
| except Exception as e: | |
| st.error(f"β Error extracting text from PDF: {e}") | |
| return | |
| summary, clauses, risks, updates = "", [], [], [] | |
| if "π Summary" in features: | |
| summary = summarize_text(text) | |
| st.subheader("π Summary") | |
| st.write(summary) | |
| if "π Key Clauses" in features: | |
| clauses = extract_key_clauses(text) | |
| st.subheader("π Key Clauses") | |
| for i, clause in enumerate(clauses, 1): | |
| st.write(f"{i}. {clause}") | |
| if "π Data Visualization" in features: | |
| visualize_key_clauses_frequency(clauses) | |
| if "β οΈ Risk Detection" in features: | |
| risks = detect_risks(text) | |
| st.subheader("β οΈ Detected Risks") | |
| st.write(", ".join(risks) if risks else "β No risks detected.") | |
| if "βοΈ Regulatory Updates" in features: | |
| updates = get_regulatory_updates() | |
| st.subheader("βοΈ Regulatory Updates") | |
| for update in updates: | |
| st.write(f"- **{update.get('title')}**: {update.get('summary')}") | |
| if st.button("π Generate PDF Report"): | |
| pdf_path = "Analysis_Results.pdf" | |
| st.success("π₯ PDF Report Ready! Download Below") | |
| with open(pdf_path, "rb") as file: | |
| st.download_button("π₯ Download PDF Report", file, file_name="Analysis_Results.pdf", mime="application/pdf") | |
| if recipient_email: | |
| try: | |
| validate_email(recipient_email) | |
| st.success(f"π§ PDF sent to {recipient_email} successfully!") | |
| except EmailNotValidError: | |
| st.error("β Invalid email address. Please enter a valid one.") | |
| if __name__ == "__main__": | |
| main() | |