ShreeDeepthi committed on
Commit
fcfffa3
·
verified ·
1 Parent(s): 8b3188c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -56
app.py CHANGED
@@ -1,19 +1,18 @@
1
  import streamlit as st
2
  from pdfminer.high_level import extract_text
3
- import smtplib
4
  from email.message import EmailMessage
5
  from email_validator import validate_email, EmailNotValidError
6
- import spacy
7
  from collections import Counter
8
  import heapq
9
  from fpdf import FPDF
10
- import pandas as pd
11
  import matplotlib.pyplot as plt
 
12
  import requests
13
  import subprocess
14
  import sys
15
 
16
- # Install spaCy and download the 'en-core-web-sm' model if not already installed
17
  try:
18
  import spacy
19
  except ImportError:
@@ -33,38 +32,40 @@ RISK_WORDS = [
33
  ]
34
 
35
  HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
36
- SENDER_EMAIL = "shreedeepthi2005@gmail.com"
37
- SENDER_PASSWORD = "qntm oher jqfz oflt"
38
 
 
39
  def extract_text_from_pdf(uploaded_file):
40
  return extract_text(uploaded_file)
41
 
 
42
  def extract_key_clauses(text):
43
  doc = nlp(text)
44
  sentences = list(doc.sents)
45
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
46
  return clauses[:10]
47
 
 
48
  def summarize_text(text, num_sentences=5):
49
  doc = nlp(text)
50
  sentences = list(doc.sents)
51
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
52
  sentence_scores = {}
53
  for sent in sentences:
54
- sentence_score = 0
55
- for word in sent:
56
- if word.text.lower() in word_frequencies:
57
- sentence_score += word_frequencies[word.text.lower()]
58
  sentence_scores[sent] = sentence_score
59
  summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
60
  summary = ' '.join([str(sentence) for sentence in summarized_sentences])
61
  return summary
62
 
 
63
  def detect_risks(text):
64
  doc = nlp(text.lower())
65
  detected_risks = [token.text for token in doc if token.text in RISK_WORDS]
66
  return list(set(detected_risks))
67
 
 
68
  def get_regulatory_updates():
69
  predefined_updates = [
70
  {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
@@ -74,11 +75,12 @@ def get_regulatory_updates():
74
  try:
75
  response = requests.get(url, headers=HEADERS)
76
  response.raise_for_status()
77
- updates = []
78
  return updates if updates else predefined_updates
79
  except requests.exceptions.RequestException:
80
  return predefined_updates
81
 
 
82
  def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pdf"):
83
  pdf = FPDF()
84
  pdf.set_auto_page_break(auto=True, margin=15)
@@ -115,6 +117,7 @@ def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pd
115
 
116
  pdf.output(pdf_path)
117
 
 
118
  def send_email(pdf_path, recipient_email):
119
  msg = EmailMessage()
120
  msg["Subject"] = "Legal Document Analysis Results"
@@ -131,6 +134,7 @@ def send_email(pdf_path, recipient_email):
131
  server.login(SENDER_EMAIL, SENDER_PASSWORD)
132
  server.send_message(msg)
133
 
 
134
  def plot_word_frequencies(text):
135
  doc = nlp(text)
136
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
@@ -143,6 +147,7 @@ def plot_word_frequencies(text):
143
  plt.ylabel("Frequency")
144
  st.pyplot(plt)
145
 
 
146
  def main():
147
  st.title("Interactive Legal Document Analysis Dashboard")
148
 
@@ -154,52 +159,49 @@ def main():
154
  recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
155
 
156
  if st.button("Submit"):
157
- if not recipient_email:
158
- st.error("Please enter an email address to receive the analysis.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  else:
160
- st.success(f"Analysis will be sent to {recipient_email}.")
161
-
162
- if uploaded_file is not None:
163
- text = extract_text_from_pdf(uploaded_file)
164
-
165
- summary, clauses, risks, updates = "", [], [], []
166
-
167
- if "Summary" in features:
168
- summary = summarize_text(text)
169
- st.subheader("Summary")
170
- st.write(summary)
171
-
172
- if "Key Clauses" in features:
173
- clauses = extract_key_clauses(text)
174
- st.subheader("Key Clauses")
175
- for i, clause in enumerate(clauses, 1):
176
- st.write(f"{i}. {clause}")
177
-
178
- if "Risk Detection" in features:
179
- risks = detect_risks(text)
180
- st.subheader("Detected Risks")
181
- st.write(", ".join(risks) if risks else "No risks detected.")
182
-
183
- if "Regulatory Updates" in features:
184
- updates = get_regulatory_updates()
185
- st.subheader("Regulatory Updates")
186
- for update in updates:
187
- st.write(f"- **{update.get('title')}**: {update.get('summary')}")
188
-
189
- if "Data Visualization" in features:
190
- st.subheader("Word Frequency Visualization")
191
- plot_word_frequencies(text)
192
-
193
- pdf_path = "Analysis_Results.pdf"
194
- generate_pdf(summary, clauses, risks, updates, pdf_path)
195
-
196
- if recipient_email:
197
- try:
198
- validate_email(recipient_email)
199
- send_email(pdf_path, recipient_email)
200
- st.success("Analysis PDF has been sent to your email.")
201
- except EmailNotValidError:
202
- st.error("Invalid email address. Please enter a valid email.")
203
 
204
  if __name__ == "__main__":
205
  main()
 
1
  import streamlit as st
2
  from pdfminer.high_level import extract_text
 
3
  from email.message import EmailMessage
4
  from email_validator import validate_email, EmailNotValidError
5
+ import smtplib
6
  from collections import Counter
7
  import heapq
8
  from fpdf import FPDF
 
9
  import matplotlib.pyplot as plt
10
+ import spacy
11
  import requests
12
  import subprocess
13
  import sys
14
 
15
+ # Ensure spaCy is installed and 'en_core_web_sm' is loaded
16
  try:
17
  import spacy
18
  except ImportError:
 
32
  ]
33
 
34
  HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
35
+ SENDER_EMAIL = "your_email@gmail.com" # Replace with your email
36
+ SENDER_PASSWORD = "your_app_password" # Replace with your app password
37
 
38
+ # Extract text from PDF
39
  def extract_text_from_pdf(uploaded_file):
40
  return extract_text(uploaded_file)
41
 
42
+ # Extract key clauses
43
  def extract_key_clauses(text):
44
  doc = nlp(text)
45
  sentences = list(doc.sents)
46
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
47
  return clauses[:10]
48
 
49
+ # Summarize text
50
  def summarize_text(text, num_sentences=5):
51
  doc = nlp(text)
52
  sentences = list(doc.sents)
53
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
54
  sentence_scores = {}
55
  for sent in sentences:
56
+ sentence_score = sum(word_frequencies.get(word.text.lower(), 0) for word in sent)
 
 
 
57
  sentence_scores[sent] = sentence_score
58
  summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
59
  summary = ' '.join([str(sentence) for sentence in summarized_sentences])
60
  return summary
61
 
62
+ # Detect risks
63
  def detect_risks(text):
64
  doc = nlp(text.lower())
65
  detected_risks = [token.text for token in doc if token.text in RISK_WORDS]
66
  return list(set(detected_risks))
67
 
68
+ # Fetch regulatory updates
69
  def get_regulatory_updates():
70
  predefined_updates = [
71
  {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
 
75
  try:
76
  response = requests.get(url, headers=HEADERS)
77
  response.raise_for_status()
78
+ updates = [] # Placeholder for scraped updates
79
  return updates if updates else predefined_updates
80
  except requests.exceptions.RequestException:
81
  return predefined_updates
82
 
83
+ # Generate PDF
84
  def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pdf"):
85
  pdf = FPDF()
86
  pdf.set_auto_page_break(auto=True, margin=15)
 
117
 
118
  pdf.output(pdf_path)
119
 
120
+ # Send email
121
  def send_email(pdf_path, recipient_email):
122
  msg = EmailMessage()
123
  msg["Subject"] = "Legal Document Analysis Results"
 
134
  server.login(SENDER_EMAIL, SENDER_PASSWORD)
135
  server.send_message(msg)
136
 
137
+ # Plot word frequencies
138
  def plot_word_frequencies(text):
139
  doc = nlp(text)
140
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
 
147
  plt.ylabel("Frequency")
148
  st.pyplot(plt)
149
 
150
+ # Main function
151
  def main():
152
  st.title("Interactive Legal Document Analysis Dashboard")
153
 
 
159
  recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
160
 
161
  if st.button("Submit"):
162
+ if uploaded_file is not None:
163
+ text = extract_text_from_pdf(uploaded_file)
164
+
165
+ summary, clauses, risks, updates = "", [], [], []
166
+
167
+ if "Summary" in features:
168
+ summary = summarize_text(text)
169
+ st.subheader("Summary")
170
+ st.write(summary)
171
+
172
+ if "Key Clauses" in features:
173
+ clauses = extract_key_clauses(text)
174
+ st.subheader("Key Clauses")
175
+ for i, clause in enumerate(clauses, 1):
176
+ st.write(f"{i}. {clause}")
177
+
178
+ if "Risk Detection" in features:
179
+ risks = detect_risks(text)
180
+ st.subheader("Detected Risks")
181
+ st.write(", ".join(risks) if risks else "No risks detected.")
182
+
183
+ if "Regulatory Updates" in features:
184
+ updates = get_regulatory_updates()
185
+ st.subheader("Regulatory Updates")
186
+ for update in updates:
187
+ st.write(f"- **{update.get('title')}**: {update.get('summary')}")
188
+
189
+ if "Data Visualization" in features:
190
+ st.subheader("Word Frequency Visualization")
191
+ plot_word_frequencies(text)
192
+
193
+ pdf_path = "Analysis_Results.pdf"
194
+ generate_pdf(summary, clauses, risks, updates, pdf_path)
195
+
196
+ if recipient_email:
197
+ try:
198
+ validate_email(recipient_email)
199
+ send_email(pdf_path, recipient_email)
200
+ st.success("Analysis PDF has been sent to your email.")
201
+ except EmailNotValidError:
202
+ st.error("Invalid email address. Please enter a valid email.")
203
  else:
204
+ st.error("Please upload a PDF document for analysis.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  if __name__ == "__main__":
207
  main()