ShreeDeepthi committed on
Commit
8b3188c
·
verified ·
1 Parent(s): ca6d7df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -44
app.py CHANGED
@@ -13,25 +13,19 @@ import requests
13
  import subprocess
14
  import sys
15
 
16
- import subprocess
17
- import sys
18
-
19
- # Install spaCy if not installed
20
  try:
21
  import spacy
22
  except ImportError:
23
  subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
24
 
25
- # Download the 'en-core-web-sm' model
26
  try:
27
  spacy.load("en_core_web_sm")
28
  except OSError:
29
  subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
30
 
31
- # Now load the model
32
  nlp = spacy.load("en_core_web_sm")
33
 
34
-
35
  # Predefined risk-related words
36
  RISK_WORDS = [
37
  "fraud", "penalty", "violation", "risk", "lawsuit", "breach",
@@ -49,7 +43,7 @@ def extract_key_clauses(text):
49
  doc = nlp(text)
50
  sentences = list(doc.sents)
51
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
52
- return clauses[:10] # Return top 10 clauses for simplicity
53
 
54
  def summarize_text(text, num_sentences=5):
55
  doc = nlp(text)
@@ -72,7 +66,6 @@ def detect_risks(text):
72
  return list(set(detected_risks))
73
 
74
  def get_regulatory_updates():
75
- # Fallback: Pre-defined updates
76
  predefined_updates = [
77
  {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
78
  {"title": "Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."},
@@ -81,8 +74,7 @@ def get_regulatory_updates():
81
  try:
82
  response = requests.get(url, headers=HEADERS)
83
  response.raise_for_status()
84
- updates = [] # Placeholder for parsed updates (needs a proper parsing method)
85
- # Process response.content with BeautifulSoup or similar parser if allowed
86
  return updates if updates else predefined_updates
87
  except requests.exceptions.RequestException:
88
  return predefined_updates
@@ -95,13 +87,11 @@ def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pd
95
 
96
  pdf.cell(200, 10, txt="Legal Document Analysis Results", ln=True, align="C")
97
 
98
- # Summary
99
  pdf.ln(10)
100
  pdf.cell(200, 10, txt="Summary", ln=True, align="L")
101
  pdf.set_font("Arial", size=10)
102
  pdf.multi_cell(0, 10, summary)
103
 
104
- # Key Clauses
105
  pdf.ln(10)
106
  pdf.set_font("Arial", size=12)
107
  pdf.cell(200, 10, txt="Key Clauses", ln=True, align="L")
@@ -109,14 +99,12 @@ def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pd
109
  for clause in clauses:
110
  pdf.multi_cell(0, 10, f"- {clause}")
111
 
112
- # Risks
113
  pdf.ln(10)
114
  pdf.set_font("Arial", size=12)
115
  pdf.cell(200, 10, txt="Detected Risks", ln=True, align="L")
116
  pdf.set_font("Arial", size=10)
117
  pdf.multi_cell(0, 10, ", ".join(risks))
118
 
119
- # Regulatory Updates
120
  pdf.ln(10)
121
  pdf.set_font("Arial", size=12)
122
  pdf.cell(200, 10, txt="Regulatory Updates", ln=True, align="L")
@@ -158,25 +146,23 @@ def plot_word_frequencies(text):
158
  def main():
159
  st.title("Interactive Legal Document Analysis Dashboard")
160
 
161
- # Sidebar options
162
  st.sidebar.title("Options")
163
  features = st.sidebar.multiselect("Select Features",
164
  ["Data Visualization", "Summary", "Key Clauses", "Risk Detection", "Regulatory Updates"])
165
 
166
- # File upload
167
  uploaded_file = st.file_uploader("Upload a legal document (PDF)", type="pdf")
168
  recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
169
 
 
 
 
 
 
 
170
  if uploaded_file is not None:
171
- try:
172
- text = extract_text_from_pdf(uploaded_file)
173
- st.success("Text extracted successfully!")
174
- except Exception as e:
175
- st.error(f"Error extracting text from PDF: {e}")
176
- return
177
 
178
- summary = ""
179
- clauses, risks, updates = [], [], []
180
 
181
  if "Summary" in features:
182
  summary = summarize_text(text)
@@ -204,24 +190,16 @@ def main():
204
  st.subheader("Word Frequency Visualization")
205
  plot_word_frequencies(text)
206
 
207
- # Generate PDF
208
- if st.button("Generate PDF Report"):
209
- pdf_path = "Analysis_Results.pdf"
210
- generate_pdf(summary, clauses, risks, updates, pdf_path)
211
- with open(pdf_path, "rb") as file:
212
- st.download_button("Download PDF Report", file, file_name="Analysis_Results.pdf", mime="application/pdf")
213
-
214
- # Email PDF
215
- if st.button("Submit"):
216
- if recipient_email:
217
- try:
218
- validate_email(recipient_email)
219
- send_email(pdf_path, recipient_email)
220
- st.success(f"PDF sent to {recipient_email} successfully!")
221
- except EmailNotValidError:
222
- st.error("Invalid email address. Please enter a valid one.")
223
- else:
224
- st.error("Please enter a valid email address to send the report.")
225
 
226
  if __name__ == "__main__":
227
- main()
 
13
  import subprocess
14
  import sys
15
 
16
+ # Install spaCy and download the 'en-core-web-sm' model if not already installed
 
 
 
17
  try:
18
  import spacy
19
  except ImportError:
20
  subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
21
 
 
22
  try:
23
  spacy.load("en_core_web_sm")
24
  except OSError:
25
  subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
26
 
 
27
  nlp = spacy.load("en_core_web_sm")
28
 
 
29
  # Predefined risk-related words
30
  RISK_WORDS = [
31
  "fraud", "penalty", "violation", "risk", "lawsuit", "breach",
 
43
  doc = nlp(text)
44
  sentences = list(doc.sents)
45
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
46
+ return clauses[:10]
47
 
48
  def summarize_text(text, num_sentences=5):
49
  doc = nlp(text)
 
66
  return list(set(detected_risks))
67
 
68
  def get_regulatory_updates():
 
69
  predefined_updates = [
70
  {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
71
  {"title": "Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."},
 
74
  try:
75
  response = requests.get(url, headers=HEADERS)
76
  response.raise_for_status()
77
+ updates = []
 
78
  return updates if updates else predefined_updates
79
  except requests.exceptions.RequestException:
80
  return predefined_updates
 
87
 
88
  pdf.cell(200, 10, txt="Legal Document Analysis Results", ln=True, align="C")
89
 
 
90
  pdf.ln(10)
91
  pdf.cell(200, 10, txt="Summary", ln=True, align="L")
92
  pdf.set_font("Arial", size=10)
93
  pdf.multi_cell(0, 10, summary)
94
 
 
95
  pdf.ln(10)
96
  pdf.set_font("Arial", size=12)
97
  pdf.cell(200, 10, txt="Key Clauses", ln=True, align="L")
 
99
  for clause in clauses:
100
  pdf.multi_cell(0, 10, f"- {clause}")
101
 
 
102
  pdf.ln(10)
103
  pdf.set_font("Arial", size=12)
104
  pdf.cell(200, 10, txt="Detected Risks", ln=True, align="L")
105
  pdf.set_font("Arial", size=10)
106
  pdf.multi_cell(0, 10, ", ".join(risks))
107
 
 
108
  pdf.ln(10)
109
  pdf.set_font("Arial", size=12)
110
  pdf.cell(200, 10, txt="Regulatory Updates", ln=True, align="L")
 
146
  def main():
147
  st.title("Interactive Legal Document Analysis Dashboard")
148
 
 
149
  st.sidebar.title("Options")
150
  features = st.sidebar.multiselect("Select Features",
151
  ["Data Visualization", "Summary", "Key Clauses", "Risk Detection", "Regulatory Updates"])
152
 
 
153
  uploaded_file = st.file_uploader("Upload a legal document (PDF)", type="pdf")
154
  recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
155
 
156
+ if st.button("Submit"):
157
+ if not recipient_email:
158
+ st.error("Please enter an email address to receive the analysis.")
159
+ else:
160
+ st.success(f"Analysis will be sent to {recipient_email}.")
161
+
162
  if uploaded_file is not None:
163
+ text = extract_text_from_pdf(uploaded_file)
 
 
 
 
 
164
 
165
+ summary, clauses, risks, updates = "", [], [], []
 
166
 
167
  if "Summary" in features:
168
  summary = summarize_text(text)
 
190
  st.subheader("Word Frequency Visualization")
191
  plot_word_frequencies(text)
192
 
193
+ pdf_path = "Analysis_Results.pdf"
194
+ generate_pdf(summary, clauses, risks, updates, pdf_path)
195
+
196
+ if recipient_email:
197
+ try:
198
+ validate_email(recipient_email)
199
+ send_email(pdf_path, recipient_email)
200
+ st.success("Analysis PDF has been sent to your email.")
201
+ except EmailNotValidError:
202
+ st.error("Invalid email address. Please enter a valid email.")
 
 
 
 
 
 
 
 
203
 
204
  if __name__ == "__main__":
205
+ main()