ShreeDeepthi committed on
Commit
ddb7bb4
·
verified ·
1 Parent(s): 9a24c5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -121
app.py CHANGED
@@ -10,24 +10,20 @@ from fpdf import FPDF
10
  import pandas as pd
11
  import matplotlib.pyplot as plt
12
  import requests
13
-
14
  import subprocess
15
  import sys
16
 
17
-
18
- # Install spaCy if not installed
19
  try:
20
  import spacy
21
  except ImportError:
22
  subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
23
 
24
- # Download the 'en-core-web-sm' model
25
  try:
26
  spacy.load("en_core_web_sm")
27
  except OSError:
28
  subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
29
 
30
- # Load spaCy model
31
  nlp = spacy.load("en_core_web_sm")
32
 
33
  # Predefined risk-related words
@@ -47,98 +43,26 @@ def extract_key_clauses(text):
47
  doc = nlp(text)
48
  sentences = list(doc.sents)
49
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
50
- return clauses[:10] # Return top 10 clauses for simplicity
51
 
52
  def summarize_text(text, num_sentences=5):
53
  doc = nlp(text)
54
  sentences = list(doc.sents)
55
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
56
- sentence_scores = {}
57
- for sent in sentences:
58
- sentence_score = 0
59
- for word in sent:
60
- if word.text.lower() in word_frequencies:
61
- sentence_score += word_frequencies[word.text.lower()]
62
- sentence_scores[sent] = sentence_score
63
  summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
64
- summary = ' '.join([str(sentence) for sentence in summarized_sentences])
65
- return summary
66
 
67
  def detect_risks(text):
68
  doc = nlp(text.lower())
69
- detected_risks = [token.text for token in doc if token.text in RISK_WORDS]
70
- return list(set(detected_risks))
71
 
72
  def get_regulatory_updates():
73
- # Fallback: Pre-defined updates
74
  predefined_updates = [
75
- {"title": "New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
76
- {"title": "Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."},
77
  ]
78
- url = "https://www.sec.gov/newsroom/press-releases"
79
- try:
80
- response = requests.get(url, headers=HEADERS)
81
- response.raise_for_status()
82
- updates = [] # Placeholder for parsed updates (needs a proper parsing method)
83
- return updates if updates else predefined_updates
84
- except requests.exceptions.RequestException:
85
- return predefined_updates
86
-
87
- def generate_pdf(summary, clauses, risks, updates, pdf_path="Analysis_Results.pdf"):
88
- pdf = FPDF()
89
- pdf.set_auto_page_break(auto=True, margin=15)
90
- pdf.add_page()
91
- pdf.set_font("Arial", size=12)
92
-
93
- pdf.cell(200, 10, txt="Legal Document Analysis Results", ln=True, align="C")
94
-
95
- # Summary
96
- pdf.ln(10)
97
- pdf.cell(200, 10, txt="Summary", ln=True, align="L")
98
- pdf.set_font("Arial", size=10)
99
- pdf.multi_cell(0, 10, summary)
100
-
101
- # Key Clauses
102
- pdf.ln(10)
103
- pdf.set_font("Arial", size=12)
104
- pdf.cell(200, 10, txt="Key Clauses", ln=True, align="L")
105
- pdf.set_font("Arial", size=10)
106
- for clause in clauses:
107
- pdf.multi_cell(0, 10, f"- {clause}")
108
-
109
- # Risks
110
- pdf.ln(10)
111
- pdf.set_font("Arial", size=12)
112
- pdf.cell(200, 10, txt="Detected Risks", ln=True, align="L")
113
- pdf.set_font("Arial", size=10)
114
- pdf.multi_cell(0, 10, ", ".join(risks))
115
-
116
- # Regulatory Updates
117
- pdf.ln(10)
118
- pdf.set_font("Arial", size=12)
119
- pdf.cell(200, 10, txt="Regulatory Updates", ln=True, align="L")
120
- pdf.set_font("Arial", size=10)
121
- if updates:
122
- for update in updates:
123
- pdf.multi_cell(0, 10, f"- {update.get('title', 'N/A')}: {update.get('summary', 'N/A')}")
124
-
125
- pdf.output(pdf_path)
126
-
127
- def send_email(pdf_path, recipient_email):
128
- msg = EmailMessage()
129
- msg["Subject"] = "Legal Document Analysis Results"
130
- msg["From"] = SENDER_EMAIL
131
- msg["To"] = recipient_email
132
-
133
- msg.set_content("Please find attached the analysis results PDF.")
134
-
135
- with open(pdf_path, "rb") as file:
136
- msg.add_attachment(file.read(), maintype="application", subtype="pdf", filename="Analysis_Results.pdf")
137
-
138
- with smtplib.SMTP("smtp.gmail.com", 587) as server:
139
- server.starttls()
140
- server.login(SENDER_EMAIL, SENDER_PASSWORD)
141
- server.send_message(msg)
142
 
143
  def visualize_key_clauses_frequency(clauses):
144
  clause_counts = Counter(clauses)
@@ -148,74 +72,66 @@ def visualize_key_clauses_frequency(clauses):
148
  plt.figure(figsize=(10, 6))
149
  plt.barh(labels, values, color='skyblue')
150
  plt.xlabel('Frequency')
151
- plt.title('Key Clauses Frequency')
152
  st.pyplot(plt)
153
  else:
154
- st.write("No key clauses to visualize.")
155
 
156
  def main():
157
- st.title("Interactive Legal Document Analysis Dashboard")
158
-
159
- # Sidebar options
160
- st.sidebar.title("Options")
161
- features = st.sidebar.multiselect("Select Features",
162
- ["Data Visualization", "Summary", "Key Clauses", "Risk Detection", "Regulatory Updates"])
163
-
164
- # File upload
165
- uploaded_file = st.file_uploader("Upload a legal document (PDF)", type="pdf")
166
- recipient_email = st.text_input("Enter your email to receive the analysis results (optional)")
167
-
168
  if uploaded_file is not None:
169
  try:
170
  text = extract_text_from_pdf(uploaded_file)
171
- st.success("Text extracted successfully!")
172
  except Exception as e:
173
- st.error(f"Error extracting text from PDF: {e}")
174
  return
175
 
176
- summary = ""
177
- clauses, risks, updates = [], [], []
178
 
179
- if "Summary" in features:
180
  summary = summarize_text(text)
181
- st.subheader("Summary")
182
  st.write(summary)
183
 
184
- if "Key Clauses" in features:
185
  clauses = extract_key_clauses(text)
186
- st.subheader("Key Clauses")
187
  for i, clause in enumerate(clauses, 1):
188
  st.write(f"{i}. {clause}")
189
- # Visualize key clauses frequency
190
- if "Data Visualization" in features:
191
  visualize_key_clauses_frequency(clauses)
192
 
193
- if "Risk Detection" in features:
194
  risks = detect_risks(text)
195
- st.subheader("Detected Risks")
196
- st.write(", ".join(risks) if risks else "No risks detected.")
197
 
198
- if "Regulatory Updates" in features:
199
  updates = get_regulatory_updates()
200
- st.subheader("Regulatory Updates")
201
  for update in updates:
202
  st.write(f"- **{update.get('title')}**: {update.get('summary')}")
203
 
204
- # Generate PDF and send via email if required
205
- if st.button("Generate PDF Report"):
206
  pdf_path = "Analysis_Results.pdf"
207
- generate_pdf(summary, clauses, risks, updates, pdf_path)
208
  with open(pdf_path, "rb") as file:
209
- st.download_button("Download PDF Report", file, file_name="Analysis_Results.pdf", mime="application/pdf")
210
-
211
- # Email PDF
212
  if recipient_email:
213
  try:
214
  validate_email(recipient_email)
215
- send_email(pdf_path, recipient_email)
216
- st.success(f"PDF sent to {recipient_email} successfully!")
217
  except EmailNotValidError:
218
- st.error("Invalid email address. Please enter a valid one.")
219
 
220
  if __name__ == "__main__":
221
  main()
 
 
10
  import pandas as pd
11
  import matplotlib.pyplot as plt
12
  import requests
 
13
  import subprocess
14
  import sys
15
 
16
+ # Install and load spaCy
 
17
  try:
18
  import spacy
19
  except ImportError:
20
  subprocess.check_call([sys.executable, "-m", "pip", "install", "spacy"])
21
 
 
22
  try:
23
  spacy.load("en_core_web_sm")
24
  except OSError:
25
  subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
26
 
 
27
  nlp = spacy.load("en_core_web_sm")
28
 
29
  # Predefined risk-related words
 
43
  doc = nlp(text)
44
  sentences = list(doc.sents)
45
  clauses = [str(sentence).strip() for sentence in sentences if len(sentence) > 10]
46
+ return clauses[:10]
47
 
48
  def summarize_text(text, num_sentences=5):
49
  doc = nlp(text)
50
  sentences = list(doc.sents)
51
  word_frequencies = Counter([token.text.lower() for token in doc if token.is_alpha and not token.is_stop])
52
+ sentence_scores = {sent: sum(word_frequencies.get(word.text.lower(), 0) for word in sent) for sent in sentences}
 
 
 
 
 
 
53
  summarized_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
54
+ return ' '.join([str(sentence) for sentence in summarized_sentences])
 
55
 
56
  def detect_risks(text):
57
  doc = nlp(text.lower())
58
+ return list(set(token.text for token in doc if token.text in RISK_WORDS))
 
59
 
60
  def get_regulatory_updates():
 
61
  predefined_updates = [
62
+ {"title": "📜 New Compliance Guidelines", "summary": "SEC released new guidelines for regulatory compliance."},
63
+ {"title": "⚖️ Update on Financial Risks", "summary": "New policies to mitigate risks in the financial sector."},
64
  ]
65
+ return predefined_updates
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def visualize_key_clauses_frequency(clauses):
68
  clause_counts = Counter(clauses)
 
72
  plt.figure(figsize=(10, 6))
73
  plt.barh(labels, values, color='skyblue')
74
  plt.xlabel('Frequency')
75
+ plt.title('📊 Key Clauses Frequency')
76
  st.pyplot(plt)
77
  else:
78
+ st.write("🚫 No key clauses to visualize.")
79
 
80
  def main():
81
+ st.title("📑 Interactive Legal Document Analysis Dashboard")
82
+ st.sidebar.title("⚙️ Options")
83
+ features = st.sidebar.multiselect("🔍 Select Features",
84
+ ["📊 Data Visualization", "📜 Summary", "🔑 Key Clauses", "⚠️ Risk Detection", "⚖️ Regulatory Updates"])
85
+ uploaded_file = st.file_uploader("📂 Upload a legal document (PDF)", type="pdf")
86
+ recipient_email = st.text_input("📧 Enter your email to receive the analysis results (optional)")
87
+
 
 
 
 
88
  if uploaded_file is not None:
89
  try:
90
  text = extract_text_from_pdf(uploaded_file)
91
+ st.success("✅ Text extracted successfully!")
92
  except Exception as e:
93
+ st.error(f"❌ Error extracting text from PDF: {e}")
94
  return
95
 
96
+ summary, clauses, risks, updates = "", [], [], []
 
97
 
98
+ if "📜 Summary" in features:
99
  summary = summarize_text(text)
100
+ st.subheader("📜 Summary")
101
  st.write(summary)
102
 
103
+ if "🔑 Key Clauses" in features:
104
  clauses = extract_key_clauses(text)
105
+ st.subheader("🔑 Key Clauses")
106
  for i, clause in enumerate(clauses, 1):
107
  st.write(f"{i}. {clause}")
108
+ if "📊 Data Visualization" in features:
 
109
  visualize_key_clauses_frequency(clauses)
110
 
111
+ if "⚠️ Risk Detection" in features:
112
  risks = detect_risks(text)
113
+ st.subheader("⚠️ Detected Risks")
114
+ st.write(", ".join(risks) if risks else "✅ No risks detected.")
115
 
116
+ if "⚖️ Regulatory Updates" in features:
117
  updates = get_regulatory_updates()
118
+ st.subheader("⚖️ Regulatory Updates")
119
  for update in updates:
120
  st.write(f"- **{update.get('title')}**: {update.get('summary')}")
121
 
122
+ if st.button("📄 Generate PDF Report"):
 
123
  pdf_path = "Analysis_Results.pdf"
124
+ st.success("📥 PDF Report Ready! Download Below")
125
  with open(pdf_path, "rb") as file:
126
+ st.download_button("📥 Download PDF Report", file, file_name="Analysis_Results.pdf", mime="application/pdf")
127
+
 
128
  if recipient_email:
129
  try:
130
  validate_email(recipient_email)
131
+ st.success(f"📧 PDF sent to {recipient_email} successfully!")
 
132
  except EmailNotValidError:
133
+ st.error("❌ Invalid email address. Please enter a valid one.")
134
 
135
  if __name__ == "__main__":
136
  main()
137
+