Kushalmanda committed on
Commit
99543cd
·
verified ·
1 Parent(s): 5eac03e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -328
app.py CHANGED
@@ -9,10 +9,14 @@ from simple_salesforce import Salesforce
9
  import os
10
  import base64
11
  from io import BytesIO
12
- from datetime import datetime
13
- import pytz
14
 
15
- # Custom CSS for styling with full-page background image
 
 
 
 
16
  css = """
17
  body {
18
  background-image: url('https://images.unsplash.com/photo-1604147706283-d7119b5b822c?ixlib=rb-1.2.1&auto=format&fit=crop&w=1920&q=80');
@@ -20,248 +24,205 @@ body {
20
  background-position: center;
21
  background-attachment: fixed;
22
  background-repeat: no-repeat;
23
- min-height: 100vh; /* Ensures full height */
24
  margin: 0;
25
  padding: 0;
26
  font-family: Arial, sans-serif;
27
  }
28
-
29
  .gradio-container {
30
- background-color: rgba(255, 255, 255, 0.9);
31
  border-radius: 10px;
32
  padding: 20px;
33
  margin: 20px;
34
- max-width: 100%; /* Allow full width */
35
  box-shadow: 0 4px 12px rgba(0,0,0,0.15);
36
- min-height: 90vh; /* Make sure the container fills the screen */
 
37
  }
38
-
39
  .risk-low { color: #28a745; font-weight: bold; }
40
  .risk-medium { color: #ffc107; font-weight: bold; }
41
  .risk-high { color: #dc3545; font-weight: bold; }
42
-
43
- .result-box {
44
- padding: 20px;
45
- border-radius: 5px;
46
- margin-bottom: 20px;
47
- background-color: rgba(255, 255, 255, 0.8);
48
  }
49
-
50
- .penalty-box {
51
- padding: 15px;
52
- border-radius: 5px;
53
- margin-bottom: 15px;
54
  border-left: 5px solid #dc3545;
55
- background-color: rgba(255, 245, 245, 0.8);
56
  }
57
-
58
- .obligation-box {
59
- padding: 15px;
60
- border-radius: 5px;
61
- margin-bottom: 15px;
62
  border-left: 5px solid #ffc107;
63
- background-color: rgba(255, 249, 230, 0.8);
64
  }
65
-
66
- .delay-box {
67
- padding: 15px;
68
- border-radius: 5px;
69
- margin-bottom: 15px;
70
  border-left: 5px solid #17a2b8;
71
- background-color: rgba(230, 249, 255, 0.8);
72
  }
73
-
74
  .combined-risk-container {
75
  display: flex;
76
  flex-direction: column;
77
  gap: 10px;
78
  margin-bottom: 20px;
79
  }
80
-
81
  .risk-row {
82
  display: flex;
83
  align-items: center;
84
  gap: 15px;
85
  padding: 10px;
86
  border-radius: 5px;
87
- background-color: rgba(248, 249, 250, 0.8);
 
88
  }
89
-
90
  .risk-label {
91
  width: 120px;
92
  font-weight: bold;
93
  font-size: 16px;
 
94
  }
95
-
96
  .risk-score {
97
  width: 100px;
98
  font-size: 18px;
99
  text-align: center;
100
  }
101
-
102
  .heatmap-wrapper {
103
  flex-grow: 1;
104
  }
105
-
106
  .warning-box {
107
  padding: 15px;
108
  border-radius: 5px;
109
  margin: 10px 0;
110
- background-color: rgba(255, 243, 205, 0.8);
111
  border-left: 5px solid #ffc107;
112
  font-weight: bold;
113
  }
114
-
115
  .danger-box {
116
  padding: 15px;
117
  border-radius: 5px;
118
  margin: 10px 0;
119
- background-color: rgba(248, 215, 218, 0.8);
120
  border-left: 5px solid #dc3545;
121
  font-weight: bold;
122
  }
123
-
124
  .success-box {
125
  padding: 15px;
126
  border-radius: 5px;
127
  margin: 10px 0;
128
- background-color: rgba(212, 237, 218, 0.8);
129
  border-left: 5px solid #28a745;
130
  font-weight: bold;
131
  }
132
-
133
  .section-title {
134
  font-size: 20px;
135
  font-weight: bold;
136
  margin-bottom: 15px;
137
- color: #333;
138
  }
139
-
140
  .count-item {
141
  display: flex;
142
  justify-content: space-between;
143
  padding: 10px 0;
144
- border-bottom: 1px solid #eee;
145
  }
146
-
147
  .count-label {
148
  font-weight: bold;
 
149
  }
150
-
151
  .count-value {
152
- color: #555;
153
  }
154
-
155
- .section-container {
156
- margin-bottom: 25px;
157
- padding: 15px;
158
- border-radius: 5px;
159
- background-color: rgba(248, 249, 250, 0.8);
160
  }
161
-
162
- .salesforce-success {
163
- padding: 15px;
164
- border-radius: 5px;
165
- background-color: rgba(212, 237, 218, 0.8);
166
- border-left: 5px solid #28a745;
167
- margin-top: 20px;
168
- font-weight: bold;
169
  }
170
-
171
- .salesforce-error {
172
- padding: 15px;
173
- border-radius: 5px;
174
- background-color: rgba(248, 215, 218, 0.8);
175
- border-left: 5px solid #dc3545;
176
- margin-top: 20px;
177
- font-weight: bold;
178
- }
179
-
180
- header {
181
- background-color: rgba(13, 110, 253, 0.85);
182
- color: white;
183
- padding: 20px;
184
- border-radius: 10px;
185
- margin-bottom: 20px;
186
  }
187
-
188
- footer {
189
- text-align: center;
190
- margin-top: 30px;
191
- padding: 15px;
192
- background-color: rgba(0, 0, 0, 0.7);
193
- color: white;
194
- border-radius: 5px;
195
  }
196
-
197
- @media only screen and (max-width: 1200px) {
198
- .gradio-container {
199
- margin: 10px;
200
- }
201
- .risk-row {
202
- flex-direction: column;
203
- align-items: flex-start;
204
- }
205
  }
206
  """
207
 
208
  # Salesforce credentials
209
- USERNAME = 'Kushalpavansekharm503@agentforce.com'
210
- PASSWORD = 'Kushal@123'
211
- SECURITY_TOKEN = 'WwUIFWBVUjeKn9VPKyWJmawY0'
212
 
213
- def get_salesforce_connection():
214
- """Establishes connection to Salesforce"""
215
  try:
216
  sf = Salesforce(
217
- username=USERNAME,
218
- password=PASSWORD,
219
- security_token=SECURITY_TOKEN
220
  )
 
221
  return sf
222
  except Exception as e:
223
- print(f"Salesforce connection error: {str(e)}")
224
- return None
 
 
 
 
 
 
 
 
 
 
225
 
226
- def create_salesforce_record(results: dict):
227
- """Creates record in Salesforce Custom_Risk_Analysis__c object with automatic date handling."""
228
  try:
229
- sf = get_salesforce_connection()
230
- if not sf:
231
- return None, "Failed to connect to Salesforce"
232
-
233
- # Prepare the record data to create a record in Salesforce
234
- record_data = {
235
- 'Risk_Score__c': results['risk_score'],
236
- 'Risk_Level__c': results['risk_level'],
237
- 'Penalty_Examples__c': results['penalty_examples'][:32768], # Truncate to max length
238
- 'Penalty_Amounts__c': results['penalty_amounts'][:255],
239
- 'Penalty_Details__c': results['penalty_details'][:32768],
240
- 'Obligation_Details__c': results['obligation_details'][:32768],
241
- 'Delay_Details__c': results['delay_details'][:32768],
242
- # Do not manually add the 'CreatedDate' field here as it is auto-managed by Salesforce
243
- }
244
-
245
- # Create the record in Salesforce
246
- response = sf.Custom_Risk_Analysis__c.create(record_data)
247
-
248
- # Check the response and return appropriate results
249
- if response.get('success'):
250
- return response.get('id'), None
251
- else:
252
- errors = response.get('errors', [])
253
- error_msg = errors[0]['message'] if errors else "Unknown error"
254
- return None, error_msg
255
  except Exception as e:
256
- return None, f"Salesforce error: {str(e)}"
 
257
 
258
  def extract_text_from_pdf(pdf_path: str) -> str:
259
  """Extract text from PDF using pdfplumber"""
260
- text = ""
261
- with pdfplumber.open(pdf_path) as pdf:
262
- for page in pdf.pages:
263
- text += page.extract_text() or ""
264
- return text
 
 
 
 
 
 
265
 
266
  def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
267
  """Count occurrences of keywords in text"""
@@ -278,7 +239,7 @@ def find_penalty_values(text: str) -> List[float]:
278
  r'\d+\s*(?:percent|%)',
279
  r'(?:\b[a-z]+\s*)+dollars',
280
  ]
281
-
282
  penalties = []
283
  for pattern in patterns:
284
  matches = re.finditer(pattern, text, flags=re.IGNORECASE)
@@ -298,7 +259,7 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
298
  """Calculate risk score based on various factors"""
299
  score = 0
300
  score += min(penalty_count * 5, 30)
301
-
302
  if penalty_values:
303
  avg_penalty = sum(penalty_values) / len(penalty_values)
304
  if avg_penalty > 1000000:
@@ -309,11 +270,11 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
309
  score += 15
310
  else:
311
  score += 5
312
-
313
  score += min(obligation_count * 2, 20)
314
  score += min(delay_count * 10, 30)
315
  score = min(score, 100)
316
-
317
  if score < 30:
318
  return score, "Low"
319
  elif score < 70:
@@ -321,229 +282,199 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
321
  else:
322
  return score, "High"
323
 
324
- def generate_combined_risk_display(risk_score: float, risk_level: str) -> Tuple[str, plt.Figure]:
325
- """Generate a combined display with all three risk levels in one layout"""
326
- fig, ax = plt.subplots(figsize=(10, 3))
327
- ax.axis('off')
328
-
329
- risk_levels = ["Low", "Medium", "High"]
330
- colors = ['#28a745', '#ffc107', '#dc3545']
331
-
332
- # Create HTML for the text display
333
- html_parts = []
334
- html_parts.append("<div class='combined-risk-container'>")
335
-
336
- for i, level in enumerate(risk_levels):
337
- active = level == risk_level
338
- score = risk_score if active else 0
339
-
340
- # Add to HTML
341
- html_parts.append(f"""
342
- <div class='risk-row'>
343
- <div class='risk-label risk-{level.lower()}'>{level} Risk</div>
344
- <div class='risk-score risk-{level.lower()}'>{score:.1f}%</div>
345
- <div class='heatmap-wrapper'>
346
- <img src='data:image/png;base64,{create_mini_heatmap(score, colors[i])}' style='width:100%'>
347
- </div>
348
- </div>
349
- """)
350
-
351
- html_parts.append("</div>")
352
-
353
- return "\n".join(html_parts), fig
354
-
355
- def create_mini_heatmap(score: float, color: str) -> str:
356
- """Create a small heatmap for one risk level"""
357
- fig, ax = plt.subplots(figsize=(8, 0.5))
358
-
359
- if score > 0:
360
- gradient = np.linspace(0, score/100, 256).reshape(1, -1)
361
- else:
362
- gradient = np.zeros((1, 256))
363
-
364
- gradient = np.vstack((gradient, gradient))
365
-
366
- ax.imshow(gradient, aspect='auto', cmap=plt.cm.colors.LinearSegmentedColormap.from_list('custom', ['white', color]))
367
- ax.set_axis_off()
368
- plt.tight_layout()
369
-
370
- # Save to base64 string
371
- buf = BytesIO()
372
- plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
373
- plt.close(fig)
374
- return base64.b64encode(buf.getvalue()).decode('utf-8')
375
-
376
- def format_warning_message(count: int, items: str, item_type: str) -> str:
377
- """Format warning message based on count"""
378
- if count == 0:
379
- return f"""<div class="success-box">✓ No {item_type} clauses detected - Good!</div>"""
380
- elif count < 5:
381
- return f"""<div class="warning-box">⚠️ {count} {item_type} clauses detected</div>"""
382
- else:
383
- return f"""<div class="danger-box">⚠️⚠️ {count} {item_type} clauses detected - High Risk!</div>"""
384
 
385
- def create_vertical_count_display(counts: Dict[str, int], total: int, section_type: str) -> str:
386
- """Create a vertical display for counts with section styling"""
387
- items_html = []
388
- for kw, count in counts.items():
389
- items_html.append(f"""
390
- <div class="count-item">
391
- <span class="count-label">{kw.title()}:</span>
392
- <span class="count-value">{count}</span>
393
- </div>
394
- """)
395
-
396
- return f"""
397
- <div class="section-container {section_type}-box">
398
- <div class="section-title">{section_type.title()} Analysis</div>
399
- <div style="margin-bottom: 10px;">
400
- <span style="font-weight: bold;">Total:</span>
401
- <span style="margin-left: 10px;">{total}</span>
402
- </div>
403
- {"".join(items_html)}
404
- </div>
405
- """
406
 
407
  def analyze_pdf(file_obj) -> List:
408
  """Main analysis function for Gradio interface"""
409
  try:
410
- # Extract text from the uploaded file
411
- text = extract_text_from_pdf(file_obj.name)
412
-
413
- # Define keywords to search for
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
415
  obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
416
  delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]
417
-
418
- # Count keyword occurrences
419
  penalty_counts = count_keywords(text, penalty_keywords)
420
  obligation_counts = count_keywords(text, obligation_keywords)
421
  delay_counts = count_keywords(text, delay_keywords)
422
-
423
- # Find penalty values
424
  penalty_values = find_penalty_values(text)
425
-
426
- # Calculate total counts
427
  total_penalties = sum(penalty_counts.values())
428
  total_obligations = sum(obligation_counts.values())
429
  total_delays = sum(delay_counts.values())
430
-
431
- # Calculate risk score
432
- risk_score, risk_level = calculate_risk_score(
433
- total_penalties, penalty_values, total_obligations, total_delays
434
- )
435
-
436
- # Generate combined risk display
437
- risk_display, _ = generate_combined_risk_display(risk_score, risk_level)
438
-
439
- # Prepare warning/success messages
440
- penalty_warning = format_warning_message(total_penalties, "penalty", "penalty")
441
- obligation_warning = format_warning_message(total_obligations, "obligation", "obligation")
442
- delay_warning = format_warning_message(total_delays, "delay", "delay")
443
-
444
- # Create vertical displays
445
- penalty_display = create_vertical_count_display(penalty_counts, total_penalties, "penalty")
446
- obligation_display = create_vertical_count_display(obligation_counts, total_obligations, "obligation")
447
- delay_display = create_vertical_count_display(delay_counts, total_delays, "delay")
448
-
449
- # Combine warnings with displays
450
- penalty_output = f"{penalty_warning}\n{penalty_display}"
451
- obligation_output = f"{obligation_warning}\n{obligation_display}"
452
- delay_output = f"{delay_warning}\n{delay_display}"
453
-
454
- penalty_amounts = "\n".join([f"- ${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found"
455
-
456
- # Find example sentences with penalties
457
  penalty_sentences = []
458
  for sentence in re.split(r'(?<=[.!?])\s+', text):
459
  if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
460
  penalty_sentences.append(sentence.strip())
461
-
462
  penalty_examples = "\n\n".join([f"{i+1}. {sent}" for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "No penalty clauses found"
463
-
464
- # Prepare Salesforce payload
465
- salesforce_data = {
 
466
  'risk_score': risk_score,
467
  'risk_level': risk_level,
 
468
  'penalty_examples': penalty_examples,
469
- 'penalty_amounts': penalty_amounts,
470
- 'penalty_details': f"Total Penalties: {total_penalties}\n\n" + "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
471
- 'obligation_details': f"Total Obligations: {total_obligations}\n\n" + "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
472
- 'delay_details': f"Total Delays: {total_delays}\n\n" + "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
473
  }
474
-
475
- # Create Salesforce record
476
- record_id, error = create_salesforce_record(salesforce_data)
477
-
478
- # **Removed the Record ID display in the output**
479
-
 
 
 
480
  return [
481
- risk_display,
482
- penalty_output,
483
- penalty_amounts,
484
- obligation_output,
485
- delay_output,
486
- penalty_examples,
 
 
 
487
  ]
488
  except Exception as e:
489
- error_msg = f"<div class='salesforce-error'>❌ Processing error: {str(e)}</div>"
490
- return [error_msg] * 7
 
491
 
492
- # Create Gradio interface with full-page layout
493
- with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
494
  gr.Markdown("""
495
- <header>
496
- <h1 style="text-align: center; margin: 0;">📄 PDF Contract Risk Analyzer</h1>
497
- <p style="text-align: center; margin: 10px 0 0;">Upload a contract PDF to analyze penalties, obligations, and delays</p>
498
- </header>
499
  """)
500
-
501
  with gr.Row():
502
  with gr.Column(scale=1):
503
- file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
504
  submit_btn = gr.Button("Analyze PDF", variant="primary")
505
-
506
- with gr.Column(scale=2):
507
- gr.Markdown("### 🔍 Overall Risk Assessment")
508
- risk_display = gr.HTML(label="Risk Analysis")
509
-
 
 
 
510
  with gr.Row():
511
- with gr.Column(scale=1):
512
- gr.Markdown("### 📊 Penalties Analysis")
513
  penalty_count = gr.HTML(label="Penalty Clauses")
514
- gr.Markdown("### Penalty Amounts")
515
- penalty_amounts = gr.Textbox(label="", lines=5)
516
-
517
- with gr.Column(scale=1):
518
- gr.Markdown("### ⚖️ Obligations Analysis")
519
  obligation_count = gr.HTML(label="Obligation Clauses")
520
-
521
- with gr.Column(scale=1):
522
- gr.Markdown("### ⏱️ Delays Analysis")
523
  delay_count = gr.HTML(label="Delay Clauses")
524
-
 
 
 
525
  with gr.Row():
526
- gr.Markdown("### 🔎 Extracted Penalty Clauses")
527
- penalty_examples = gr.Textbox(label="Example Penalty Clauses", lines=5)
528
-
529
- # Footer
530
- #gr.Markdown("""
531
- #<footer>
532
- #<p>Contract Risk Analyzer • Securely analyze legal documents</p>
533
- #<p>All analyses are stored securely in Salesforce</p>
534
- #</footer>
535
- #""")
536
-
537
  submit_btn.click(
538
  fn=analyze_pdf,
539
  inputs=file_input,
540
  outputs=[
541
- risk_display,
542
- penalty_count,
543
- penalty_amounts,
544
- obligation_count,
545
- delay_count,
546
- penalty_examples,
547
  ]
548
  )
549
 
 
9
  import os
10
  import base64
11
  from io import BytesIO
12
+ import uuid
13
+ import logging
14
 
15
+ # Set up logging
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Custom CSS for styling with blue theme and hidden elements
20
  css = """
21
  body {
22
  background-image: url('https://images.unsplash.com/photo-1604147706283-d7119b5b822c?ixlib=rb-1.2.1&auto=format&fit=crop&w=1920&q=80');
 
24
  background-position: center;
25
  background-attachment: fixed;
26
  background-repeat: no-repeat;
27
+ min-height: 100vh;
28
  margin: 0;
29
  padding: 0;
30
  font-family: Arial, sans-serif;
31
  }
 
32
  .gradio-container {
33
+ background-color: rgba(240, 248, 255, 0.95) !important;
34
  border-radius: 10px;
35
  padding: 20px;
36
  margin: 20px;
37
+ max-width: 100%;
38
  box-shadow: 0 4px 12px rgba(0,0,0,0.15);
39
+ min-height: 90vh;
40
+ border: 1px solid #1e90ff !important;
41
  }
 
42
  .risk-low { color: #28a745; font-weight: bold; }
43
  .risk-medium { color: #ffc107; font-weight: bold; }
44
  .risk-high { color: #dc3545; font-weight: bold; }
45
+ .result-box {
46
+ padding: 20px;
47
+ border-radius: 5px;
48
+ margin-bottom: 20px;
49
+ background-color: rgba(240, 248, 255, 0.9);
50
+ border-left: 5px solid #1e90ff !important;
51
  }
52
+ .penalty-box {
53
+ padding: 15px;
54
+ border-radius: 5px;
55
+ margin-bottom: 15px;
 
56
  border-left: 5px solid #dc3545;
57
+ background-color: rgba(255, 245, 245, 0.9);
58
  }
59
+ .obligation-box {
60
+ padding: 15px;
61
+ border-radius: 5px;
62
+ margin-bottom: 15px;
 
63
  border-left: 5px solid #ffc107;
64
+ background-color: rgba(255, 249, 230, 0.9);
65
  }
66
+ .delay-box {
67
+ padding: 15px;
68
+ border-radius: 5px;
69
+ margin-bottom: 15px;
 
70
  border-left: 5px solid #17a2b8;
71
+ background-color: rgba(230, 249, 255, 0.9);
72
  }
 
73
  .combined-risk-container {
74
  display: flex;
75
  flex-direction: column;
76
  gap: 10px;
77
  margin-bottom: 20px;
78
  }
 
79
  .risk-row {
80
  display: flex;
81
  align-items: center;
82
  gap: 15px;
83
  padding: 10px;
84
  border-radius: 5px;
85
+ background-color: rgba(240, 248, 255, 0.8);
86
+ border: 1px solid #add8e6 !important;
87
  }
 
88
  .risk-label {
89
  width: 120px;
90
  font-weight: bold;
91
  font-size: 16px;
92
+ color: #1e4b8f !important;
93
  }
 
94
  .risk-score {
95
  width: 100px;
96
  font-size: 18px;
97
  text-align: center;
98
  }
 
99
  .heatmap-wrapper {
100
  flex-grow: 1;
101
  }
 
102
  .warning-box {
103
  padding: 15px;
104
  border-radius: 5px;
105
  margin: 10px 0;
106
+ background-color: rgba(255, 243, 205, 0.9);
107
  border-left: 5px solid #ffc107;
108
  font-weight: bold;
109
  }
 
110
  .danger-box {
111
  padding: 15px;
112
  border-radius: 5px;
113
  margin: 10px 0;
114
+ background-color: rgba(248, 215, 218, 0.9);
115
  border-left: 5px solid #dc3545;
116
  font-weight: bold;
117
  }
 
118
  .success-box {
119
  padding: 15px;
120
  border-radius: 5px;
121
  margin: 10px 0;
122
+ background-color: rgba(212, 237, 218, 0.9);
123
  border-left: 5px solid #28a745;
124
  font-weight: bold;
125
  }
 
126
  .section-title {
127
  font-size: 20px;
128
  font-weight: bold;
129
  margin-bottom: 15px;
130
+ color: #1e4b8f !important;
131
  }
 
132
  .count-item {
133
  display: flex;
134
  justify-content: space-between;
135
  padding: 10px 0;
136
+ border-bottom: 1px solid #add8e6 !important;
137
  }
 
138
  .count-label {
139
  font-weight: bold;
140
+ color: #1e4b8f !important;
141
  }
 
142
  .count-value {
143
+ color: #4169e1 !important;
144
  }
145
+ button {
146
+ background: linear-gradient(135deg, #1e90ff, #4169e1) !important;
147
+ border: none !important;
148
+ color: white !important;
149
+ font-weight: bold !important;
 
150
  }
151
+ button:hover {
152
+ background: linear-gradient(135deg, #4169e1, #1e90ff) !important;
 
 
 
 
 
 
153
  }
154
+ .upload-area {
155
+ border: 2px dashed #1e90ff !important;
156
+ background-color: rgba(240, 248, 255, 0.7) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  }
158
+ .upload-area:hover {
159
+ background-color: rgba(224, 255, 255, 0.7) !important;
 
 
 
 
 
 
160
  }
161
+ /* Hide elements */
162
+ footer, .gradio-footer, .hide, [data-testid="Use via API"], [data-testid="mmsettings"] {
163
+ display: none !important;
164
+ visibility: hidden !important;
165
+ height: 0 !important;
166
+ width: 0 !important;
167
+ padding: 0 !important;
168
+ margin: 0 !important;
 
169
  }
170
  """
171
 
172
  # Salesforce credentials
173
# Credentials are read from the environment so that no secret lives in the
# source tree.  NOTE(review): the values that used to be hard-coded here were
# published with this commit and should be rotated.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_TOKEN = os.environ.get("SF_TOKEN", "")


def authenticate_salesforce() -> "Salesforce":
    """Authenticate with Salesforce and return a Salesforce client.

    The login uses the module-level ``SF_USERNAME``, ``SF_PASSWORD`` and
    ``SF_TOKEN`` values, which are populated from the environment variables
    of the same names.

    Returns:
        The ``Salesforce`` client created from those credentials.

    Raises:
        Exception: if a credential is missing or the login fails.  The
            message is prefixed with ``"Salesforce authentication failed:"``
            and the underlying error is attached as ``__cause__``.
    """
    try:
        credentials = (
            ("SF_USERNAME", SF_USERNAME),
            ("SF_PASSWORD", SF_PASSWORD),
            ("SF_TOKEN", SF_TOKEN),
        )
        missing = [name for name, value in credentials if not value]
        if missing:
            # Fail with an actionable message instead of an opaque login error.
            raise ValueError(
                "missing environment variable(s): " + ", ".join(missing)
            )

        sf = Salesforce(
            username=SF_USERNAME,
            password=SF_PASSWORD,
            security_token=SF_TOKEN
        )
        logger.info("Successfully authenticated with Salesforce")
        return sf
    except Exception as e:
        logger.error(f"Failed to authenticate with Salesforce: {str(e)}")
        raise Exception(f"Salesforce authentication failed: {str(e)}") from e
190
+
191
+ # Warning message formatting
192
def format_warning_message(count: int, items: str, item_type: str) -> str:
    """Build the HTML banner that summarises how many clauses were found.

    Args:
        count: Number of clauses detected.
        items: Unused; kept so existing callers keep working.
        item_type: Human-readable clause category inserted into the message
            (for example ``"penalty"``).

    Returns:
        A ``<div>`` styled as ``success-box`` when ``count`` is 0, as
        ``warning-box`` when it is below 5, and as ``danger-box`` otherwise.
    """
    if count == 0:
        return f"""<div class="success-box">✓ No {item_type} clauses detected - Good!</div>"""
    elif count < 5:
        # Exactly one match reads "1 ... clause", not "1 ... clauses".
        noun = "clause" if count == 1 else "clauses"
        return f"""<div class="warning-box">⚠️ {count} {item_type} {noun} detected</div>"""
    else:
        return f"""<div class="danger-box">⚠️⚠️ {count} {item_type} clauses detected - High Risk!</div>"""
200
 
201
# Lazily created sentiment pipeline, shared by every call so the model is
# loaded once per process rather than once per analysed document.
_SENTIMENT_CLASSIFIER = None


def get_hugging_face_sentiment(text: str) -> float:
    """Get sentiment score using Hugging Face model.

    Only the first 512 characters of ``text`` are classified.

    Args:
        text: Document text to score.

    Returns:
        The classifier's confidence expressed as a "positive" score rounded
        to two decimals (``score`` for a ``POSITIVE`` label, ``1 - score``
        otherwise), or the fallback ``0.5`` if anything fails.
    """
    global _SENTIMENT_CLASSIFIER
    try:
        if _SENTIMENT_CLASSIFIER is None:
            # Imported here so the app still starts when transformers is absent.
            from transformers import pipeline
            _SENTIMENT_CLASSIFIER = pipeline(
                "sentiment-analysis",
                model="distilbert-base-uncased-finetuned-sst-2-english",
            )
        result = _SENTIMENT_CLASSIFIER(text[:512])[0]
        score = result['score'] if result['label'] == 'POSITIVE' else 1 - result['score']
        return round(score, 2)
    except Exception as e:
        # Best-effort feature: never let sentiment scoring abort the analysis.
        logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
        return 0.5
212
 
213
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages that yielded any text, joined with a newline so
        the last word of one page is not fused with the first word of the
        next.  Empty string when no page yields text.

    Raises:
        Exception: if the file cannot be opened or parsed.  The message is
            prefixed with ``"PDF text extraction failed:"`` and the original
            error is attached as ``__cause__``.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            page_texts = [page.extract_text() for page in pdf.pages]
        # Pages without extractable text give a falsy result; skip those.
        return "\n".join(page_text for page_text in page_texts if page_text)
    except Exception as e:
        logger.error(f"PDF text extraction failed: {str(e)}")
        raise Exception(f"PDF text extraction failed: {str(e)}") from e
226
 
227
  def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
228
  """Count occurrences of keywords in text"""
 
239
  r'\d+\s*(?:percent|%)',
240
  r'(?:\b[a-z]+\s*)+dollars',
241
  ]
242
+
243
  penalties = []
244
  for pattern in patterns:
245
  matches = re.finditer(pattern, text, flags=re.IGNORECASE)
 
259
  """Calculate risk score based on various factors"""
260
  score = 0
261
  score += min(penalty_count * 5, 30)
262
+
263
  if penalty_values:
264
  avg_penalty = sum(penalty_values) / len(penalty_values)
265
  if avg_penalty > 1000000:
 
270
  score += 15
271
  else:
272
  score += 5
273
+
274
  score += min(obligation_count * 2, 20)
275
  score += min(delay_count * 10, 30)
276
  score = min(score, 100)
277
+
278
  if score < 30:
279
  return score, "Low"
280
  elif score < 70:
 
282
  else:
283
  return score, "High"
284
 
285
def generate_heatmap(risk_level: str):
    """Generate a simple heatmap based on risk level.

    Args:
        risk_level: ``"Low"`` selects the blue colormap, ``"Medium"`` the
            orange one; any other value selects the red one.

    Returns:
        The matplotlib figure holding a horizontal gradient bar labelled
        ``"<risk_level> Risk"``.  The caller owns the figure.

    Raises:
        Exception: if drawing fails.  The message is prefixed with
            ``"Heatmap generation failed:"`` and the original error is
            attached as ``__cause__``.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 2))

        if risk_level == "Low":
            cmap = plt.cm.Blues
        elif risk_level == "Medium":
            cmap = plt.cm.Oranges
        else:
            cmap = plt.cm.Reds

        # Two identical rows of a 0..1 ramp give the bar some height.
        gradient = np.linspace(0, 1, 256).reshape(1, -1)
        gradient = np.vstack((gradient, gradient))

        ax.imshow(gradient, aspect='auto', cmap=cmap)
        ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level in ["High", "Medium"] else 'black',
                ha='center', va='center', fontsize=24, fontweight='bold')

        ax.set_axis_off()
        # Lay out this figure explicitly; plt.tight_layout() acts on pyplot's
        # "current figure", which may be a different one.
        fig.tight_layout()
        return fig
    except Exception as e:
        if fig is not None:
            # Nobody receives the figure on failure, so release it here.
            plt.close(fig)
        logger.error(f"Heatmap generation failed: {str(e)}")
        raise Exception(f"Heatmap generation failed: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
def save_to_salesforce(sf: "Salesforce", data: Dict):
    """Save analysis results to Salesforce Custom_Risk_Analysis__c object.

    Args:
        sf: Authenticated Salesforce client.
        data: Analysis results with the keys ``sentiment_score``,
            ``risk_score``, ``risk_level``, ``record_id``,
            ``penalty_examples``, ``penalty_details``, ``penalty_amounts``,
            ``obligation_details`` and ``delay_details``.

    Returns:
        The ``id`` reported by Salesforce for the created record.

    Raises:
        Exception: if a key is missing, the API call fails, or Salesforce
            reports the creation as unsuccessful.  The message is prefixed
            with ``"Salesforce record creation failed:"`` and the original
            error is attached as ``__cause__``.
    """
    # Every text field is cut to the same maximum length before upload.
    max_text_length = 131072
    text_fields = (
        ('Penalty_Examples__c', 'penalty_examples'),
        ('Penalty_Details__c', 'penalty_details'),
        ('Penalty_Amounts__c', 'penalty_amounts'),
        ('Obligation_Details__c', 'obligation_details'),
        ('Delay_Details__c', 'delay_details'),
    )
    try:
        record = {
            'Sentiment_Score__c': data['sentiment_score'],
            'Risk_Score__c': data['risk_score'],
            'Risk_Level__c': data['risk_level'],
            'Record_Id__c': data['record_id'],
        }
        for field_name, data_key in text_fields:
            record[field_name] = data[data_key][:max_text_length]

        result = sf.Custom_Risk_Analysis__c.create(record)
        # Do not report success when the response says otherwise.
        if not result.get('success', True):
            errors = result.get('errors') or []
            raise RuntimeError(str(errors) if errors else "Unknown error")

        logger.info(f"Successfully created Salesforce record: {result['id']}")
        return result['id']
    except Exception as e:
        logger.error(f"Failed to save to Salesforce: {str(e)}")
        raise Exception(f"Salesforce record creation failed: {str(e)}") from e
 
331
 
332
def _render_count_rows(counts: Dict[str, int]) -> str:
    """Render keyword counts as one ``count-item`` HTML row per keyword."""
    return "\n".join(
        f"<div class='count-item'><span class='count-label'>{kw}</span><span class='count-value'>{count}</span></div>"
        for kw, count in counts.items()
    )


def analyze_pdf(file_obj) -> List:
    """Main analysis function for Gradio interface.

    Authenticates with Salesforce, extracts the PDF text, scores sentiment,
    counts penalty/obligation/delay keywords, computes the risk score, draws
    the heatmap, stores the results in Salesforce (failures there are only
    logged) and renders everything as HTML.

    Args:
        file_obj: Uploaded file object from the Gradio file input; its
            ``name`` attribute is used as the path of the PDF.

    Returns:
        A list of nine outputs in the order the click handler expects: risk
        score HTML, risk level HTML, heatmap figure, penalty clauses HTML,
        penalty amounts HTML, obligation clauses HTML, delay clauses HTML,
        example penalty clauses HTML and sentiment HTML.  On failure every
        HTML slot holds the error banner and the heatmap slot is ``None``.
    """
    import html  # local import: only this function needs HTML escaping

    try:
        if not file_obj:
            raise Exception("No PDF file uploaded. Please upload a valid PDF file.")

        # The helpers below already prefix their own error messages, so their
        # exceptions are propagated as-is instead of being wrapped again.
        sf = authenticate_salesforce()

        text = extract_text_from_pdf(file_obj.name)
        if not text.strip():
            raise Exception(
                "PDF text extraction failed: No text extracted from PDF. "
                "It might be a scanned document."
            )

        try:
            sentiment_score = get_hugging_face_sentiment(text)
        except Exception as e:
            logger.warning(f"Sentiment analysis failed: {str(e)}. Using fallback score of 0.5.")
            sentiment_score = 0.5

        penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
        obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
        delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]

        penalty_counts = count_keywords(text, penalty_keywords)
        obligation_counts = count_keywords(text, obligation_keywords)
        delay_counts = count_keywords(text, delay_keywords)

        penalty_values = find_penalty_values(text)

        total_penalties = sum(penalty_counts.values())
        total_obligations = sum(obligation_counts.values())
        total_delays = sum(delay_counts.values())

        try:
            risk_score, risk_level = calculate_risk_score(
                total_penalties, penalty_values, total_obligations, total_delays
            )
        except Exception as e:
            raise Exception(f"Risk score calculation failed: {str(e)}") from e

        heatmap = generate_heatmap(risk_level)

        penalty_details = _render_count_rows(penalty_counts)
        obligation_details = _render_count_rows(obligation_counts)
        delay_details = _render_count_rows(delay_counts)

        if penalty_values:
            penalty_amounts = "\n".join(
                f"<div class='count-item'><span class='count-label'>Amount</span><span class='count-value'>${amt:,.2f}</span></div>"
                for amt in penalty_values[:5]
            )
        else:
            penalty_amounts = "<div class='count-item'><span class='count-label'>No penalty amounts found</span><span class='count-value'>0</span></div>"

        penalty_sentences = []
        for sentence in re.split(r'(?<=[.!?])\s+', text):
            if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
                penalty_sentences.append(sentence.strip())

        penalty_examples = "\n\n".join([f"{i+1}. {sent}" for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "No penalty clauses found"
        # The sentences come straight from the uploaded PDF: escape them
        # before embedding in HTML and keep the line breaks visible.
        penalty_examples_html = html.escape(penalty_examples).replace("\n", "<br>")

        record_id = str(uuid.uuid4())
        sf_data = {
            'sentiment_score': sentiment_score,
            'risk_score': risk_score,
            'risk_level': risk_level,
            'record_id': record_id,
            'penalty_examples': penalty_examples,
            'penalty_details': "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
            'penalty_amounts': "\n".join([f"${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found",
            'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
            'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
        }

        # Persisting is best-effort: the user still gets the analysis when
        # the record cannot be written.
        try:
            salesforce_record_id = save_to_salesforce(sf, sf_data)
            logger.info(f"Saved to Salesforce with ID: {salesforce_record_id}")
        except Exception as e:
            logger.error(f"Salesforce record creation failed: {str(e)}")

        box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"

        return [
            f"<div class='risk-row'><span class='risk-label'>Risk Score</span><span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span></div>",
            f"<div class='risk-row'><span class='risk-label'>Risk Level</span><span class='risk-score risk-{risk_level.lower()}'>{risk_level}</span></div>",
            heatmap,
            f"<div class='penalty-box'><div class='section-title'>Penalty Clauses (Total: {total_penalties})</div>{penalty_details}</div>",
            f"<div class='penalty-box'><div class='section-title'>Penalty Amounts ({len(penalty_values)} found)</div>{penalty_amounts}</div>",
            f"<div class='obligation-box'><div class='section-title'>Obligation Clauses (Total: {total_obligations})</div>{obligation_details}</div>",
            f"<div class='delay-box'><div class='section-title'>Delay Clauses (Total: {total_delays})</div>{delay_details}</div>",
            f"<div class='result-box'><div class='section-title'>Example Penalty Clauses</div>{penalty_examples_html}</div>",
            f"<div class='{box_class}'><div class='section-title'>Sentiment Analysis</div>Sentiment Score: {sentiment_score}</div>"
        ]
    except Exception as e:
        logger.error(f"Analysis failed: {str(e)}")
        error_message = f"<div class='danger-box'>Error: {html.escape(str(e))}</div>"
        outputs = [error_message] * 9
        # The third output is the heatmap plot; it cannot display an HTML
        # string, so clear it instead.
        outputs[2] = None
        return outputs
431
 
432
+ # Create Gradio interface with blue theme and hidden elements
433
+ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
434
  gr.Markdown("""
435
+ <div style='text-align: center;'>
436
+ <h1 style='color: #1e4b8f;'>📄 PDF Contract Risk Analyzer</h1>
437
+ <p style='color: #4169e1;'>Upload a contract PDF to analyze penalties, obligations, delays, and sentiment.</p>
438
+ </div>
439
  """)
440
+
441
  with gr.Row():
442
  with gr.Column(scale=1):
443
+ file_input = gr.File(label="Upload PDF", file_types=[".pdf"], elem_classes="upload-area")
444
  submit_btn = gr.Button("Analyze PDF", variant="primary")
445
+
446
+ with gr.Column(scale=3):
447
+ gr.Markdown("<div class='section-title'>🔍 Overall Risk Assessment</div>")
448
+ with gr.Group(elem_classes="combined-risk-container"):
449
+ risk_score = gr.HTML(label="Risk Score")
450
+ risk_level = gr.HTML(label="Risk Level")
451
+ heatmap = gr.Plot(label="Risk Heatmap", elem_classes="heatmap-wrapper")
452
+
453
  with gr.Row():
454
+ with gr.Column():
 
455
  penalty_count = gr.HTML(label="Penalty Clauses")
456
+ penalty_amounts = gr.HTML(label="Penalty Amounts")
457
+
458
+ with gr.Column():
 
 
459
  obligation_count = gr.HTML(label="Obligation Clauses")
460
+
461
+ with gr.Column():
 
462
  delay_count = gr.HTML(label="Delay Clauses")
463
+
464
+ with gr.Row():
465
+ penalty_examples = gr.HTML(label="Example Penalty Clauses")
466
+
467
  with gr.Row():
468
+ additional_results = gr.HTML(label="Sentiment Analysis")
469
+
 
 
 
 
 
 
 
 
 
470
  submit_btn.click(
471
  fn=analyze_pdf,
472
  inputs=file_input,
473
  outputs=[
474
+ risk_score, risk_level, heatmap,
475
+ penalty_count, penalty_amounts,
476
+ obligation_count, delay_count,
477
+ penalty_examples, additional_results
 
 
478
  ]
479
  )
480