Kushalmanda committed on
Commit
43f587a
·
verified ·
1 Parent(s): 6806377

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -87
app.py CHANGED
@@ -389,7 +389,7 @@ def generate_sentiment_pdf(sentiment_score: float) -> BytesIO:
389
  doc = SimpleDocTemplate(buffer, pagesize=letter)
390
  styles = getSampleStyleSheet()
391
  story = []
392
-
393
  story.append(Paragraph("Sentiment Analysis Report", styles['Title']))
394
  story.append(Spacer(1, 12))
395
  sentiment_text = f"Sentiment Score: {sentiment_score:.2f} (Scale: 0.0 Negative to 1.0 Positive)"
@@ -397,7 +397,7 @@ def generate_sentiment_pdf(sentiment_score: float) -> BytesIO:
397
  story.append(Spacer(1, 12))
398
  interpretation = "Positive" if sentiment_score > 0.6 else "Negative" if sentiment_score < 0.4 else "Neutral"
399
  story.append(Paragraph(f"Interpretation: {interpretation}", styles['Normal']))
400
-
401
  doc.build(story)
402
  buffer.seek(0)
403
  return buffer
@@ -405,10 +405,9 @@ def generate_sentiment_pdf(sentiment_score: float) -> BytesIO:
405
  logger.error(f"PDF generation failed: {str(e)}")
406
  raise Exception(f"PDF generation failed: {str(e)}")
407
 
408
- def save_to_salesforce(sf: Salesforce, data: Dict) -> Dict:
409
- """Save analysis results and PDF to Salesforce, return record and file IDs"""
410
  try:
411
- # Save Custom_Risk_Analysis__c record
412
  record = {
413
  'Sentiment_Score__c': data['sentiment_score'],
414
  'Risk_Score__c': data['risk_score'],
@@ -422,36 +421,10 @@ def save_to_salesforce(sf: Salesforce, data: Dict) -> Dict:
422
  }
423
  result = sf.Custom_Risk_Analysis__c.create(record)
424
  logger.info(f"Successfully created Salesforce record: {result['id']}")
425
-
426
- # Generate and save PDF as ContentVersion
427
- pdf_buffer = generate_sentiment_pdf(data['sentiment_score'])
428
- pdf_data = base64.b64encode(pdf_buffer.getvalue()).decode('utf-8')
429
-
430
- content_version = {
431
- 'Title': f"Sentiment_Analysis_{data['record_id']}.pdf",
432
- 'PathOnClient': f"Sentiment_Analysis_{data['record_id']}.pdf",
433
- 'VersionData': pdf_data,
434
- 'FirstPublishLocationId': result['id']
435
- }
436
- file_result = sf.ContentVersion.create(content_version)
437
- logger.info(f"Successfully uploaded PDF to Salesforce: {file_result['id']}")
438
-
439
- # Get the ContentDocumentId
440
- content_version_data = sf.ContentVersion.get(file_result['id'])
441
- content_document_id = content_version_data['ContentDocumentId']
442
-
443
- # Construct the download URL
444
- instance_url = sf.base_url.rstrip('/s')
445
- download_url = f"{instance_url}/sfc/servlet.shepherd/version/download/{file_result['id']}"
446
-
447
- return {
448
- 'record_id': result['id'],
449
- 'file_id': file_result['id'],
450
- 'download_url': download_url
451
- }
452
  except Exception as e:
453
  logger.error(f"Failed to save to Salesforce: {str(e)}")
454
- raise Exception(f"Salesforce record or file creation failed: {str(e)}")
455
 
456
  def extract_text_from_pdf(pdf_path: str) -> str:
457
  """Extract text from PDF using pdfplumber"""
@@ -482,7 +455,7 @@ def find_penalty_values(text: str) -> List[float]:
482
  r'\d+\s*(?:percent|%)',
483
  r'(?:\b[a-z]+\s*)+dollars',
484
  ]
485
-
486
  penalties = []
487
  for pattern in patterns:
488
  matches = re.finditer(pattern, text, flags=re.IGNORECASE)
@@ -502,7 +475,7 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
502
  """Calculate risk score based on various factors"""
503
  score = 0
504
  score += min(penalty_count * 5, 30)
505
-
506
  if penalty_values:
507
  avg_penalty = sum(penalty_values) / len(penalty_values)
508
  if avg_penalty > 1000000:
@@ -513,11 +486,11 @@ def calculate_risk_score(penalty_count: int, penalty_values: List[float], obliga
513
  score += 15
514
  else:
515
  score += 5
516
-
517
  score += min(obligation_count * 2, 20)
518
  score += min(delay_count * 10, 30)
519
  score = min(score, 100)
520
-
521
  if score < 30:
522
  return score, "Low"
523
  elif score < 70:
@@ -557,7 +530,7 @@ def generate_heatmap(risk_level: str):
557
  """Generate a simple heatmap based on risk level"""
558
  try:
559
  fig, ax = plt.subplots(figsize=(8, 2))
560
-
561
  if risk_level == "Low":
562
  cmap = plt.cm.Blues
563
  color = '#4CAF50'
@@ -567,14 +540,14 @@ def generate_heatmap(risk_level: str):
567
  else:
568
  cmap = plt.cm.Reds
569
  color = '#F44336'
570
-
571
  gradient = np.linspace(0, 1, 256).reshape(1, -1)
572
  gradient = np.vstack((gradient, gradient))
573
-
574
  ax.imshow(gradient, aspect='auto', cmap=cmap)
575
  ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level in ["High", "Medium"] else 'black',
576
  ha='center', va='center', fontsize=24, fontweight='bold')
577
-
578
  ax.set_axis_off()
579
  plt.tight_layout()
580
  return fig
@@ -602,7 +575,7 @@ def format_clause_example(example: str, index: int) -> str:
602
  </div>
603
  """
604
 
605
- def analyze_pdf(file_obj) -> List:
606
  """Main analysis function for Gradio interface"""
607
  try:
608
  if not file_obj:
@@ -629,17 +602,17 @@ def analyze_pdf(file_obj) -> List:
629
  penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
630
  obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
631
  delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]
632
-
633
  penalty_counts = count_keywords(text, penalty_keywords)
634
  obligation_counts = count_keywords(text, obligation_keywords)
635
  delay_counts = count_keywords(text, delay_keywords)
636
-
637
  penalty_values = find_penalty_values(text)
638
-
639
  total_penalties = sum(penalty_counts.values())
640
  total_obligations = sum(obligation_counts.values())
641
  total_delays = sum(delay_counts.values())
642
-
643
  # Generate warning messages with emojis
644
  penalty_warning = format_warning_message(total_penalties, "penalty", "πŸ’°")
645
  obligation_warning = format_warning_message(total_obligations, "obligation", "πŸ“")
@@ -685,14 +658,14 @@ def analyze_pdf(file_obj) -> List:
685
  """
686
 
687
  penalty_amounts = "\n".join([f"<div class='count-item'><span class='count-label'>πŸ’° Amount</span><span class='count-value'>${amt:,.2f}</span></div>" for amt in penalty_values[:5]]) if penalty_values else "<div class='success-box'>βœ… No specific penalty amounts found - This is good news!</div>"
688
-
689
  penalty_sentences = []
690
  for sentence in re.split(r'(?<=[.!?])\s+', text):
691
  if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
692
  penalty_sentences.append(sentence.strip())
693
-
694
  extracted_data = "\n".join([format_clause_example(sent, i+1) for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "<div class='success-box'>βœ… No penalty clauses found - Excellent contract terms!</div>"
695
-
696
  record_id = str(uuid.uuid4())
697
  sf_data = {
698
  'sentiment_score': sentiment_score,
@@ -705,15 +678,12 @@ def analyze_pdf(file_obj) -> List:
705
  'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
706
  'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
707
  }
708
-
709
  try:
710
- salesforce_result = save_to_salesforce(sf, sf_data)
711
- salesforce_record_id = salesforce_result['record_id']
712
- download_url = salesforce_result['download_url']
713
- logger.info(f"Saved to Salesforce with Record ID: {salesforce_record_id}, PDF URL: {download_url}")
714
  except Exception as e:
715
  logger.error(f"Salesforce record creation failed: {str(e)}")
716
- download_url = None
717
 
718
  box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
719
  risk_icon = "βœ…" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
@@ -722,8 +692,25 @@ def analyze_pdf(file_obj) -> List:
722
  "Medium": "This contract has moderate risk. Careful review advised.",
723
  "High": "This contract is high risk! Immediate legal review required."
724
  }
725
-
726
- # Add sentiment analysis PDF download link
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
727
  sentiment_analysis_output = f"""
728
  <div class='result-box'>
729
  <div class='section-title'>πŸ“Š Sentiment Analysis</div>
@@ -732,35 +719,20 @@ def analyze_pdf(file_obj) -> List:
732
  <span class='risk-score'>{sentiment_score:.2f}</span>
733
  </div>
734
  {sentiment_meter}
735
- <div style='margin-top: 15px;'>
736
- <strong>Download Sentiment Report:</strong>
737
- {'<a href="' + download_url + '" target="_blank" style="color: var(--primary-color); text-decoration: underline;">Download PDF</a>' if download_url else 'Failed to generate PDF download link'}
738
- </div>
739
  </div>
740
  """
741
 
742
- return [
743
- f"""
744
- <div class='result-box'>
745
- <div class='section-title'>{risk_icon} Contract Risk Summary</div>
746
- <div class='risk-row'>
747
- <span class='risk-label'>Overall Risk Score</span>
748
- <span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
749
- </div>
750
- {risk_meter}
751
- <div style='margin-top: 15px; font-size: 16px;'>
752
- <strong>Assessment:</strong> {risk_advice[risk_level]}
753
- </div>
754
- </div>
755
- """,
756
  "", # Empty string for hidden risk visualization
757
  penalty_details,
758
  f"<div class='penalty-box'><div class='section-title'>πŸ’° Penalty Amounts Found</div>{penalty_amounts}</div>",
759
  obligation_details,
760
  delay_details,
761
  f"<div class='result-box'><div class='section-title'>πŸ“œ Extracted Data</div>{extracted_data}</div>",
762
- sentiment_analysis_output
763
- ]
 
764
  except Exception as e:
765
  logger.error(f"Analysis failed: {str(e)}")
766
  error_message = f"""
@@ -775,7 +747,7 @@ def analyze_pdf(file_obj) -> List:
775
  </div>
776
  </div>
777
  """
778
- return [error_message] * 8
779
 
780
  # Create Gradio interface with dark mode compatibility
781
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
@@ -787,7 +759,7 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
787
  </p>
788
  </div>
789
  """)
790
-
791
  with gr.Row():
792
  with gr.Column(scale=1):
793
  file_input = gr.File(
@@ -801,28 +773,31 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
801
  </div>
802
  """)
803
  submit_btn = gr.Button("Analyze Contract", variant="primary")
804
-
805
  with gr.Column(scale=3):
806
  risk_summary = gr.HTML(label="Contract Risk Summary")
807
  risk_visualization = gr.HTML(label="Risk Visualization", visible=False, elem_id="risk-visualization")
808
-
809
  with gr.Row():
810
  with gr.Column():
811
  penalty_count = gr.HTML(label="Penalty Clauses Analysis")
812
  penalty_amounts = gr.HTML(label="Penalty Amounts Found")
813
-
814
  with gr.Column():
815
  obligation_count = gr.HTML(label="Obligation Clauses Analysis")
816
-
817
  with gr.Column():
818
  delay_count = gr.HTML(label="Delay Clauses Analysis")
819
-
820
  with gr.Row():
821
  extracted_data = gr.HTML(label="Extracted Data")
822
-
823
  with gr.Row():
824
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
825
-
 
 
 
826
  submit_btn.click(
827
  fn=analyze_pdf,
828
  inputs=[file_input],
@@ -830,7 +805,8 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
830
  risk_summary, risk_visualization,
831
  penalty_count, penalty_amounts,
832
  obligation_count, delay_count,
833
- extracted_data, sentiment_analysis
 
834
  ]
835
  )
836
 
 
389
  doc = SimpleDocTemplate(buffer, pagesize=letter)
390
  styles = getSampleStyleSheet()
391
  story = []
392
+
393
  story.append(Paragraph("Sentiment Analysis Report", styles['Title']))
394
  story.append(Spacer(1, 12))
395
  sentiment_text = f"Sentiment Score: {sentiment_score:.2f} (Scale: 0.0 Negative to 1.0 Positive)"
 
397
  story.append(Spacer(1, 12))
398
  interpretation = "Positive" if sentiment_score > 0.6 else "Negative" if sentiment_score < 0.4 else "Neutral"
399
  story.append(Paragraph(f"Interpretation: {interpretation}", styles['Normal']))
400
+
401
  doc.build(story)
402
  buffer.seek(0)
403
  return buffer
 
405
  logger.error(f"PDF generation failed: {str(e)}")
406
  raise Exception(f"PDF generation failed: {str(e)}")
407
 
408
+ def save_to_salesforce(sf: Salesforce, data: Dict) -> str:
409
+ """Save analysis results to Salesforce Custom_Risk_Analysis__c object"""
410
  try:
 
411
  record = {
412
  'Sentiment_Score__c': data['sentiment_score'],
413
  'Risk_Score__c': data['risk_score'],
 
421
  }
422
  result = sf.Custom_Risk_Analysis__c.create(record)
423
  logger.info(f"Successfully created Salesforce record: {result['id']}")
424
+ return result['id']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  except Exception as e:
426
  logger.error(f"Failed to save to Salesforce: {str(e)}")
427
+ raise Exception(f"Salesforce record creation failed: {str(e)}")
428
 
429
  def extract_text_from_pdf(pdf_path: str) -> str:
430
  """Extract text from PDF using pdfplumber"""
 
455
  r'\d+\s*(?:percent|%)',
456
  r'(?:\b[a-z]+\s*)+dollars',
457
  ]
458
+
459
  penalties = []
460
  for pattern in patterns:
461
  matches = re.finditer(pattern, text, flags=re.IGNORECASE)
 
475
  """Calculate risk score based on various factors"""
476
  score = 0
477
  score += min(penalty_count * 5, 30)
478
+
479
  if penalty_values:
480
  avg_penalty = sum(penalty_values) / len(penalty_values)
481
  if avg_penalty > 1000000:
 
486
  score += 15
487
  else:
488
  score += 5
489
+
490
  score += min(obligation_count * 2, 20)
491
  score += min(delay_count * 10, 30)
492
  score = min(score, 100)
493
+
494
  if score < 30:
495
  return score, "Low"
496
  elif score < 70:
 
530
  """Generate a simple heatmap based on risk level"""
531
  try:
532
  fig, ax = plt.subplots(figsize=(8, 2))
533
+
534
  if risk_level == "Low":
535
  cmap = plt.cm.Blues
536
  color = '#4CAF50'
 
540
  else:
541
  cmap = plt.cm.Reds
542
  color = '#F44336'
543
+
544
  gradient = np.linspace(0, 1, 256).reshape(1, -1)
545
  gradient = np.vstack((gradient, gradient))
546
+
547
  ax.imshow(gradient, aspect='auto', cmap=cmap)
548
  ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level in ["High", "Medium"] else 'black',
549
  ha='center', va='center', fontsize=24, fontweight='bold')
550
+
551
  ax.set_axis_off()
552
  plt.tight_layout()
553
  return fig
 
575
  </div>
576
  """
577
 
578
+ def analyze_pdf(file_obj) -> tuple:
579
  """Main analysis function for Gradio interface"""
580
  try:
581
  if not file_obj:
 
602
  penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
603
  obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
604
  delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]
605
+
606
  penalty_counts = count_keywords(text, penalty_keywords)
607
  obligation_counts = count_keywords(text, obligation_keywords)
608
  delay_counts = count_keywords(text, delay_keywords)
609
+
610
  penalty_values = find_penalty_values(text)
611
+
612
  total_penalties = sum(penalty_counts.values())
613
  total_obligations = sum(obligation_counts.values())
614
  total_delays = sum(delay_counts.values())
615
+
616
  # Generate warning messages with emojis
617
  penalty_warning = format_warning_message(total_penalties, "penalty", "πŸ’°")
618
  obligation_warning = format_warning_message(total_obligations, "obligation", "πŸ“")
 
658
  """
659
 
660
  penalty_amounts = "\n".join([f"<div class='count-item'><span class='count-label'>πŸ’° Amount</span><span class='count-value'>${amt:,.2f}</span></div>" for amt in penalty_values[:5]]) if penalty_values else "<div class='success-box'>βœ… No specific penalty amounts found - This is good news!</div>"
661
+
662
  penalty_sentences = []
663
  for sentence in re.split(r'(?<=[.!?])\s+', text):
664
  if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
665
  penalty_sentences.append(sentence.strip())
666
+
667
  extracted_data = "\n".join([format_clause_example(sent, i+1) for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "<div class='success-box'>βœ… No penalty clauses found - Excellent contract terms!</div>"
668
+
669
  record_id = str(uuid.uuid4())
670
  sf_data = {
671
  'sentiment_score': sentiment_score,
 
678
  'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
679
  'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
680
  }
681
+
682
  try:
683
+ salesforce_record_id = save_to_salesforce(sf, sf_data)
684
+ logger.info(f"Saved to Salesforce with ID: {salesforce_record_id}")
 
 
685
  except Exception as e:
686
  logger.error(f"Salesforce record creation failed: {str(e)}")
 
687
 
688
  box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
689
  risk_icon = "βœ…" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
 
692
  "Medium": "This contract has moderate risk. Careful review advised.",
693
  "High": "This contract is high risk! Immediate legal review required."
694
  }
695
+
696
+ # Generate PDF for download
697
+ pdf_buffer = generate_sentiment_pdf(sentiment_score)
698
+ pdf_file = (f"Sentiment_Analysis_{record_id}.pdf", pdf_buffer.read(), "application/pdf")
699
+
700
+ risk_summary_output = f"""
701
+ <div class='result-box'>
702
+ <div class='section-title'>{risk_icon} Contract Risk Summary</div>
703
+ <div class='risk-row'>
704
+ <span class='risk-label'>Overall Risk Score</span>
705
+ <span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
706
+ </div>
707
+ {risk_meter}
708
+ <div style='margin-top: 15px; font-size: 16px;'>
709
+ <strong>Assessment:</strong> {risk_advice[risk_level]}
710
+ </div>
711
+ </div>
712
+ """
713
+
714
  sentiment_analysis_output = f"""
715
  <div class='result-box'>
716
  <div class='section-title'>πŸ“Š Sentiment Analysis</div>
 
719
  <span class='risk-score'>{sentiment_score:.2f}</span>
720
  </div>
721
  {sentiment_meter}
 
 
 
 
722
  </div>
723
  """
724
 
725
+ return (
726
+ risk_summary_output,
 
 
 
 
 
 
 
 
 
 
 
 
727
  "", # Empty string for hidden risk visualization
728
  penalty_details,
729
  f"<div class='penalty-box'><div class='section-title'>πŸ’° Penalty Amounts Found</div>{penalty_amounts}</div>",
730
  obligation_details,
731
  delay_details,
732
  f"<div class='result-box'><div class='section-title'>πŸ“œ Extracted Data</div>{extracted_data}</div>",
733
+ sentiment_analysis_output,
734
+ pdf_file
735
+ )
736
  except Exception as e:
737
  logger.error(f"Analysis failed: {str(e)}")
738
  error_message = f"""
 
747
  </div>
748
  </div>
749
  """
750
+ return [error_message] * 8 + (None,)
751
 
752
  # Create Gradio interface with dark mode compatibility
753
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
 
759
  </p>
760
  </div>
761
  """)
762
+
763
  with gr.Row():
764
  with gr.Column(scale=1):
765
  file_input = gr.File(
 
773
  </div>
774
  """)
775
  submit_btn = gr.Button("Analyze Contract", variant="primary")
776
+
777
  with gr.Column(scale=3):
778
  risk_summary = gr.HTML(label="Contract Risk Summary")
779
  risk_visualization = gr.HTML(label="Risk Visualization", visible=False, elem_id="risk-visualization")
780
+
781
  with gr.Row():
782
  with gr.Column():
783
  penalty_count = gr.HTML(label="Penalty Clauses Analysis")
784
  penalty_amounts = gr.HTML(label="Penalty Amounts Found")
785
+
786
  with gr.Column():
787
  obligation_count = gr.HTML(label="Obligation Clauses Analysis")
788
+
789
  with gr.Column():
790
  delay_count = gr.HTML(label="Delay Clauses Analysis")
791
+
792
  with gr.Row():
793
  extracted_data = gr.HTML(label="Extracted Data")
794
+
795
  with gr.Row():
796
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
797
+
798
+ with gr.Row():
799
+ pdf_download = gr.File(label="Download Sentiment Report", visible=True)
800
+
801
  submit_btn.click(
802
  fn=analyze_pdf,
803
  inputs=[file_input],
 
805
  risk_summary, risk_visualization,
806
  penalty_count, penalty_amounts,
807
  obligation_count, delay_count,
808
+ extracted_data, sentiment_analysis,
809
+ pdf_download
810
  ]
811
  )
812