Kushalmanda committed on
Commit
b8747f3
·
verified ·
1 Parent(s): cd8aaa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +646 -267
app.py CHANGED
@@ -347,8 +347,279 @@ def get_hugging_face_sentiment(text: str) -> float:
347
  logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
348
  return 0.5
349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
351
- """Generate a comprehensive PDF report with analysis results"""
352
  try:
353
  pdf_file = BytesIO()
354
  c = canvas.Canvas(pdf_file, pagesize=letter)
@@ -359,8 +630,6 @@ def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
359
  c.setFont("Helvetica", 10)
360
  c.drawString(1 * inch, 10.2 * inch, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
361
  c.drawString(1 * inch, 10 * inch, f"Document: {analysis_data.get('document_name', 'Unknown')}")
362
-
363
- # Add a line separator
364
  c.line(1 * inch, 9.8 * inch, 7.5 * inch, 9.8 * inch)
365
 
366
  # Risk Summary Section
@@ -369,7 +638,6 @@ def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
369
  c.drawString(1 * inch, y_position, "1. Risk Summary")
370
  y_position -= 0.3 * inch
371
 
372
- c.setFont("Helvetica", 10)
373
  risk_level = analysis_data['risk_level']
374
  risk_color = {
375
  "Low": "#4CAF50",
@@ -403,59 +671,156 @@ def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
403
  # Detailed Metrics Section
404
  y_position -= 0.3 * inch
405
  c.setFont("Helvetica-Bold", 14)
406
- c.drawString(1 * inch, y_position, "2. Detailed Metrics")
407
  y_position -= 0.3 * inch
408
 
409
- # Sentiment Analysis
410
  c.setFont("Helvetica-Bold", 12)
411
- c.drawString(1 * inch, y_position, "Sentiment Analysis:")
412
  y_position -= 0.2 * inch
413
  c.setFont("Helvetica", 10)
414
- sentiment_score = analysis_data['sentiment_score']
415
- sentiment_text = (
416
- "Positive (favorable language)" if sentiment_score > 0.6 else
417
- "Negative (adversarial language)" if sentiment_score < 0.4 else
418
- "Neutral (balanced language)"
419
- )
420
- c.drawString(1.2 * inch, y_position, f"Score: {sentiment_score:.2f} - {sentiment_text}")
421
  y_position -= 0.2 * inch
422
- c.drawString(1.2 * inch, y_position, "Interpretation: Measures the overall tone of the contract language.")
423
- y_position -= 0.25 * inch
424
 
425
- # Penalty Analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  c.setFont("Helvetica-Bold", 12)
427
- c.drawString(1 * inch, y_position, "Penalty Analysis:")
428
  y_position -= 0.2 * inch
429
  c.setFont("Helvetica", 10)
430
- c.drawString(1.2 * inch, y_position, f"Total penalty clauses found: {analysis_data['penalty_count']}")
431
  y_position -= 0.2 * inch
432
- if analysis_data['penalty_values']:
433
- c.drawString(1.2 * inch, y_position, f"Highest penalty amount: ${max(analysis_data['penalty_values']):,.2f}")
 
 
 
 
 
 
 
434
  y_position -= 0.2 * inch
435
- c.drawString(1.2 * inch, y_position, f"Average penalty amount: ${sum(analysis_data['penalty_values'])/len(analysis_data['penalty_values']):,.2f}")
 
 
 
 
 
436
  y_position -= 0.2 * inch
437
- c.drawString(1.2 * inch, y_position, "Interpretation: Penalties are financial consequences for non-compliance.")
438
- y_position -= 0.25 * inch
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
- # Obligation Analysis
 
 
 
 
441
  c.setFont("Helvetica-Bold", 12)
442
- c.drawString(1 * inch, y_position, "Obligation Analysis:")
443
  y_position -= 0.2 * inch
444
  c.setFont("Helvetica", 10)
445
- c.drawString(1.2 * inch, y_position, f"Total obligation clauses found: {analysis_data['obligation_count']}")
446
  y_position -= 0.2 * inch
447
- c.drawString(1.2 * inch, y_position, "Interpretation: Obligations are requirements that must be fulfilled.")
448
- y_position -= 0.25 * inch
449
 
450
- # Delay Analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  c.setFont("Helvetica-Bold", 12)
452
- c.drawString(1 * inch, y_position, "Delay Analysis:")
453
  y_position -= 0.2 * inch
454
  c.setFont("Helvetica", 10)
455
- c.drawString(1.2 * inch, y_position, f"Total delay clauses found: {analysis_data['delay_count']}")
 
 
 
 
 
 
 
 
456
  y_position -= 0.2 * inch
457
- c.drawString(1.2 * inch, y_position, "Interpretation: Delay clauses specify timelines and consequences for delays.")
458
- y_position -= 0.3 * inch
459
 
460
  # Key Findings Section
461
  if y_position < 2 * inch:
@@ -471,10 +836,12 @@ def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
471
  findings = []
472
  if analysis_data['risk_level'] == "High":
473
  findings.append("⚠️ High-risk contract requiring immediate legal review")
474
- if analysis_data['penalty_count'] > 5:
475
- findings.append(f"⚠️ High number of penalty clauses ({analysis_data['penalty_count']})")
476
- if analysis_data['obligation_count'] > 10:
477
- findings.append(f"📝 Numerous obligations ({analysis_data['obligation_count']}) that may require tracking")
 
 
478
  if analysis_data['sentiment_score'] < 0.4:
479
  findings.append("🔍 Contract language appears adversarial (low sentiment score)")
480
 
@@ -496,9 +863,11 @@ def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
496
  if analysis_data['risk_level'] == "High":
497
  recommendations.append("• Engage legal counsel for comprehensive review")
498
  recommendations.append("• Negotiate penalty clauses and liability terms")
499
- if analysis_data['penalty_count'] > 0:
500
  recommendations.append("• Review all penalty clauses for fairness and applicability")
501
- if analysis_data['obligation_count'] > 10:
 
 
502
  recommendations.append("• Create an obligation tracking system")
503
  if analysis_data['sentiment_score'] < 0.4:
504
  recommendations.append("• Consider negotiating more balanced language")
@@ -543,152 +912,210 @@ def save_to_salesforce(sf: Salesforce, data: Dict) -> str:
543
  logger.error(f"Failed to save to Salesforce: {str(e)}")
544
  raise Exception(f"Salesforce record creation failed: {str(e)}")
545
 
546
- def extract_text_from_pdf(pdf_path: str) -> str:
547
- """Extract text from PDF using pdfplumber"""
548
- try:
549
- text = ""
550
- with pdfplumber.open(pdf_path) as pdf:
551
- for page in pdf.pages:
552
- page_text = page.extract_text()
553
- if page_text:
554
- text += page_text
555
- return text
556
- except Exception as e:
557
- logger.error(f"PDF text extraction failed: {str(e)}")
558
- raise Exception(f"PDF text extraction failed: {str(e)}")
559
-
560
- def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
561
- """Count occurrences of keywords in text"""
562
- counts = {}
563
- for keyword in keywords:
564
- counts[keyword] = len(re.findall(r'\b' + re.escape(keyword) + r'\b', text, flags=re.IGNORECASE))
565
- return counts
566
-
567
- def find_penalty_values(text: str) -> List[float]:
568
- """Find penalty amounts in the text"""
569
- patterns = [
570
- r'\$\s*[\d,]+(?:\.\d+)?',
571
- r'(?:USD|usd)\s*[\d,]+(?:\.\d+)?',
572
- r'\d+\s*(?:percent|%)',
573
- r'(?:\b[a-z]+\s*)+dollars',
574
- ]
575
-
576
- penalties = []
577
- for pattern in patterns:
578
- matches = re.finditer(pattern, text, flags=re.IGNORECASE)
579
- for match in matches:
580
- penalty_text = match.group()
581
- try:
582
- if any(word in penalty_text.lower() for word in ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'hundred', 'thousand', 'million']):
583
- penalty_value = w2n.word_to_num(penalty_text.split('dollars')[0].strip())
584
- else:
585
- penalty_value = float(re.sub(r'[^\d.]', '', penalty_text))
586
- penalties.append(penalty_value)
587
- except:
588
- continue
589
- return penalties
590
-
591
- def calculate_risk_score(penalty_count: int, penalty_values: List[float], obligation_count: int, delay_count: int) -> Tuple[float, str]:
592
- """Calculate risk score based on various factors"""
593
- score = 0
594
- score += min(penalty_count * 5, 30)
595
-
596
- if penalty_values:
597
- avg_penalty = sum(penalty_values) / len(penalty_values)
598
- if avg_penalty > 1000000:
599
- score += 40
600
- elif avg_penalty > 100000:
601
- score += 25
602
- elif avg_penalty > 10000:
603
- score += 15
604
- else:
605
- score += 5
606
 
607
- score += min(obligation_count * 2, 20)
608
- score += min(delay_count * 10, 30)
609
- score = min(score, 100)
610
 
611
- if score < 30:
612
- return score, "Low"
613
- elif score < 70:
614
- return score, "Medium"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  else:
616
- return score, "High"
617
-
618
- def generate_risk_meter(risk_score: float) -> str:
619
- """Generate a visual risk meter with indicator"""
620
- position = risk_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
621
  return f"""
622
- <div class="risk-meter">
623
- <div class="risk-meter-indicator" style="left: {position}%"></div>
624
- </div>
625
- <div class="risk-meter-labels">
626
- <span>Low (0-30)</span>
627
- <span>Medium (31-69)</span>
628
- <span>High (70-100)</span>
 
 
 
 
 
 
 
 
 
 
 
629
  </div>
630
  """
631
 
632
- def generate_sentiment_meter(sentiment_score: float) -> str:
633
- """Generate a visual sentiment meter"""
634
- width = sentiment_score * 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
635
  return f"""
636
- <div class="sentiment-meter">
637
- <div class="sentiment-score" style="width: {width}%"></div>
638
- </div>
639
- <div style="display: flex; justify-content: space-between; margin-top: 5px;">
640
- <span>Negative</span>
641
- <span>Neutral</span>
642
- <span>Positive</span>
 
 
 
 
 
 
 
 
 
 
 
643
  </div>
644
  """
645
 
646
- def generate_heatmap(risk_level: str):
647
- """Generate a simple heatmap based on risk level"""
648
- try:
649
- fig, ax = plt.subplots(figsize=(8, 2))
650
-
651
- if risk_level == "Low":
652
- cmap = plt.cm.Blues
653
- color = '#4CAF50'
654
- elif risk_level == "Medium":
655
- cmap = plt.cm.Oranges
656
- color = '#FF9800'
657
- else:
658
- cmap = plt.cm.Reds
659
- color = '#F44336'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
 
661
- gradient = np.linspace(0, 1, 256).reshape(1, -1)
662
- gradient = np.vstack((gradient, gradient))
 
 
663
 
664
- ax.imshow(gradient, aspect='auto', cmap=cmap)
665
- ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level in ["High", "Medium"] else 'black',
666
- ha='center', va='center', fontsize=24, fontweight="bold")
 
667
 
668
- ax.set_axis_off()
669
- plt.tight_layout()
670
- return fig
671
- except Exception as e:
672
- logger.error(f"Heatmap generation failed: {str(e)}")
673
- raise Exception(f"Heatmap generation failed: {str(e)}")
674
-
675
- def format_warning_message(count: int, item_type: str, emoji: str) -> str:
676
- """Format warning message based on count with appropriate color coding"""
677
- if count == 0:
678
- return f"""<div class="success-box">✅ {emoji} No {item_type} clauses detected!</div>"""
679
- elif count < 3:
680
- return f"""<div class="info-box">🛈 {emoji} {count} {item_type} clauses detected</div>"""
681
- elif count < 5:
682
- return f"""<div class="warning-box">⚠ {emoji} {count} {item_type} clauses detected!</div>"""
683
- else:
684
- return f"""<div class="danger-box">🚨 {emoji} {count} {item_type} clauses detected!</div>"""
685
-
686
- def format_clause_example(example: str, index: int) -> str:
687
- """Format a clause example with proper wrapping and styling"""
688
- wrapped_text = textwrap.fill(example, width=80)
689
- return f"""
690
- <div class="clause-example">
691
- <span class="clause-number">{index}.</span> {wrapped_text}
692
  </div>
693
  """
694
 
@@ -716,29 +1143,13 @@ def analyze_pdf(file_obj) -> List:
716
  logger.warning(f"Sentiment analysis failed: {str(e)}. Using fallback score of 0.5.")
717
  sentiment_score = 0.5
718
 
719
- penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
720
- obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
721
- delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]
722
-
723
- penalty_counts = count_keywords(text, penalty_keywords)
724
- obligation_counts = count_keywords(text, obligation_keywords)
725
- delay_counts = count_keywords(text, delay_keywords)
726
 
727
- penalty_values = find_penalty_values(text)
728
-
729
- total_penalties = sum(penalty_counts.values())
730
- total_obligations = sum(obligation_counts.values())
731
- total_delays = sum(delay_counts.values())
732
-
733
- # Generate warning messages with emojis
734
- penalty_warning = format_warning_message(total_penalties, "penalty", "💰")
735
- obligation_warning = format_warning_message(total_obligations, "obligation", "📝")
736
- delay_warning = format_warning_message(total_delays, "delay", "⏱")
737
-
738
  try:
739
- risk_score, risk_level = calculate_risk_score(
740
- total_penalties, penalty_values, total_obligations, total_delays
741
- )
742
  except Exception as e:
743
  raise Exception(f"Risk score calculation failed: {str(e)}")
744
 
@@ -749,51 +1160,34 @@ def analyze_pdf(file_obj) -> List:
749
  except Exception as e:
750
  raise Exception(f"Visual generation failed: {str(e)}")
751
 
752
- # Format details with warning messages and emojis
753
- penalty_details = f"""
754
- {penalty_warning}
755
- <div class='penalty-box'>
756
- <div class='section-title'>💰 Penalty Clause Details</div>
757
- {"".join([f"<div class='count-item'><span class='count-label'><span style='color: var(--danger-color)'>•</span> {kw}</span><span class='count-value'>{count}</span></div>" for kw, count in penalty_counts.items()])}
758
- </div>
759
- """
760
-
761
- obligation_details = f"""
762
- {obligation_warning}
763
- <div class='obligation-box'>
764
- <div class='section-title'>📝 Obligation Clause Details</div>
765
- {"".join([f"<div class='count-item'><span class='count-label'><span style='color: var(--warning-color)'>•</span> {kw}</span><span class='count-value'>{count}</span></div>" for kw, count in obligation_counts.items()])}
766
- </div>
767
- """
768
-
769
- delay_details = f"""
770
- {delay_warning}
771
- <div class='delay-box'>
772
- <div class='section-title'>⏱ Delay Clause Details</div>
773
- {"".join([f"<div class='count-item'><span class='count-label'><span style='color: var(--info-color)'>•</span> {kw}</span><span class='count-value'>{count}</span></div>" for kw, count in delay_counts.items()])}
774
- </div>
775
- """
776
-
777
- penalty_amounts = "\n".join([f"<div class='count-item'><span class='count-label'>💰 Amount</span><span class='count-value'>${amt:,.2f}</span></div>" for amt in penalty_values[:5]]) if penalty_values else "<div class='success-box'>✅ No penalties found!</div>"
778
-
779
- penalty_sentences = []
780
- for sentence in re.split(r'(?<=[.!?])\s+', text):
781
- if any(kw.lower() in sentence.lower() for kw in penalty_keywords):
782
- penalty_sentences.append(sentence.strip())
783
-
784
- extracted_data = "\n".join([format_clause_example(sent, i+1) for i, sent in enumerate(penalty_sentences[:3])]) if penalty_sentences else "<div class='success-box'>✅ No penalty clauses found!</div>"
785
 
 
 
 
 
 
 
 
 
 
 
 
 
786
  record_id = str(uuid.uuid4())
787
  sf_data = {
788
  'sentiment_score': sentiment_score,
789
  'risk_score': risk_score,
790
  'risk_level': risk_level,
791
  'record_id': record_id,
792
- 'penalty_examples': extracted_data,
793
- 'penalty_details': "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
794
- 'penalty_amounts': "\n".join([f"${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "",
795
- 'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
796
- 'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
797
  }
798
 
799
  try:
@@ -803,19 +1197,6 @@ def analyze_pdf(file_obj) -> List:
803
  logger.error(f"Salesforce record creation failed: {str(e)}")
804
  salesforce_id = "N/A"
805
 
806
- # Prepare data for PDF report
807
- analysis_data = {
808
- 'document_name': os.path.basename(file_obj.name),
809
- 'sentiment_score': sentiment_score,
810
- 'risk_score': risk_score,
811
- 'risk_level': risk_level,
812
- 'penalty_count': total_penalties,
813
- 'penalty_values': penalty_values,
814
- 'obligation_count': total_obligations,
815
- 'delay_count': total_delays,
816
- 'record_id': record_id
817
- }
818
-
819
  try:
820
  pdf_buffer = generate_analysis_pdf(analysis_data)
821
  if pdf_buffer is None:
@@ -873,11 +1254,9 @@ def analyze_pdf(file_obj) -> List:
873
  </div>
874
  """,
875
  "", # Empty string for hidden risk visualization
876
- penalty_details,
877
- f"<div class='penalty-box'><div class='section-title'>💰 Penalty Amounts Found</div>{penalty_amounts}</div>",
878
- obligation_details,
879
- delay_details,
880
- f"<div class='result-box'><div class='section-title'>📜 Extracted Data</div>{extracted_data}</div>",
881
  sentiment_analysis_output,
882
  temp_file_path # Return temporary file path for PDF download
883
  ]
@@ -895,15 +1274,15 @@ def analyze_pdf(file_obj) -> List:
895
  </div>
896
  </div>
897
  """
898
- return [error_message] * 9
899
 
900
  # Create Gradio interface with dark mode compatibility
901
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
902
  gr.Markdown("""
903
  <div style='text-align: center; margin-bottom: 30px;'>
904
- <h1 style='color: var(--primary-color); margin-bottom: 10px;'>PDF Contract Analysis</h1>
905
  <p style='color: var(--secondary-color); font-size: 16px;'>
906
- Upload a contract PDF to analyze risks, obligations, and sentiment.
907
  </p>
908
  </div>
909
  """)
@@ -920,38 +1299,38 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
920
  Drag and drop your contract PDF file here.
921
  </div>
922
  """)
923
- submit_btn = gr.Button("Analyze", variant="primary")
924
 
925
  with gr.Column(scale=3):
926
  risk_summary = gr.HTML(label="Contract Risk Summary")
927
  risk_visualization = gr.HTML(label="Risk Visualization", visible=False, elem_id="risk-visualization")
928
 
929
  with gr.Row():
930
- with gr.Column():
931
- penalty_count = gr.HTML(label="Penalty Clauses Analysis")
932
- penalty_amounts = gr.HTML(label="Penalty Amounts Found")
933
-
934
- with gr.Column():
935
- obligation_count = gr.HTML(label="Obligation Clauses Analysis")
936
-
937
- with gr.Column():
938
- delay_count = gr.HTML(label="Delay Clauses Analysis")
939
 
940
  with gr.Row():
941
- extracted_data = gr.HTML(label="Extracted Data")
942
 
943
  with gr.Row():
944
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
 
 
945
  pdf_output = gr.File(label="Download Full Analysis Report (PDF)", file_types=[".pdf"])
946
 
947
  submit_btn.click(
948
  fn=analyze_pdf,
949
  inputs=[file_input],
950
  outputs=[
951
- risk_summary, risk_visualization,
952
- penalty_count, penalty_amounts,
953
- obligation_count, delay_count,
954
- extracted_data, sentiment_analysis, pdf_output
 
 
 
955
  ]
956
  )
957
 
 
347
  logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
348
  return 0.5
349
 
350
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The concatenated text of all pages that yielded text, each page
        followed by a newline. Pages with no extractable text are skipped.

    Raises:
        Exception: If opening or parsing the PDF fails. The underlying error
            is logged and attached as ``__cause__``.
    """
    try:
        with pdfplumber.open(pdf_path) as pdf:
            # Extract while the document is still open.
            page_texts = [page.extract_text() for page in pdf.pages]
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        logger.error(f"PDF text extraction failed: {str(e)}")
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"PDF text extraction failed: {str(e)}") from e
363
+
364
def extract_penalty_clauses(text: str) -> List[Dict]:
    """Extract penalty clauses with their amounts, type and surrounding context.

    Args:
        text: Contract text to scan.

    Returns:
        One dict per regex match whose amount could be converted to a number,
        with the keys ``type`` (label of the pattern that matched), ``clause``
        (the matched text), ``amount`` (the captured amount text),
        ``context`` (the sentence containing the match plus its neighbours),
        ``numeric_value`` (float, or the value returned by ``w2n`` for
        word-form amounts) and ``is_percentage``.
        Matches whose amount cannot be converted are logged and skipped.
    """
    # In every pattern the amount is the LAST capture group.
    # The percentage alternative comes first so "10%" is not consumed as a
    # plain number, and numeric amounts must start with a digit so a lone
    # comma is never captured as an amount.
    penalty_patterns = [
        (r'(penalty|fine|forfeit|liquidated damages|breach damages?)\s*(?:of|:)?\s*(\d+(?:\.\d+)?\s*%|\$?\s*\d[\d,]*(?:\.\d+)?)',
         "Standard monetary penalty"),
        (r'(penalty|fine)\s*(?:shall be|of|is)\s*(\d+\s*%\s*of)', "Percentage penalty"),
        (r'(?:not to exceed|up to|maximum of)\s*(\$?\s*\d[\d,]*(?:\.\d+)?)', "Maximum penalty"),
        (r'(?:sum of|amount of)\s*(\$?\s*\d[\d,]*(?:\.\d+)?)\s*(?:per\s*(?:day|month|year|violation))', "Recurring penalty"),
        (r'(?:penalty of|fine of)\s*([a-zA-Z\s]+)\s*dollars', "Word-form penalty")
    ]

    # Split once; the sentence list does not depend on the match.
    sentences = re.split(r'(?<=[.!?])\s+', text)

    penalties = []
    for pattern, penalty_type in penalty_patterns:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            full_match = match.group(0)
            amount_match = match.group(match.lastindex)

            # Context: the first sentence containing the match plus one
            # sentence on each side. Fall back to the clause itself when the
            # match straddles a sentence boundary.
            context = next(
                (
                    " ".join(sentences[max(0, i - 1):i + 2])
                    for i, sentence in enumerate(sentences)
                    if full_match in sentence
                ),
                full_match.strip(),
            )

            penalty = {
                'type': penalty_type,
                'clause': full_match.strip(),
                'amount': amount_match.strip(),
                'context': context,
                'numeric_value': None,
                'is_percentage': False
            }

            # Convert amount to numeric value
            try:
                if '%' in amount_match:
                    penalty['numeric_value'] = float(re.sub(r'[^\d.]', '', amount_match))
                    penalty['is_percentage'] = True
                elif re.search(r'\d', amount_match):
                    penalty['numeric_value'] = float(re.sub(r'[^\d.]', '', amount_match))
                else:
                    # No digits at all: treat the amount as number words.
                    penalty['numeric_value'] = w2n.word_to_num(amount_match.split('dollars')[0].strip())

                penalties.append(penalty)
            except Exception as e:
                # Deliberate best-effort: an unparseable amount drops only
                # this match, not the whole analysis.
                logger.warning(f"Couldn't convert penalty amount: {amount_match} - {str(e)}")
                continue

    return penalties
414
+
415
def extract_obligation_clauses(text: str) -> List[Dict]:
    """Collect obligation clauses, one entry per keyword occurrence.

    A clause is the run of non-period characters surrounding a keyword, up to
    and including the next period. Each entry holds the ``keyword`` that
    matched, the stripped ``clause``, the ``subject`` (capitalised text found
    directly before "shall", "must" or "is required to", else "Party") and the
    ``timeframe`` reported by ``extract_timeframe`` for that clause.
    """
    obligation_keywords = ["shall", "must", "required to", "obligated to", "duty", "responsibility", "covenant"]
    subject_pattern = r'(\b[A-Z][a-zA-Z\s,]+\b)\s+(?:shall|must|is required to)'

    def describe(keyword, clause):
        # Who is obligated; generic "Party" when no subject is recognisable.
        who = re.search(subject_pattern, clause)
        return {
            'keyword': keyword,
            'clause': clause,
            'subject': who.group(1) if who else "Party",
            'timeframe': extract_timeframe(clause)
        }

    results = []
    for keyword in obligation_keywords:
        clause_pattern = r'([^.]*?\b' + re.escape(keyword) + r'\b[^.]*\.)'
        results.extend(
            describe(keyword, hit.group(1).strip())
            for hit in re.finditer(clause_pattern, text, flags=re.IGNORECASE)
        )
    return results
439
+
440
def extract_delay_clauses(text: str) -> List[Dict]:
    """Collect delay-related clauses, one entry per keyword occurrence.

    Each entry holds the ``keyword`` that matched, the stripped ``clause``
    (text around the keyword up to and including the next period), the
    ``timeframe`` reported by ``extract_timeframe`` and a ``consequences``
    label derived from the first of "penalty", "termination" or "damages"
    found in the clause ("Not specified" when none is present).
    """
    delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence", "timely performance"]
    # Checked in order; the first term present in the clause wins.
    consequence_labels = (
        ("penalty", "Monetary penalty"),
        ("termination", "Contract termination"),
        ("damages", "Liability for damages"),
    )

    found = []
    for keyword in delay_keywords:
        clause_pattern = r'([^.]*?\b' + re.escape(keyword) + r'\b[^.]*\.)'
        for hit in re.finditer(clause_pattern, text, flags=re.IGNORECASE):
            clause = hit.group(1).strip()
            timeframe = extract_timeframe(clause)
            lowered = clause.lower()
            consequences = next(
                (label for term, label in consequence_labels if term in lowered),
                "Not specified",
            )
            found.append({
                'keyword': keyword,
                'clause': clause,
                'timeframe': timeframe,
                'consequences': consequences
            })
    return found
470
+
471
def extract_timeframe(text: str) -> str:
    """Return the first timeframe phrase found in a clause.

    The patterns are tried in a fixed order (case-insensitively) and the
    captured group of the first one that matches is returned. When none
    matches, the string "Not specified" is returned.
    """
    timeframe_patterns = (
        r'within\s*(\d+\s*(?:days?|months?|years?|hours?))',
        r'no\s*more\s*than\s*(\d+\s*(?:days?|months?|years?))',
        r'(\d+\s*(?:days?|months?|years?)\s*from\s*the\s*date)',
        r'(\d+\s*-\s*(?:day|month|year)\s*period)',
    )
    # Lazy generator: later patterns are not searched once one has matched.
    attempts = (re.search(candidate, text, flags=re.IGNORECASE) for candidate in timeframe_patterns)
    first_hit = next((hit for hit in attempts if hit), None)
    return first_hit.group(1) if first_hit else "Not specified"
486
+
487
def calculate_risk_score(penalties: List[Dict], obligations: List[Dict], delays: List[Dict]) -> Tuple[float, str]:
    """Combine penalty, obligation and delay findings into a 0-100 risk score.

    Penalties contribute up to 40 points, obligations up to 30 and delays up
    to 30. The result is a ``(score, level)`` pair where level is "Low" for a
    score below 30, "Medium" for a score below 70 and "High" otherwise.
    """

    def penalty_points(entry):
        amount = entry['numeric_value']
        if amount is None:
            return 5  # Penalty without specified amount
        if entry['is_percentage']:
            # Percentage penalties scale with their size, capped at 50.
            return min(amount * 2, 50)
        if amount > 1000000:
            return 40
        if amount > 100000:
            return 25
        if amount > 10000:
            return 15
        return 5

    def obligation_points(entry):
        keyword = entry['keyword'].lower()
        # The first matching term decides the base points.
        base = next(
            (pts for term, pts in (("shall", 3), ("must", 4), ("required", 2)) if term in keyword),
            1,
        )
        # Extra points when the obligation carries a day or hour timeframe.
        timeframe = entry['timeframe'].lower()
        if "days" in timeframe:
            return base + 2
        if "hours" in timeframe:
            return base + 3
        return base

    def delay_points(entry):
        outcome = entry['consequences'].lower()
        return next(
            (pts for term, pts in (("termination", 15), ("penalty", 10), ("damages", 8)) if term in outcome),
            5,
        )

    weighted_parts = (
        min(sum(penalty_points(item) for item in penalties) * 0.4, 40),
        min(sum(obligation_points(item) for item in obligations) * 0.3, 30),
        min(sum(delay_points(item) for item in delays) * 0.3, 30),
    )
    score = min(sum(weighted_parts), 100)

    level = "Low" if score < 30 else "Medium" if score < 70 else "High"
    return score, level
554
+
555
def generate_risk_meter(risk_score: float) -> str:
    """Build the HTML for the risk meter with its position indicator.

    Args:
        risk_score: Risk score; values outside 0-100 are clamped.

    Returns:
        An HTML snippet whose indicator is positioned at ``risk_score`` percent
        from the left, followed by the Low / Medium / High legend.
    """
    # Clamp so an out-of-range score cannot push the indicator off the bar.
    position = max(0, min(100, risk_score))
    # Legend ranges mirror the level thresholds: below 30 is Low, below 70 is
    # Medium, everything else is High.
    return f"""
    <div class="risk-meter">
        <div class="risk-meter-indicator" style="left: {position}%"></div>
    </div>
    <div class="risk-meter-labels">
        <span>Low (0-29)</span>
        <span>Medium (30-69)</span>
        <span>High (70-100)</span>
    </div>
    """
568
+
569
def generate_sentiment_meter(sentiment_score: float) -> str:
    """Build the HTML for the sentiment meter bar.

    Args:
        sentiment_score: Sentiment score; it is multiplied by 100 to get the
            bar width in percent, and the result is clamped to 0-100.

    Returns:
        An HTML snippet containing the filled bar and the
        Negative / Neutral / Positive legend.
    """
    # Clamp so the bar never overflows its track or gets a negative width.
    width = max(0.0, min(100.0, sentiment_score * 100))
    # One decimal keeps float noise (e.g. 56.99999999999999) out of the CSS.
    return f"""
    <div class="sentiment-meter">
        <div class="sentiment-score" style="width: {width:.1f}%"></div>
    </div>
    <div style="display: flex; justify-content: space-between; margin-top: 5px;">
        <span>Negative</span>
        <span>Neutral</span>
        <span>Positive</span>
    </div>
    """
582
+
583
def generate_heatmap(risk_level: str):
    """Render a gradient banner labelled with the risk level.

    Args:
        risk_level: "Low" uses the Blues colormap, "Medium" the Oranges
            colormap; any other value uses Reds.

    Returns:
        The matplotlib figure containing the banner.

    Raises:
        Exception: If rendering fails. The figure is closed, the error is
            logged and the original exception is attached as ``__cause__``.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 2))

        if risk_level == "Low":
            cmap = plt.cm.Blues
        elif risk_level == "Medium":
            cmap = plt.cm.Oranges
        else:
            cmap = plt.cm.Reds

        # Two identical rows of a 0..1 ramp, shown as a horizontal gradient.
        gradient = np.linspace(0, 1, 256).reshape(1, -1)
        gradient = np.vstack((gradient, gradient))

        ax.imshow(gradient, aspect='auto', cmap=cmap)
        ax.text(128, 0.5, f"{risk_level} Risk", color='white' if risk_level in ["High", "Medium"] else 'black',
                ha='center', va='center', fontsize=24, fontweight="bold")

        ax.set_axis_off()
        # Lay out this figure explicitly instead of pyplot's "current" figure.
        fig.tight_layout()
        return fig
    except Exception as e:
        # Do not leave a half-built figure registered with pyplot.
        if fig is not None:
            plt.close(fig)
        logger.error(f"Heatmap generation failed: {str(e)}")
        raise Exception(f"Heatmap generation failed: {str(e)}") from e
611
+
612
def format_clause_example(example: str, index: int) -> str:
    """Format a clause example as a numbered HTML snippet.

    Args:
        example: Clause text; it is wrapped at 80 columns and HTML-escaped.
        index: Number shown in front of the clause.

    Returns:
        A ``<div class="clause-example">`` snippet containing the number and
        the escaped, wrapped clause text.
    """
    import html  # local import so the block does not depend on file-level imports

    # The clause comes from document text, so escape it before embedding it in
    # markup. Wrapping happens first so the width is measured on visible text.
    wrapped_text = html.escape(textwrap.fill(example, width=80))
    return f"""
    <div class="clause-example">
        <span class="clause-number">{index}.</span> {wrapped_text}
    </div>
    """
620
+
621
  def generate_analysis_pdf(analysis_data: Dict) -> BytesIO:
622
+ """Generate a comprehensive PDF report with detailed analysis results"""
623
  try:
624
  pdf_file = BytesIO()
625
  c = canvas.Canvas(pdf_file, pagesize=letter)
 
630
  c.setFont("Helvetica", 10)
631
  c.drawString(1 * inch, 10.2 * inch, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
632
  c.drawString(1 * inch, 10 * inch, f"Document: {analysis_data.get('document_name', 'Unknown')}")
 
 
633
  c.line(1 * inch, 9.8 * inch, 7.5 * inch, 9.8 * inch)
634
 
635
  # Risk Summary Section
 
638
  c.drawString(1 * inch, y_position, "1. Risk Summary")
639
  y_position -= 0.3 * inch
640
 
 
641
  risk_level = analysis_data['risk_level']
642
  risk_color = {
643
  "Low": "#4CAF50",
 
671
  # Detailed Metrics Section
672
  y_position -= 0.3 * inch
673
  c.setFont("Helvetica-Bold", 14)
674
+ c.drawString(1 * inch, y_position, "2. Detailed Analysis")
675
  y_position -= 0.3 * inch
676
 
677
+ # Penalty Details
678
  c.setFont("Helvetica-Bold", 12)
679
+ c.drawString(1 * inch, y_position, "2.1 Penalty Clauses")
680
  y_position -= 0.2 * inch
681
  c.setFont("Helvetica", 10)
682
+ c.drawString(1.2 * inch, y_position, f"Total penalty clauses found: {len(analysis_data['penalties'])}")
 
 
 
 
 
 
683
  y_position -= 0.2 * inch
 
 
684
 
685
+ if analysis_data['penalties']:
686
+ # Find max, min, and average penalties
687
+ numeric_penalties = [p['numeric_value'] for p in analysis_data['penalties'] if p['numeric_value'] is not None]
688
+ if numeric_penalties:
689
+ max_penalty = max(numeric_penalties)
690
+ min_penalty = min(numeric_penalties)
691
+ avg_penalty = sum(numeric_penalties)/len(numeric_penalties)
692
+
693
+ c.drawString(1.2 * inch, y_position, f"Highest penalty amount: {max_penalty:,.2f}{'%' if analysis_data['penalties'][numeric_penalties.index(max_penalty)]['is_percentage'] else '$'}")
694
+ y_position -= 0.2 * inch
695
+ c.drawString(1.2 * inch, y_position, f"Average penalty amount: {avg_penalty:,.2f}{'%' if any(p['is_percentage'] for p in analysis_data['penalties']) else '$'}")
696
+ y_position -= 0.2 * inch
697
+ c.drawString(1.2 * inch, y_position, f"Lowest penalty amount: {min_penalty:,.2f}{'%' if analysis_data['penalties'][numeric_penalties.index(min_penalty)]['is_percentage'] else '$'}")
698
+ y_position -= 0.2 * inch
699
+
700
+ # Example penalty clauses
701
+ c.drawString(1.2 * inch, y_position, "Example Penalty Clauses:")
702
+ y_position -= 0.2 * inch
703
+
704
+ for i, penalty in enumerate(analysis_data['penalties'][:3]):
705
+ c.drawString(1.4 * inch, y_position, f"{i+1}. {penalty['type']}: {penalty['amount']}")
706
+ y_position -= 0.2 * inch
707
+ for line in textwrap.wrap(penalty['clause'], width=90):
708
+ c.drawString(1.6 * inch, y_position, line)
709
+ y_position -= 0.2 * inch
710
+ y_position -= 0.1 * inch
711
+ else:
712
+ c.drawString(1.2 * inch, y_position, "No penalty clauses detected.")
713
+ y_position -= 0.2 * inch
714
+
715
+ y_position -= 0.3 * inch
716
+
717
+ # Obligation Details
718
+ if y_position < 2 * inch:
719
+ c.showPage()
720
+ y_position = 10.5 * inch
721
+
722
  c.setFont("Helvetica-Bold", 12)
723
+ c.drawString(1 * inch, y_position, "2.2 Obligation Clauses")
724
  y_position -= 0.2 * inch
725
  c.setFont("Helvetica", 10)
726
+ c.drawString(1.2 * inch, y_position, f"Total obligation clauses found: {len(analysis_data['obligations'])}")
727
  y_position -= 0.2 * inch
728
+
729
+ if analysis_data['obligations']:
730
+ # Count by keyword
731
+ keyword_counts = {}
732
+ for obligation in analysis_data['obligations']:
733
+ keyword = obligation['keyword']
734
+ keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
735
+
736
+ c.drawString(1.2 * inch, y_position, "Obligation Keywords:")
737
  y_position -= 0.2 * inch
738
+ for keyword, count in keyword_counts.items():
739
+ c.drawString(1.4 * inch, y_position, f"- {keyword}: {count}")
740
+ y_position -= 0.2 * inch
741
+
742
+ # Example obligations
743
+ c.drawString(1.2 * inch, y_position, "Example Obligation Clauses:")
744
  y_position -= 0.2 * inch
745
+
746
+ for i, obligation in enumerate(analysis_data['obligations'][:3]):
747
+ c.drawString(1.4 * inch, y_position, f"{i+1}. {obligation['keyword']} (Subject: {obligation['subject']})")
748
+ y_position -= 0.2 * inch
749
+ c.drawString(1.4 * inch, y_position, f"Timeframe: {obligation['timeframe']}")
750
+ y_position -= 0.2 * inch
751
+ for line in textwrap.wrap(obligation['clause'], width=90):
752
+ c.drawString(1.6 * inch, y_position, line)
753
+ y_position -= 0.2 * inch
754
+ y_position -= 0.1 * inch
755
+ else:
756
+ c.drawString(1.2 * inch, y_position, "No obligation clauses detected.")
757
+ y_position -= 0.2 * inch
758
+
759
+ y_position -= 0.3 * inch
760
 
761
+ # Delay Details
762
+ if y_position < 2 * inch:
763
+ c.showPage()
764
+ y_position = 10.5 * inch
765
+
766
  c.setFont("Helvetica-Bold", 12)
767
+ c.drawString(1 * inch, y_position, "2.3 Delay Clauses")
768
  y_position -= 0.2 * inch
769
  c.setFont("Helvetica", 10)
770
+ c.drawString(1.2 * inch, y_position, f"Total delay clauses found: {len(analysis_data['delays'])}")
771
  y_position -= 0.2 * inch
 
 
772
 
773
+ if analysis_data['delays']:
774
+ # Count by consequence
775
+ consequence_counts = {}
776
+ for delay in analysis_data['delays']:
777
+ consequence = delay['consequences']
778
+ consequence_counts[consequence] = consequence_counts.get(consequence, 0) + 1
779
+
780
+ c.drawString(1.2 * inch, y_position, "Delay Consequences:")
781
+ y_position -= 0.2 * inch
782
+ for consequence, count in consequence_counts.items():
783
+ c.drawString(1.4 * inch, y_position, f"- {consequence}: {count}")
784
+ y_position -= 0.2 * inch
785
+
786
+ # Example delays
787
+ c.drawString(1.2 * inch, y_position, "Example Delay Clauses:")
788
+ y_position -= 0.2 * inch
789
+
790
+ for i, delay in enumerate(analysis_data['delays'][:3]):
791
+ c.drawString(1.4 * inch, y_position, f"{i+1}. {delay['keyword']} (Consequence: {delay['consequences']})")
792
+ y_position -= 0.2 * inch
793
+ c.drawString(1.4 * inch, y_position, f"Timeframe: {delay['timeframe']}")
794
+ y_position -= 0.2 * inch
795
+ for line in textwrap.wrap(delay['clause'], width=90):
796
+ c.drawString(1.6 * inch, y_position, line)
797
+ y_position -= 0.2 * inch
798
+ y_position -= 0.1 * inch
799
+ else:
800
+ c.drawString(1.2 * inch, y_position, "No delay clauses detected.")
801
+ y_position -= 0.2 * inch
802
+
803
+ y_position -= 0.3 * inch
804
+
805
+ # Sentiment Analysis
806
+ if y_position < 2 * inch:
807
+ c.showPage()
808
+ y_position = 10.5 * inch
809
+
810
  c.setFont("Helvetica-Bold", 12)
811
+ c.drawString(1 * inch, y_position, "2.4 Sentiment Analysis")
812
  y_position -= 0.2 * inch
813
  c.setFont("Helvetica", 10)
814
+ sentiment_score = analysis_data['sentiment_score']
815
+ sentiment_text = (
816
+ "Positive (favorable language)" if sentiment_score > 0.6 else
817
+ "Negative (adversarial language)" if sentiment_score < 0.4 else
818
+ "Neutral (balanced language)"
819
+ )
820
+ c.drawString(1.2 * inch, y_position, f"Score: {sentiment_score:.2f} - {sentiment_text}")
821
+ y_position -= 0.2 * inch
822
+ c.drawString(1.2 * inch, y_position, "Interpretation: Measures the overall tone of the contract language.")
823
  y_position -= 0.2 * inch
 
 
824
 
825
  # Key Findings Section
826
  if y_position < 2 * inch:
 
836
  findings = []
837
  if analysis_data['risk_level'] == "High":
838
  findings.append("⚠️ High-risk contract requiring immediate legal review")
839
+ if len(analysis_data['penalties']) > 5:
840
+ findings.append(f"⚠️ High number of penalty clauses ({len(analysis_data['penalties'])})")
841
+ if any(p['is_percentage'] for p in analysis_data['penalties']):
842
+ findings.append("⚠️ Percentage-based penalties detected (may have significant impact)")
843
+ if len(analysis_data['obligations']) > 10:
844
+ findings.append(f"📝 Numerous obligations ({len(analysis_data['obligations'])}) that may require tracking")
845
  if analysis_data['sentiment_score'] < 0.4:
846
  findings.append("🔍 Contract language appears adversarial (low sentiment score)")
847
 
 
863
  if analysis_data['risk_level'] == "High":
864
  recommendations.append("• Engage legal counsel for comprehensive review")
865
  recommendations.append("• Negotiate penalty clauses and liability terms")
866
+ if len(analysis_data['penalties']) > 0:
867
  recommendations.append("• Review all penalty clauses for fairness and applicability")
868
+ if any(p['is_percentage'] for p in analysis_data['penalties']):
869
+ recommendations.append("• Pay special attention to percentage-based penalties which may have significant impact")
870
+ if len(analysis_data['obligations']) > 10:
871
  recommendations.append("• Create an obligation tracking system")
872
  if analysis_data['sentiment_score'] < 0.4:
873
  recommendations.append("• Consider negotiating more balanced language")
 
912
  logger.error(f"Failed to save to Salesforce: {str(e)}")
913
  raise Exception(f"Salesforce record creation failed: {str(e)}")
914
 
915
def format_penalty_details(penalties: List[Dict]) -> str:
    """Render the penalty-clause analysis panel as an HTML fragment.

    Args:
        penalties: Penalty records. Each record is read for the keys
            ``'type'``, ``'clause'``, ``'numeric_value'`` (``None`` when the
            amount could not be quantified) and ``'is_percentage'``.

    Returns:
        A ``<div class='penalty-box'>`` fragment holding a severity banner,
        highest/average/lowest amounts, a per-type count and up to three
        example clauses rendered by ``format_clause_example`` (defined
        elsewhere in this file). For empty input, a short "no penalty
        clauses" box is returned instead.
    """
    # Local imports keep this block self-contained.
    from collections import Counter
    from html import escape

    if not penalties:
        return """
        <div class='penalty-box'>
            <div class='section-title'>💰 Penalty Clauses Analysis</div>
            <div class='success-box'>✅ No penalty clauses detected!</div>
        </div>
        """

    penalty_count = len(penalties)
    # The empty case returned above, so the severity scale starts at "info".
    if penalty_count < 3:
        warning_level, warning_emoji = "info-box", "🛈"
    elif penalty_count < 5:
        warning_level, warning_emoji = "warning-box", "⚠"
    else:
        warning_level, warning_emoji = "danger-box", "🚨"

    # Statistics are computed only over penalties with a quantified amount.
    quantified = [p for p in penalties if p['numeric_value'] is not None]
    if quantified:
        # max()/min() return the first extreme record, which matches looking
        # the extreme value up again with list.index().
        highest = max(quantified, key=lambda p: p['numeric_value'])
        lowest = min(quantified, key=lambda p: p['numeric_value'])
        amounts = [p['numeric_value'] for p in quantified]
        avg_penalty = sum(amounts) / len(amounts)
        # NOTE(review): percentage and currency amounts are averaged together;
        # the average is labelled '%' as soon as any clause is percentage-based.
        avg_unit = '%' if any(p['is_percentage'] for p in penalties) else '$'

        stats_html = f"""
        <div class='count-item'>
            <span class='count-label'>Highest Penalty</span>
            <span class='count-value'>{highest['numeric_value']:,.2f}{'%' if highest['is_percentage'] else '$'}</span>
        </div>
        <div class='count-item'>
            <span class='count-label'>Average Penalty</span>
            <span class='count-value'>{avg_penalty:,.2f}{avg_unit}</span>
        </div>
        <div class='count-item'>
            <span class='count-label'>Lowest Penalty</span>
            <span class='count-value'>{lowest['numeric_value']:,.2f}{'%' if lowest['is_percentage'] else '$'}</span>
        </div>
        """
    else:
        stats_html = "<div class='info-box'>🛈 Penalty amounts could not be quantified</div>"

    # Penalty types breakdown. Labels come from the analysed document, so they
    # are escaped before being placed into markup.
    type_counts = Counter(p['type'] for p in penalties)
    types_html = "\n".join(
        f"<div class='count-item'><span class='count-label'>{escape(str(p_type))}</span>"
        f"<span class='count-value'>{count}</span></div>"
        for p_type, count in type_counts.items()
    )

    # Example clauses (first three only).
    example_clauses = "\n".join(
        format_clause_example(penalty['clause'], position)
        for position, penalty in enumerate(penalties[:3], start=1)
    )
    examples_block = example_clauses or "<div class='info-box'>No example clauses available</div>"

    return f"""
    <div class='penalty-box'>
        <div class='section-title'>💰 Penalty Clauses Analysis</div>
        <div class='{warning_level}'>{warning_emoji} {penalty_count} penalty clauses detected!</div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📊 Penalty Statistics</div>
            {stats_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📝 Penalty Types</div>
            {types_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📜 Example Penalty Clauses</div>
            {examples_block}
        </div>
    </div>
    """
993
 
994
def format_obligation_details(obligations: List[Dict]) -> str:
    """Render the obligation-clause analysis panel as an HTML fragment.

    Args:
        obligations: Obligation records. Each record is read for the keys
            ``'keyword'``, ``'timeframe'`` and ``'clause'``.

    Returns:
        A ``<div class='obligation-box'>`` fragment holding a severity banner,
        per-keyword counts, per-timeframe counts and up to three example
        clauses rendered by ``format_clause_example`` (defined elsewhere in
        this file). For empty input, a short "no obligation clauses" box is
        returned instead.
    """
    # Local imports keep this block self-contained.
    from collections import Counter
    from html import escape

    if not obligations:
        return """
        <div class='obligation-box'>
            <div class='section-title'>📝 Obligation Clauses Analysis</div>
            <div class='success-box'>✅ No obligation clauses detected!</div>
        </div>
        """

    obligation_count = len(obligations)
    # The empty case returned above, so the severity scale starts at "info".
    if obligation_count < 5:
        warning_level, warning_emoji = "info-box", "🛈"
    elif obligation_count < 10:
        warning_level, warning_emoji = "warning-box", "⚠"
    else:
        warning_level, warning_emoji = "danger-box", "🚨"

    def count_rows(counts) -> str:
        # Labels come from the analysed document, so escape them for HTML.
        return "\n".join(
            f"<div class='count-item'><span class='count-label'>{escape(str(label))}</span>"
            f"<span class='count-value'>{count}</span></div>"
            for label, count in counts.items()
        )

    # Keyword breakdown.
    keywords_html = count_rows(Counter(o['keyword'] for o in obligations))

    # Timeframe analysis.
    # NOTE(review): a single distinct timeframe value is reported as
    # "unspecified"; presumably the extractor emits one placeholder value
    # when no timeframe is found. Confirm against the extractor.
    timeframe_counts = Counter(o['timeframe'] for o in obligations)
    if len(timeframe_counts) > 1:
        timeframes_html = count_rows(timeframe_counts)
    else:
        timeframes_html = "<div class='info-box'>Most obligations don't specify exact timeframes</div>"

    # Example clauses (first three only).
    example_clauses = "\n".join(
        format_clause_example(obligation['clause'], position)
        for position, obligation in enumerate(obligations[:3], start=1)
    )
    examples_block = example_clauses or "<div class='info-box'>No example clauses available</div>"

    return f"""
    <div class='obligation-box'>
        <div class='section-title'>📝 Obligation Clauses Analysis</div>
        <div class='{warning_level}'>{warning_emoji} {obligation_count} obligation clauses detected!</div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📊 Obligation Keywords</div>
            {keywords_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>⏱ Timeframes</div>
            {timeframes_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📜 Example Obligation Clauses</div>
            {examples_block}
        </div>
    </div>
    """
1057
 
1058
def format_delay_details(delays: List[Dict]) -> str:
    """Render the delay-clause analysis panel as an HTML fragment.

    Args:
        delays: Delay records. Each record is read for the keys
            ``'consequences'``, ``'timeframe'`` and ``'clause'``.

    Returns:
        A ``<div class='delay-box'>`` fragment holding a severity banner,
        per-consequence counts, per-timeframe counts and up to three example
        clauses rendered by ``format_clause_example`` (defined elsewhere in
        this file). For empty input, a short "no delay clauses" box is
        returned instead.
    """
    # Local imports keep this block self-contained.
    from collections import Counter
    from html import escape

    if not delays:
        return """
        <div class='delay-box'>
            <div class='section-title'>⏱ Delay Clauses Analysis</div>
            <div class='success-box'>✅ No delay clauses detected!</div>
        </div>
        """

    delay_count = len(delays)
    # The empty case returned above, so the severity scale starts at "info".
    if delay_count < 3:
        warning_level, warning_emoji = "info-box", "🛈"
    elif delay_count < 5:
        warning_level, warning_emoji = "warning-box", "⚠"
    else:
        warning_level, warning_emoji = "danger-box", "🚨"

    def count_rows(counts) -> str:
        # Labels come from the analysed document, so escape them for HTML.
        return "\n".join(
            f"<div class='count-item'><span class='count-label'>{escape(str(label))}</span>"
            f"<span class='count-value'>{count}</span></div>"
            for label, count in counts.items()
        )

    # Consequences breakdown.
    consequences_html = count_rows(Counter(d['consequences'] for d in delays))

    # Timeframe analysis.
    # NOTE(review): a single distinct timeframe value is reported as
    # "unspecified"; presumably the extractor emits one placeholder value
    # when no timeframe is found. Confirm against the extractor.
    timeframe_counts = Counter(d['timeframe'] for d in delays)
    if len(timeframe_counts) > 1:
        timeframes_html = count_rows(timeframe_counts)
    else:
        timeframes_html = "<div class='info-box'>Most delay clauses don't specify exact timeframes</div>"

    # Example clauses (first three only).
    example_clauses = "\n".join(
        format_clause_example(delay['clause'], position)
        for position, delay in enumerate(delays[:3], start=1)
    )
    examples_block = example_clauses or "<div class='info-box'>No example clauses available</div>"

    return f"""
    <div class='delay-box'>
        <div class='section-title'>⏱ Delay Clauses Analysis</div>
        <div class='{warning_level}'>{warning_emoji} {delay_count} delay clauses detected!</div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>⚠️ Consequences</div>
            {consequences_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>⏱ Timeframes</div>
            {timeframes_html}
        </div>

        <div style='margin-top: 15px;'>
            <div class='section-title' style='font-size: 16px;'>📜 Example Delay Clauses</div>
            {examples_block}
        </div>
    </div>
    """
1121
 
 
1143
  logger.warning(f"Sentiment analysis failed: {str(e)}. Using fallback score of 0.5.")
1144
  sentiment_score = 0.5
1145
 
1146
+ # Extract detailed information
1147
+ penalties = extract_penalty_clauses(text)
1148
+ obligations = extract_obligation_clauses(text)
1149
+ delays = extract_delay_clauses(text)
 
 
 
1150
 
 
 
 
 
 
 
 
 
 
 
 
1151
  try:
1152
+ risk_score, risk_level = calculate_risk_score(penalties, obligations, delays)
 
 
1153
  except Exception as e:
1154
  raise Exception(f"Risk score calculation failed: {str(e)}")
1155
 
 
1160
  except Exception as e:
1161
  raise Exception(f"Visual generation failed: {str(e)}")
1162
 
1163
+ # Format details for display
1164
+ penalty_html = format_penalty_details(penalties)
1165
+ obligation_html = format_obligation_details(obligations)
1166
+ delay_html = format_delay_details(delays)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1167
 
1168
+ # Prepare data for PDF report
1169
+ analysis_data = {
1170
+ 'document_name': os.path.basename(file_obj.name),
1171
+ 'sentiment_score': sentiment_score,
1172
+ 'risk_score': risk_score,
1173
+ 'risk_level': risk_level,
1174
+ 'penalties': penalties,
1175
+ 'obligations': obligations,
1176
+ 'delays': delays
1177
+ }
1178
+
1179
+ # Prepare data for Salesforce
1180
  record_id = str(uuid.uuid4())
1181
  sf_data = {
1182
  'sentiment_score': sentiment_score,
1183
  'risk_score': risk_score,
1184
  'risk_level': risk_level,
1185
  'record_id': record_id,
1186
+ 'penalty_examples': "\n".join([p['clause'] for p in penalties[:5]]),
1187
+ 'penalty_details': "\n".join([f"{p['type']}: {p['amount']}" for p in penalties[:5]]),
1188
+ 'penalty_amounts': "\n".join([f"{p['amount']} ({'%' if p['is_percentage'] else '$'})" for p in penalties[:5] if p['numeric_value']]),
1189
+ 'obligation_details': "\n".join([f"{o['keyword']}: {o['subject']}" for o in obligations[:5]]),
1190
+ 'delay_details': "\n".join([f"{d['keyword']}: {d['consequences']}" for d in delays[:5]])
1191
  }
1192
 
1193
  try:
 
1197
  logger.error(f"Salesforce record creation failed: {str(e)}")
1198
  salesforce_id = "N/A"
1199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1200
  try:
1201
  pdf_buffer = generate_analysis_pdf(analysis_data)
1202
  if pdf_buffer is None:
 
1254
  </div>
1255
  """,
1256
  "", # Empty string for hidden risk visualization
1257
+ penalty_html,
1258
+ obligation_html,
1259
+ delay_html,
 
 
1260
  sentiment_analysis_output,
1261
  temp_file_path # Return temporary file path for PDF download
1262
  ]
 
1274
  </div>
1275
  </div>
1276
  """
1277
+ return [error_message] * 6
1278
 
1279
  # Create Gradio interface with dark mode compatibility
1280
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
1281
  gr.Markdown("""
1282
  <div style='text-align: center; margin-bottom: 30px;'>
1283
+ <h1 style='color: var(--primary-color); margin-bottom: 10px;'>Advanced Contract Risk Analyzer</h1>
1284
  <p style='color: var(--secondary-color); font-size: 16px;'>
1285
+ Upload a contract PDF to analyze risks, obligations, penalties, and sentiment with detailed reporting.
1286
  </p>
1287
  </div>
1288
  """)
 
1299
  Drag and drop your contract PDF file here.
1300
  </div>
1301
  """)
1302
+ submit_btn = gr.Button("Analyze Contract", variant="primary")
1303
 
1304
  with gr.Column(scale=3):
1305
  risk_summary = gr.HTML(label="Contract Risk Summary")
1306
  risk_visualization = gr.HTML(label="Risk Visualization", visible=False, elem_id="risk-visualization")
1307
 
1308
  with gr.Row():
1309
+ penalty_analysis = gr.HTML(label="Penalty Clauses Analysis")
1310
+
1311
+ with gr.Row():
1312
+ obligation_analysis = gr.HTML(label="Obligation Clauses Analysis")
 
 
 
 
 
1313
 
1314
  with gr.Row():
1315
+ delay_analysis = gr.HTML(label="Delay Clauses Analysis")
1316
 
1317
  with gr.Row():
1318
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
1319
+
1320
+ with gr.Row():
1321
  pdf_output = gr.File(label="Download Full Analysis Report (PDF)", file_types=[".pdf"])
1322
 
1323
  submit_btn.click(
1324
  fn=analyze_pdf,
1325
  inputs=[file_input],
1326
  outputs=[
1327
+ risk_summary,
1328
+ risk_visualization,
1329
+ penalty_analysis,
1330
+ obligation_analysis,
1331
+ delay_analysis,
1332
+ sentiment_analysis,
1333
+ pdf_output
1334
  ]
1335
  )
1336