Kushalmanda committed on
Commit
716b887
·
verified ·
1 Parent(s): 3d25a6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -65
app.py CHANGED
@@ -15,10 +15,11 @@ import textwrap
15
  from reportlab.lib.pagesizes import letter
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
17
  from reportlab.lib.styles import getSampleStyleSheet
18
- from datetime import datetime
 
19
 
20
- # Set up logging with timestamp
21
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
22
  logger = logging.getLogger(__name__)
23
 
24
  # Custom CSS for styling with dark mode compatibility
@@ -357,6 +358,7 @@ SF_USERNAME = "Kushalpavansekharm503@agentforce.com"
357
  SF_PASSWORD = "Kushal@123"
358
  SF_TOKEN = "WwUIFWBVUjeKn9VPKyWJmawY0"
359
 
 
360
  def authenticate_salesforce() -> Salesforce:
361
  """Authenticate with Salesforce and return a Salesforce client"""
362
  try:
@@ -369,7 +371,7 @@ def authenticate_salesforce() -> Salesforce:
369
  return sf
370
  except Exception as e:
371
  logger.error(f"Failed to authenticate with Salesforce: {str(e)}")
372
- raise Exception(f"Salesforce authentication failed: {str(e)}")
373
 
374
  def get_hugging_face_sentiment(text: str) -> float:
375
  """Get sentiment score using Hugging Face model"""
@@ -383,35 +385,110 @@ def get_hugging_face_sentiment(text: str) -> float:
383
  logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
384
  return 0.5
385
 
386
- def generate_sentiment_pdf(sentiment_score: float) -> BytesIO:
387
- """Generate a PDF with sentiment analysis results"""
388
  try:
389
- buffer = BytesIO()
390
- doc = SimpleDocTemplate(buffer, pagesize=letter)
391
- styles = getSampleStyleSheet()
392
- story = []
393
-
394
- story.append(Paragraph("Sentiment Analysis Report", styles['Title']))
395
- story.append(Spacer(1, 12))
396
- sentiment_text = f"Sentiment Score: {sentiment_score:.2f} (Scale: 0.0 Negative to 1.0 Positive)"
397
- story.append(Paragraph(sentiment_text, styles['Normal']))
398
- story.append(Spacer(1, 12))
399
- interpretation = "Positive" if sentiment_score > 0.6 else "Negative" if sentiment_score < 0.4 else "Neutral"
400
- story.append(Paragraph(f"Interpretation: {interpretation}", styles['Normal']))
401
- current_time = datetime.now().strftime("%I:%M %p IST, %B %d, %Y")
402
- story.append(Spacer(1, 12))
403
- story.append(Paragraph(f"Generated on: {current_time}", styles['Normal']))
404
-
405
- doc.build(story)
406
- buffer.seek(0)
407
- return buffer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  except Exception as e:
409
- logger.error(f"PDF generation failed: {str(e)}")
410
- raise Exception(f"PDF generation failed: {str(e)}")
411
 
412
- def save_to_salesforce(sf: Salesforce, data: Dict) -> str:
413
- """Save analysis results to Salesforce Custom_Risk_Analysis__c object"""
414
  try:
 
415
  record = {
416
  'Sentiment_Score__c': data['sentiment_score'],
417
  'Risk_Score__c': data['risk_score'],
@@ -425,10 +502,17 @@ def save_to_salesforce(sf: Salesforce, data: Dict) -> str:
425
  }
426
  result = sf.Custom_Risk_Analysis__c.create(record)
427
  logger.info(f"Successfully created Salesforce record: {result['id']}")
428
- return result['id']
 
 
 
 
 
 
 
429
  except Exception as e:
430
  logger.error(f"Failed to save to Salesforce: {str(e)}")
431
- raise Exception(f"Salesforce record creation failed: {str(e)}")
432
 
433
  def extract_text_from_pdf(pdf_path: str) -> str:
434
  """Extract text from PDF using pdfplumber"""
@@ -579,9 +663,12 @@ def format_clause_example(example: str, index: int) -> str:
579
  </div>
580
  """
581
 
582
- def analyze_pdf(file_obj) -> tuple:
583
  """Main analysis function for Gradio interface"""
584
  try:
 
 
 
585
  if not file_obj:
586
  raise Exception("No PDF file uploaded. Please upload a valid PDF file.")
587
 
@@ -676,18 +763,24 @@ def analyze_pdf(file_obj) -> tuple:
676
  'risk_score': risk_score,
677
  'risk_level': risk_level,
678
  'record_id': record_id,
679
- 'penalty_examples': extracted_data,
680
  'penalty_details': "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
681
  'penalty_amounts': "\n".join([f"${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found",
 
682
  'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
683
- 'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()])
 
 
684
  }
685
 
686
  try:
687
- salesforce_record_id = save_to_salesforce(sf, sf_data)
688
- logger.info(f"Saved to Salesforce with ID: {salesforce_record_id}")
 
 
689
  except Exception as e:
690
  logger.error(f"Salesforce record creation failed: {str(e)}")
 
691
 
692
  box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
693
  risk_icon = "✅" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
@@ -697,24 +790,7 @@ def analyze_pdf(file_obj) -> tuple:
697
  "High": "This contract is high risk! Immediate legal review required."
698
  }
699
 
700
- # Generate PDF for download
701
- pdf_buffer = generate_sentiment_pdf(sentiment_score)
702
- pdf_file = (f"Sentiment_Analysis_{record_id}.pdf", pdf_buffer.getvalue(), "application/pdf")
703
-
704
- risk_summary_output = f"""
705
- <div class='result-box'>
706
- <div class='section-title'>{risk_icon} Contract Risk Summary</div>
707
- <div class='risk-row'>
708
- <span class='risk-label'>Overall Risk Score</span>
709
- <span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
710
- </div>
711
- {risk_meter}
712
- <div style='margin-top: 15px; font-size: 16px;'>
713
- <strong>Assessment:</strong> {risk_advice[risk_level]}
714
- </div>
715
- </div>
716
- """
717
-
718
  sentiment_analysis_output = f"""
719
  <div class='result-box'>
720
  <div class='section-title'>📊 Sentiment Analysis</div>
@@ -723,20 +799,35 @@ def analyze_pdf(file_obj) -> tuple:
723
  <span class='risk-score'>{sentiment_score:.2f}</span>
724
  </div>
725
  {sentiment_meter}
 
 
 
 
726
  </div>
727
  """
728
 
729
- return (
730
- risk_summary_output,
 
 
 
 
 
 
 
 
 
 
 
 
731
  "", # Empty string for hidden risk visualization
732
  penalty_details,
733
  f"<div class='penalty-box'><div class='section-title'>💰 Penalty Amounts Found</div>{penalty_amounts}</div>",
734
  obligation_details,
735
  delay_details,
736
  f"<div class='result-box'><div class='section-title'>📜 Extracted Data</div>{extracted_data}</div>",
737
- sentiment_analysis_output,
738
- pdf_file
739
- )
740
  except Exception as e:
741
  logger.error(f"Analysis failed: {str(e)}")
742
  error_message = f"""
@@ -751,7 +842,11 @@ def analyze_pdf(file_obj) -> tuple:
751
  </div>
752
  </div>
753
  """
754
- return [error_message] * 8 + (None,)
 
 
 
 
755
 
756
  # Create Gradio interface with dark mode compatibility
757
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
@@ -799,9 +894,6 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
799
  with gr.Row():
800
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
801
 
802
- with gr.Row():
803
- pdf_download = gr.File(label="Download Sentiment Report", visible=True)
804
-
805
  submit_btn.click(
806
  fn=analyze_pdf,
807
  inputs=[file_input],
@@ -809,8 +901,7 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
809
  risk_summary, risk_visualization,
810
  penalty_count, penalty_amounts,
811
  obligation_count, delay_count,
812
- extracted_data, sentiment_analysis,
813
- pdf_download
814
  ]
815
  )
816
 
 
15
  from reportlab.lib.pagesizes import letter
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
17
  from reportlab.lib.styles import getSampleStyleSheet
18
+ from reportlab.lib.units import inch
19
+ from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
20
 
21
+ # Set up logging
22
+ logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
  # Custom CSS for styling with dark mode compatibility
 
358
  SF_PASSWORD = "Kushal@123"
359
  SF_TOKEN = "WwUIFWBVUjeKn9VPKyWJmawY0"
360
 
361
+ @retry(stop_max_attempt_number=3, wait_fixed=2000, retry=retry_if_exception_type(Exception))
362
  def authenticate_salesforce() -> Salesforce:
363
  """Authenticate with Salesforce and return a Salesforce client"""
364
  try:
 
371
  return sf
372
  except Exception as e:
373
  logger.error(f"Failed to authenticate with Salesforce: {str(e)}")
374
+ raise
375
 
376
  def get_hugging_face_sentiment(text: str) -> float:
377
  """Get sentiment score using Hugging Face model"""
 
385
  logger.error(f"Hugging Face sentiment analysis failed: {str(e)}. Using fallback score.")
386
  return 0.5
387
 
388
def generate_contract_analysis_pdf(data: Dict, temp_dir: str) -> Tuple[str, str, BytesIO]:
    """Render the contract analysis results to a PDF, in memory and on disk.

    Args:
        data: Analysis results. Keys read here: 'record_id', 'risk_score',
            'risk_level', 'sentiment_score', 'penalty_counts',
            'obligation_counts', 'delay_counts' (mappings whose values are
            summed), and 'penalty_details', 'obligation_details',
            'delay_details', 'penalty_amounts', 'penalty_examples'
            (newline-separated text).
        temp_dir: Existing directory in which the PDF file is written.

    Returns:
        (pdf_path, file_url, pdf_buffer) on success, where pdf_buffer is
        rewound to position 0. On any failure the error is logged and
        ("", "", None) is returned instead of raising.
    """
    # Function-scope import so this block does not depend on a file-level
    # datetime import.
    from datetime import datetime, timedelta, timezone

    left_margin = 1 * inch
    body_indent = 1.2 * inch
    page_top = 10 * inch
    page_bottom = 1 * inch
    # Approximate number of 10pt Helvetica characters that fit between the
    # body indent and a 1-inch right margin on a letter page.
    wrap_width = 90

    try:
        # The report labels its timestamps "IST"; compute them in IST
        # explicitly rather than trusting the server's local timezone, and
        # read the clock once so every timestamp in the report agrees.
        ist_now = datetime.now(timezone(timedelta(hours=5, minutes=30)))

        pdf_filename = f"contract_analysis_{data['record_id']}_{int(time.time())}.pdf"
        pdf_path = os.path.join(temp_dir, pdf_filename)
        pdf_file = BytesIO()
        c = canvas.Canvas(pdf_file, pagesize=letter)

        # Fixed-position header on the first page.
        c.setFont("Helvetica-Bold", 16)
        c.drawString(left_margin, 10 * inch, "Contract Analysis Report")

        c.setFont("Helvetica", 12)
        c.drawString(left_margin, 9.5 * inch, f"Date: {ist_now.strftime('%Y-%m-%d')}")
        c.drawString(left_margin, 9.2 * inch, f"Time: {ist_now.strftime('%H:%M:%S')} IST")

        c.setFont("Helvetica-Bold", 14)
        c.drawString(left_margin, 8.7 * inch, f"Risk Score: {data['risk_score']:.1f}/100 ({data['risk_level']})")
        c.drawString(left_margin, 8.4 * inch, f"Sentiment Score: {data['sentiment_score']:.2f} (0.0 Negative to 1.0 Positive)")

        y_position = 8.0 * inch

        def draw_line(text, x, font_name, font_size, advance):
            """Draw one line at the cursor, starting a new page when needed."""
            nonlocal y_position
            if y_position < page_bottom:
                c.showPage()
                y_position = page_top
            # Set the font on every call: a new page needs it re-applied.
            c.setFont(font_name, font_size)
            c.drawString(x, y_position, text)
            y_position -= advance

        draw_line("Summary:", left_margin, "Helvetica-Bold", 12, 0.3 * inch)

        summary_data = {
            "Total Penalties": sum(data['penalty_counts'].values()),
            "Total Obligations": sum(data['obligation_counts'].values()),
            "Total Delays": sum(data['delay_counts'].values()),
            "Analysis Timestamp": ist_now.strftime("%Y-%m-%d %H:%M:%S IST"),
        }
        for key, value in summary_data.items():
            draw_line(f"{key}: {value}", left_margin, "Helvetica", 10, 0.25 * inch)

        y_position -= 0.5 * inch
        draw_line("Detailed Analysis:", left_margin, "Helvetica-Bold", 12, 0.3 * inch)

        sections = [
            ("Penalty Clauses", data['penalty_details']),
            ("Obligation Clauses", data['obligation_details']),
            ("Delay Clauses", data['delay_details']),
            ("Penalty Amounts", data['penalty_amounts']),
            ("Extracted Data", data['penalty_examples']),
        ]
        for title, content in sections:
            # Section titles go through the same page-break check as body
            # lines so they are never drawn below the bottom margin.
            draw_line(f"{title}:", left_margin, "Helvetica", 10, 0.2 * inch)
            for line in (content or "").split("\n"):
                # drawString does not wrap; split long lines so text stays on
                # the page. An empty line still consumes one row.
                for piece in textwrap.wrap(line, width=wrap_width) or [""]:
                    draw_line(piece, body_indent, "Helvetica", 10, 0.2 * inch)

        c.save()

        with open(pdf_path, "wb") as f:
            f.write(pdf_file.getvalue())
        # Rewind so callers can read the buffer from the start.
        pdf_file.seek(0)

        logger.info(f"PDF generated: {pdf_path}")
        return pdf_path, f"file://{pdf_path}", pdf_file
    except Exception as e:
        # Best-effort by design: callers treat ("", "", None) as "no PDF".
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"Error generating PDF: {e}")
        return "", "", None
459
+
460
def upload_pdf_to_salesforce(sf: Salesforce, pdf_file: BytesIO, report_id: str) -> str:
    """Upload a PDF to Salesforce as a ContentVersion and return its download URL.

    Args:
        sf: Authenticated Salesforce client.
        pdf_file: In-memory PDF; its full contents are uploaded regardless of
            the current stream position.
        report_id: Id passed as FirstPublishLocationId for the new file.

    Returns:
        The download URL built from the client's instance host and the new
        ContentVersion id, or "" when there is nothing to upload, the created
        version cannot be read back, or any error occurs (errors are logged,
        not raised).
    """
    try:
        # A BytesIO object is truthy even when empty, so inspect the bytes
        # themselves to detect "no PDF".
        pdf_bytes = pdf_file.getvalue() if pdf_file is not None else b""
        if not pdf_bytes:
            logger.error("No PDF file provided for upload")
            return ""

        # Read the clock once so Title and PathOnClient always share a suffix.
        stamp = int(time.time())
        encoded_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
        content_version_data = {
            "Title": f"Contract_Analysis_{stamp}",
            "PathOnClient": f"contract_analysis_{stamp}.pdf",
            "VersionData": encoded_pdf,
            "FirstPublishLocationId": report_id,
        }
        content_version = sf.ContentVersion.create(content_version_data)
        version_id = content_version['id']

        # Read the record back to confirm the version exists before handing
        # out a link to it.
        result = sf.query(f"SELECT Id, ContentDocumentId FROM ContentVersion WHERE Id = '{version_id}'")
        if not result['records']:
            logger.error("Failed to retrieve ContentVersion")
            return ""

        file_url = f"https://{sf.sf_instance}/sfc/servlet.shepherd/version/download/{version_id}"
        logger.info(f"PDF uploaded to Salesforce: {file_url}")
        return file_url
    except Exception as e:
        # Best-effort by design: the caller falls back when "" is returned.
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"Error uploading PDF to Salesforce: {e}")
        return ""
487
 
488
+ def save_to_salesforce(sf: Salesforce, data: Dict, pdf_file: BytesIO) -> Dict:
489
+ """Save analysis results and PDF to Salesforce, return record and file URLs"""
490
  try:
491
+ # Save Custom_Risk_Analysis__c record
492
  record = {
493
  'Sentiment_Score__c': data['sentiment_score'],
494
  'Risk_Score__c': data['risk_score'],
 
502
  }
503
  result = sf.Custom_Risk_Analysis__c.create(record)
504
  logger.info(f"Successfully created Salesforce record: {result['id']}")
505
+
506
+ # Upload PDF if available
507
+ download_url = upload_pdf_to_salesforce(sf, pdf_file, result['id']) if pdf_file else ""
508
+
509
+ return {
510
+ 'record_id': result['id'],
511
+ 'download_url': download_url
512
+ }
513
  except Exception as e:
514
  logger.error(f"Failed to save to Salesforce: {str(e)}")
515
+ raise Exception(f"Salesforce record or file creation failed: {str(e)}")
516
 
517
  def extract_text_from_pdf(pdf_path: str) -> str:
518
  """Extract text from PDF using pdfplumber"""
 
663
  </div>
664
  """
665
 
666
+ def analyze_pdf(file_obj) -> List:
667
  """Main analysis function for Gradio interface"""
668
  try:
669
+ import tempfile
670
+ temp_dir = tempfile.mkdtemp()
671
+
672
  if not file_obj:
673
  raise Exception("No PDF file uploaded. Please upload a valid PDF file.")
674
 
 
763
  'risk_score': risk_score,
764
  'risk_level': risk_level,
765
  'record_id': record_id,
766
+ 'penalty_counts': penalty_counts,
767
  'penalty_details': "\n".join([f"{kw}: {count}" for kw, count in penalty_counts.items()]),
768
  'penalty_amounts': "\n".join([f"${amt:,.2f}" for amt in penalty_values[:5]]) if penalty_values else "No specific penalty amounts found",
769
+ 'obligation_counts': obligation_counts,
770
  'obligation_details': "\n".join([f"{kw}: {count}" for kw, count in obligation_counts.items()]),
771
+ 'delay_counts': delay_counts,
772
+ 'delay_details': "\n".join([f"{kw}: {count}" for kw, count in delay_counts.items()]),
773
+ 'penalty_examples': extracted_data
774
  }
775
 
776
  try:
777
+ pdf_path, pdf_url, pdf_file = generate_contract_analysis_pdf(sf_data, temp_dir)
778
+ salesforce_result = save_to_salesforce(sf, sf_data, pdf_file)
779
+ download_url = salesforce_result['download_url'] or pdf_url
780
+ logger.info(f"Saved to Salesforce with Record ID: {salesforce_result['record_id']}, PDF URL: {download_url}")
781
  except Exception as e:
782
  logger.error(f"Salesforce record creation failed: {str(e)}")
783
+ download_url = pdf_url if pdf_url else ""
784
 
785
  box_class = "success-box" if risk_level == "Low" else "warning-box" if risk_level == "Medium" else "danger-box"
786
  risk_icon = "✅" if risk_level == "Low" else "⚠" if risk_level == "Medium" else "🚨"
 
790
  "High": "This contract is high risk! Immediate legal review required."
791
  }
792
 
793
+ # Add sentiment analysis PDF download link
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  sentiment_analysis_output = f"""
795
  <div class='result-box'>
796
  <div class='section-title'>📊 Sentiment Analysis</div>
 
799
  <span class='risk-score'>{sentiment_score:.2f}</span>
800
  </div>
801
  {sentiment_meter}
802
+ <div style='margin-top: 15px;'>
803
+ <strong>Download Full Report:</strong>
804
+ {'<a href="' + download_url + '" download="contract_analysis.pdf" style="color: var(--primary-color); text-decoration: underline;">Download PDF</a>' if download_url else 'Failed to generate PDF download link'}
805
+ </div>
806
  </div>
807
  """
808
 
809
+ return [
810
+ f"""
811
+ <div class='result-box'>
812
+ <div class='section-title'>{risk_icon} Contract Risk Summary</div>
813
+ <div class='risk-row'>
814
+ <span class='risk-label'>Overall Risk Score</span>
815
+ <span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
816
+ </div>
817
+ {risk_meter}
818
+ <div style='margin-top: 15px; font-size: 16px;'>
819
+ <strong>Assessment:</strong> {risk_advice[risk_level]}
820
+ </div>
821
+ </div>
822
+ """,
823
  "", # Empty string for hidden risk visualization
824
  penalty_details,
825
  f"<div class='penalty-box'><div class='section-title'>💰 Penalty Amounts Found</div>{penalty_amounts}</div>",
826
  obligation_details,
827
  delay_details,
828
  f"<div class='result-box'><div class='section-title'>📜 Extracted Data</div>{extracted_data}</div>",
829
+ sentiment_analysis_output
830
+ ]
 
831
  except Exception as e:
832
  logger.error(f"Analysis failed: {str(e)}")
833
  error_message = f"""
 
842
  </div>
843
  </div>
844
  """
845
+ return [error_message] * 8
846
+ finally:
847
+ if os.path.exists(temp_dir):
848
+ shutil.rmtree(temp_dir, ignore_errors=True)
849
+ logger.info(f"Cleaned up temporary directory: {temp_dir}")
850
 
851
  # Create Gradio interface with dark mode compatibility
852
  with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
 
894
  with gr.Row():
895
  sentiment_analysis = gr.HTML(label="Sentiment Analysis")
896
 
 
 
 
897
  submit_btn.click(
898
  fn=analyze_pdf,
899
  inputs=[file_input],
 
901
  risk_summary, risk_visualization,
902
  penalty_count, penalty_amounts,
903
  obligation_count, delay_count,
904
+ extracted_data, sentiment_analysis
 
905
  ]
906
  )
907