Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,11 @@ from io import BytesIO
|
|
| 12 |
import uuid
|
| 13 |
import logging
|
| 14 |
import textwrap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Set up logging
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -332,6 +337,10 @@ mark {
|
|
| 332 |
border-radius: 3px;
|
| 333 |
font-weight: bold;
|
| 334 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
/* Hide elements */
|
| 336 |
footer, .gradio-footer, .hide, [data-testid="Use via API"], [data-testid="mmsettings"],
|
| 337 |
.sentiment-analysis, .risk-visualization {
|
|
@@ -492,7 +501,13 @@ def generate_heatmap(risk_level: str):
|
|
| 492 |
|
| 493 |
ax.set_axis_off()
|
| 494 |
plt.tight_layout()
|
| 495 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
except Exception as e:
|
| 497 |
logger.error(f"Heatmap generation failed: {str(e)}")
|
| 498 |
raise Exception(f"Heatmap generation failed: {str(e)}")
|
|
@@ -570,6 +585,78 @@ def format_clause_example(example: str, index: int) -> str:
|
|
| 570 |
</div>
|
| 571 |
"""
|
| 572 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
def analyze_pdf(file_obj) -> List:
|
| 574 |
"""Main analysis function for Gradio interface"""
|
| 575 |
try:
|
|
@@ -670,7 +757,7 @@ def analyze_pdf(file_obj) -> List:
|
|
| 670 |
raise Exception(f"Risk score calculation failed: {str(e)}")
|
| 671 |
|
| 672 |
try:
|
| 673 |
-
|
| 674 |
risk_meter = generate_risk_meter(risk_score)
|
| 675 |
sentiment_meter = generate_sentiment_meter(sentiment_score)
|
| 676 |
except Exception as e:
|
|
@@ -776,6 +863,21 @@ def analyze_pdf(file_obj) -> List:
|
|
| 776 |
"High": "This contract is high risk! Immediate legal review required."
|
| 777 |
}
|
| 778 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 779 |
# Generate keyword matches section with highlighted context
|
| 780 |
def format_keyword_matches(contexts_dict, title):
|
| 781 |
sections = []
|
|
@@ -799,7 +901,7 @@ def analyze_pdf(file_obj) -> List:
|
|
| 799 |
</div>
|
| 800 |
"""
|
| 801 |
|
| 802 |
-
|
| 803 |
<div class="result-box">
|
| 804 |
<div class="section-title">🔍 Keyword Matches in Document</div>
|
| 805 |
{format_keyword_matches(penalty_contexts, "💰 Penalty Terms")}
|
|
@@ -808,29 +910,35 @@ def analyze_pdf(file_obj) -> List:
|
|
| 808 |
</div>
|
| 809 |
"""
|
| 810 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
return [
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
{sf_link_html}
|
| 815 |
-
<div class='section-title'>{risk_icon} Contract Risk Summary</div>
|
| 816 |
-
<div class='risk-row'>
|
| 817 |
-
<span class='risk-label'>Overall Risk Score</span>
|
| 818 |
-
<span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
|
| 819 |
-
</div>
|
| 820 |
-
{risk_meter}
|
| 821 |
-
<div style='margin-top: 15px; font-size: 16px;'>
|
| 822 |
-
<strong>Assessment:</strong> {risk_advice[risk_level]}
|
| 823 |
-
</div>
|
| 824 |
-
</div>
|
| 825 |
-
""",
|
| 826 |
-
"", # Empty string for hidden risk visualization
|
| 827 |
penalty_details,
|
| 828 |
f"<div class='penalty-box'><div class='section-title'>💰 Penalty Amounts Found</div>{penalty_amounts}</div>",
|
| 829 |
obligation_details,
|
| 830 |
delay_details,
|
| 831 |
f"<div class='result-box'><div class='section-title'>📜 Text Extracted from PDF</div>{penalty_examples}</div>",
|
| 832 |
-
"", #
|
| 833 |
-
|
|
|
|
| 834 |
]
|
| 835 |
except Exception as e:
|
| 836 |
logger.error(f"Analysis failed: {str(e)}")
|
|
@@ -846,7 +954,7 @@ def analyze_pdf(file_obj) -> List:
|
|
| 846 |
</div>
|
| 847 |
</div>
|
| 848 |
"""
|
| 849 |
-
return [error_message] *
|
| 850 |
|
| 851 |
# Create Gradio interface with blue theme and hidden elements
|
| 852 |
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
|
|
@@ -895,8 +1003,11 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
|
|
| 895 |
# Hidden sentiment analysis (kept in code but not displayed)
|
| 896 |
sentiment_analysis = gr.HTML(label="Contract Sentiment Analysis", visible=False)
|
| 897 |
|
| 898 |
-
#
|
| 899 |
keyword_matches = gr.HTML(label="Keyword Matches in Document")
|
|
|
|
|
|
|
|
|
|
| 900 |
|
| 901 |
submit_btn.click(
|
| 902 |
fn=analyze_pdf,
|
|
@@ -906,9 +1017,19 @@ with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Defa
|
|
| 906 |
penalty_count, penalty_amounts,
|
| 907 |
obligation_count, delay_count,
|
| 908 |
penalty_examples, sentiment_analysis,
|
| 909 |
-
keyword_matches
|
| 910 |
]
|
| 911 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
|
| 913 |
if __name__ == "__main__":
|
| 914 |
demo.launch()
|
|
|
|
| 12 |
import uuid
|
| 13 |
import logging
|
| 14 |
import textwrap
|
| 15 |
+
from reportlab.lib.pagesizes import letter
|
| 16 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image
|
| 17 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 18 |
+
from reportlab.lib.enums import TA_CENTER
|
| 19 |
+
from reportlab.lib import colors
|
| 20 |
|
| 21 |
# Set up logging
|
| 22 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 337 |
border-radius: 3px;
|
| 338 |
font-weight: bold;
|
| 339 |
}
|
| 340 |
+
.download-btn {
|
| 341 |
+
margin-top: 20px !important;
|
| 342 |
+
background: linear-gradient(135deg, #4CAF50, #2E7D32) !important;
|
| 343 |
+
}
|
| 344 |
/* Hide elements */
|
| 345 |
footer, .gradio-footer, .hide, [data-testid="Use via API"], [data-testid="mmsettings"],
|
| 346 |
.sentiment-analysis, .risk-visualization {
|
|
|
|
| 501 |
|
| 502 |
ax.set_axis_off()
|
| 503 |
plt.tight_layout()
|
| 504 |
+
|
| 505 |
+
# Save the figure to a BytesIO object
|
| 506 |
+
buf = BytesIO()
|
| 507 |
+
plt.savefig(buf, format='png', bbox_inches='tight')
|
| 508 |
+
plt.close(fig)
|
| 509 |
+
buf.seek(0)
|
| 510 |
+
return buf
|
| 511 |
except Exception as e:
|
| 512 |
logger.error(f"Heatmap generation failed: {str(e)}")
|
| 513 |
raise Exception(f"Heatmap generation failed: {str(e)}")
|
|
|
|
| 585 |
</div>
|
| 586 |
"""
|
| 587 |
|
| 588 |
+
def generate_pdf_report(analysis_results: dict, heatmap_buffer: BytesIO) -> BytesIO:
    """Render the contract analysis as a PDF held in an in-memory buffer.

    Args:
        analysis_results: Mapping read with the keys ``risk_score``,
            ``risk_level``, ``penalty_count``, ``obligation_count``,
            ``delay_count``, ``penalty_values``, ``penalty_details``,
            ``obligation_details``, ``delay_details`` and
            ``sentiment_score``.
        heatmap_buffer: Image data for the risk heatmap. The image is
            omitted from the report when this value is falsy.

    Returns:
        A ``BytesIO`` containing the built PDF, rewound to position 0.
    """
    output = BytesIO()
    document = SimpleDocTemplate(output, pagesize=letter)

    # Start from ReportLab's sample sheet and register the report's own styles.
    sheet = getSampleStyleSheet()
    custom_styles = (
        ParagraphStyle(name='Center', alignment=TA_CENTER),
        ParagraphStyle(name='RiskHigh', textColor=colors.red, fontSize=14),
        ParagraphStyle(name='RiskMedium', textColor=colors.orange, fontSize=14),
        ParagraphStyle(name='RiskLow', textColor=colors.green, fontSize=14),
        ParagraphStyle(name='NormalBold', fontName='Helvetica-Bold', parent=sheet['Normal']),
    )
    for custom_style in custom_styles:
        sheet.add(custom_style)

    flowables = []

    def add_paragraph(markup, style_name):
        # Append one paragraph rendered with the named style.
        flowables.append(Paragraph(markup, sheet[style_name]))

    def add_gap(height):
        # Append vertical whitespace of the given height.
        flowables.append(Spacer(1, height))

    # Title
    add_paragraph("Contract Risk Analysis Report", 'Title')
    add_gap(12)

    # Risk Summary: the paragraph style (colour) follows the risk level.
    add_paragraph("<b>Risk Summary</b>", 'Heading2')
    level = analysis_results['risk_level']
    if level == 'High':
        risk_style = 'RiskHigh'
    elif level == 'Medium':
        risk_style = 'RiskMedium'
    else:
        risk_style = 'RiskLow'
    add_paragraph(
        f"Risk Score: {analysis_results['risk_score']:.1f}/100 - <b>{analysis_results['risk_level']} Risk</b>",
        risk_style,
    )
    add_gap(12)

    # Add heatmap to PDF
    if heatmap_buffer:
        flowables.append(Image(heatmap_buffer, width=400, height=100))
        add_gap(24)

    # Key Findings
    add_paragraph("<b>Key Findings</b>", 'Heading2')
    add_paragraph(f"<b>Penalty Clauses:</b> {analysis_results['penalty_count']}", 'Normal')
    add_paragraph(f"<b>Obligation Clauses:</b> {analysis_results['obligation_count']}", 'Normal')
    add_paragraph(f"<b>Delay Clauses:</b> {analysis_results['delay_count']}", 'Normal')
    add_gap(12)

    penalty_values = analysis_results['penalty_values']
    if penalty_values:
        add_paragraph("<b>Penalty Amounts Found</b>", 'Heading3')
        # Only the first five amounts are listed.
        for amount in penalty_values[:5]:
            add_paragraph(f"${amount:,.2f}", 'Normal')
        add_gap(12)

    # Detailed Findings: one optional subsection per clause category.
    add_paragraph("<b>Detailed Findings</b>", 'Heading2')
    detail_sections = (
        ('penalty_details', "<b>Penalty Clauses</b>"),
        ('obligation_details', "<b>Obligation Clauses</b>"),
        ('delay_details', "<b>Delay Clauses</b>"),
    )
    for key, heading in detail_sections:
        detail_text = analysis_results[key]
        if detail_text:
            add_paragraph(heading, 'Heading3')
            add_paragraph(detail_text, 'Normal')
            add_gap(12)

    # Sentiment Analysis
    add_paragraph("<b>Sentiment Analysis</b>", 'Heading2')
    score = analysis_results['sentiment_score']
    if score > 0.6:
        sentiment_text = "Positive"
    elif score < 0.4:
        sentiment_text = "Negative"
    else:
        sentiment_text = "Neutral"
    add_paragraph(
        f"Sentiment Score: {analysis_results['sentiment_score']:.2f} ({sentiment_text})",
        'Normal',
    )

    document.build(flowables)
    output.seek(0)
    return output
|
| 659 |
+
|
| 660 |
def analyze_pdf(file_obj) -> List:
|
| 661 |
"""Main analysis function for Gradio interface"""
|
| 662 |
try:
|
|
|
|
| 757 |
raise Exception(f"Risk score calculation failed: {str(e)}")
|
| 758 |
|
| 759 |
try:
|
| 760 |
+
heatmap_buffer = generate_heatmap(risk_level)
|
| 761 |
risk_meter = generate_risk_meter(risk_score)
|
| 762 |
sentiment_meter = generate_sentiment_meter(sentiment_score)
|
| 763 |
except Exception as e:
|
|
|
|
| 863 |
"High": "This contract is high risk! Immediate legal review required."
|
| 864 |
}
|
| 865 |
|
| 866 |
+
risk_summary_html = f"""
|
| 867 |
+
<div class='result-box'>
|
| 868 |
+
{sf_link_html}
|
| 869 |
+
<div class='section-title'>{risk_icon} Contract Risk Summary</div>
|
| 870 |
+
<div class='risk-row'>
|
| 871 |
+
<span class='risk-label'>Overall Risk Score</span>
|
| 872 |
+
<span class='risk-score risk-{risk_level.lower()}'>{risk_score:.1f}/100</span>
|
| 873 |
+
</div>
|
| 874 |
+
{risk_meter}
|
| 875 |
+
<div style='margin-top: 15px; font-size: 16px;'>
|
| 876 |
+
<strong>Assessment:</strong> {risk_advice[risk_level]}
|
| 877 |
+
</div>
|
| 878 |
+
</div>
|
| 879 |
+
"""
|
| 880 |
+
|
| 881 |
# Generate keyword matches section with highlighted context
|
| 882 |
def format_keyword_matches(contexts_dict, title):
|
| 883 |
sections = []
|
|
|
|
| 901 |
</div>
|
| 902 |
"""
|
| 903 |
|
| 904 |
+
keyword_matches_html = f"""
|
| 905 |
<div class="result-box">
|
| 906 |
<div class="section-title">🔍 Keyword Matches in Document</div>
|
| 907 |
{format_keyword_matches(penalty_contexts, "💰 Penalty Terms")}
|
|
|
|
| 910 |
</div>
|
| 911 |
"""
|
| 912 |
|
| 913 |
+
# Prepare data for PDF report
|
| 914 |
+
analysis_results = {
|
| 915 |
+
'risk_score': risk_score,
|
| 916 |
+
'risk_level': risk_level,
|
| 917 |
+
'penalty_count': total_penalties,
|
| 918 |
+
'obligation_count': total_obligations,
|
| 919 |
+
'delay_count': total_delays,
|
| 920 |
+
'penalty_values': penalty_values,
|
| 921 |
+
'penalty_details': "Penalty clauses found in the document",
|
| 922 |
+
'obligation_details': "Obligation clauses found in the document",
|
| 923 |
+
'delay_details': "Delay clauses found in the document",
|
| 924 |
+
'sentiment_score': sentiment_score
|
| 925 |
+
}
|
| 926 |
+
|
| 927 |
+
# Generate PDF report
|
| 928 |
+
pdf_buffer = generate_pdf_report(analysis_results, heatmap_buffer)
|
| 929 |
+
|
| 930 |
+
# Return all outputs including the PDF data
|
| 931 |
return [
|
| 932 |
+
risk_summary_html,
|
| 933 |
+
"", # risk_visualization
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 934 |
penalty_details,
|
| 935 |
f"<div class='penalty-box'><div class='section-title'>💰 Penalty Amounts Found</div>{penalty_amounts}</div>",
|
| 936 |
obligation_details,
|
| 937 |
delay_details,
|
| 938 |
f"<div class='result-box'><div class='section-title'>📜 Text Extracted from PDF</div>{penalty_examples}</div>",
|
| 939 |
+
"", # sentiment_analysis
|
| 940 |
+
keyword_matches_html,
|
| 941 |
+
pdf_buffer # PDF data as the last output
|
| 942 |
]
|
| 943 |
except Exception as e:
|
| 944 |
logger.error(f"Analysis failed: {str(e)}")
|
|
|
|
| 954 |
</div>
|
| 955 |
</div>
|
| 956 |
"""
|
| 957 |
+
return [error_message] * 10 # Update to match total outputs including PDF
|
| 958 |
|
| 959 |
# Create Gradio interface with blue theme and hidden elements
|
| 960 |
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer", theme=gr.themes.Default(primary_hue="blue")) as demo:
|
|
|
|
| 1003 |
# Hidden sentiment analysis (kept in code but not displayed)
|
| 1004 |
sentiment_analysis = gr.HTML(label="Contract Sentiment Analysis", visible=False)
|
| 1005 |
|
| 1006 |
+
# Keyword matches output
|
| 1007 |
keyword_matches = gr.HTML(label="Keyword Matches in Document")
|
| 1008 |
+
|
| 1009 |
+
# PDF report download (File component, created hidden)
|
| 1010 |
+
pdf_download = gr.File(label="Download PDF Report", visible=False)
|
| 1011 |
|
| 1012 |
submit_btn.click(
|
| 1013 |
fn=analyze_pdf,
|
|
|
|
| 1017 |
penalty_count, penalty_amounts,
|
| 1018 |
obligation_count, delay_count,
|
| 1019 |
penalty_examples, sentiment_analysis,
|
| 1020 |
+
keyword_matches, pdf_download
|
| 1021 |
]
|
| 1022 |
)
|
| 1023 |
+
|
| 1024 |
+
# Show download button when PDF is ready
|
| 1025 |
+
def toggle_download_visibility(pdf_data):
    """Build the update that shows the PDF download only when a report exists.

    Args:
        pdf_data: Value to place in the download component, or ``None``
            when no report is available.

    Returns:
        A component update carrying ``pdf_data`` as the value, with
        ``visible`` set to whether ``pdf_data`` is not ``None``.
    """
    # gr.update() is the component-agnostic update helper; the per-component
    # gr.File.update() classmethod was removed in Gradio 4.
    return gr.update(value=pdf_data, visible=pdf_data is not None)
|
| 1027 |
+
|
| 1028 |
+
keyword_matches.change(
|
| 1029 |
+
fn=toggle_download_visibility,
|
| 1030 |
+
inputs=pdf_download,
|
| 1031 |
+
outputs=pdf_download
|
| 1032 |
+
)
|
| 1033 |
|
| 1034 |
if __name__ == "__main__":
|
| 1035 |
demo.launch()
|