# Spaces: Sleeping / Sleeping  (hosting-page status banner captured with the source; not part of the program)
import base64
import html
import re
from io import BytesIO
from typing import Dict, List, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
from word2number import w2n
# Custom CSS for styling the risk rows, result cards and example clauses.
# The bullet in `.clause-item:before` is written as the CSS escape \2022 so it
# survives any re-encoding of this file (it was previously stored as a raw,
# mis-decoded character).
css = """
:root {
    --low-color: #28a745;
    --medium-color: #ffc107;
    --high-color: #dc3545;
    --inactive-color: #e9ecef;
}
.risk-container {
    display: flex;
    flex-direction: column;
    gap: 12px;
    margin-bottom: 25px;
}
.risk-row {
    display: flex;
    align-items: center;
    background: white;
    border-radius: 8px;
    padding: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    transition: all 0.3s ease;
}
.risk-row.active {
    transform: scale(1.02);
    box-shadow: 0 4px 8px rgba(0,0,0,0.15);
}
.risk-label {
    width: 100px;
    font-weight: 600;
    font-size: 16px;
    color: #495057;
}
.risk-score {
    width: 80px;
    font-size: 20px;
    font-weight: 700;
    text-align: center;
}
.risk-low { color: var(--low-color); }
.risk-medium { color: var(--medium-color); }
.risk-high { color: var(--high-color); }
.heatmap-container {
    flex-grow: 1;
    height: 30px;
    border-radius: 15px;
    overflow: hidden;
    position: relative;
}
.heatmap-bar {
    height: 100%;
    border-radius: 15px;
    transition: width 0.5s ease;
}
.risk-meter {
    position: absolute;
    right: 10px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 12px;
    font-weight: 600;
    color: white;
    text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}
.result-section {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 20px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.result-title {
    font-size: 18px;
    font-weight: 600;
    margin-bottom: 15px;
    color: #343a40;
    display: flex;
    align-items: center;
    gap: 8px;
}
.clause-item {
    margin-bottom: 8px;
    padding-left: 15px;
    position: relative;
}
.clause-item:before {
    content: "\\2022";
    position: absolute;
    left: 0;
    color: #6c757d;
}
.penalty-amount {
    font-family: monospace;
    background: #f8f9fa;
    padding: 2px 6px;
    border-radius: 4px;
    margin-left: 5px;
}
.example-clause {
    background: #f8f9fa;
    padding: 12px;
    border-radius: 6px;
    margin-bottom: 10px;
    border-left: 3px solid #6c757d;
}
.example-number {
    font-weight: 600;
    margin-right: 8px;
    color: #6c757d;
}
"""
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The text of all pages joined with newlines. Pages for which
        ``extract_text()`` yields nothing contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next, which would hide keywords from \b-based matching.
    return "\n".join(page_texts)
def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
    """Count whole-word, case-insensitive occurrences of each keyword.

    Args:
        text: Text to search.
        keywords: Words or phrases to count; each is matched literally
            (regex metacharacters are escaped) between word boundaries.

    Returns:
        A mapping from each keyword to its number of occurrences in ``text``.
    """
    return {
        keyword: len(
            re.findall(r'\b' + re.escape(keyword) + r'\b', text, flags=re.IGNORECASE)
        )
        for keyword in keywords
    }
def find_penalty_values(text: str) -> List[float]:
    """Find penalty amounts mentioned in the text.

    Recognises, case-insensitively:
      * ``$`` amounts (``$1,000.50``),
      * ``USD`` amounts (``USD 200``),
      * percentages (``10%`` / ``10 percent``),
      * spelled-out dollar amounts (``five thousand dollars``), converted
        with ``w2n.word_to_num``.

    NOTE: percentages are returned in the same list as currency amounts, so
    callers that format every entry as dollars will display them as dollars.

    Args:
        text: Text to scan.

    Returns:
        Parsed values, grouped by pattern in the order listed above (not in
        document order). Matches that cannot be parsed are skipped.
    """
    numeric_patterns = [
        r'\$\s*[\d,]+(?:\.\d+)?',
        r'(?:USD|usd)\s*[\d,]+(?:\.\d+)?',
        r'\d+\s*(?:percent|%)',
    ]
    # Lazy word run so "five dollars or ten dollars" yields two matches
    # instead of one merged phrase; group 1 is the wording before "dollars".
    worded_pattern = r'((?:\b[a-z]+\s*)+?)dollars'

    penalties = []

    for pattern in numeric_patterns:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            digits = re.sub(r'[^\d.]', '', match.group())
            try:
                penalties.append(float(digits))
            except ValueError:
                # e.g. "$ ," leaves no digits at all.
                continue

    for match in re.finditer(worded_pattern, text, flags=re.IGNORECASE):
        try:
            # Let w2n decide whether the phrase contains number words rather
            # than pre-filtering on a partial word list (which missed
            # "twenty", "fifty", "eleven", ...).
            penalties.append(w2n.word_to_num(match.group(1).strip()))
        except Exception:
            # Best-effort parsing: phrases without a usable number are skipped.
            # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
            continue

    return penalties
def calculate_risk_score(penalty_count: int, penalty_values: List[float], obligation_count: int, delay_count: int) -> Tuple[float, str]:
    """Combine clause counts and penalty sizes into a 0-100 risk score.

    Contributions (each capped, then the total is capped at 100):
      * penalty clauses: 5 points each, at most 30;
      * average penalty value: 40 if > 1,000,000, 25 if > 100,000,
        15 if > 10,000, otherwise 5 (nothing when no values were found);
      * obligation clauses: 2 points each, at most 20;
      * delay clauses: 10 points each, at most 30.

    Args:
        penalty_count: Number of penalty keyword hits.
        penalty_values: Penalty amounts found in the text (may be empty).
        obligation_count: Number of obligation keyword hits.
        delay_count: Number of delay keyword hits.

    Returns:
        ``(score, level)`` where level is "Low" (< 30), "Medium" (< 70)
        or "High".
    """
    score = min(penalty_count * 5, 30)

    if penalty_values:
        avg_penalty = sum(penalty_values) / len(penalty_values)
        # Tiers are checked from the largest threshold down; first hit wins.
        for threshold, points in ((1000000, 40), (100000, 25), (10000, 15)):
            if avg_penalty > threshold:
                score += points
                break
        else:
            score += 5

    score += min(obligation_count * 2, 20)
    score += min(delay_count * 10, 30)
    score = min(score, 100)

    if score < 30:
        return score, "Low"
    if score < 70:
        return score, "Medium"
    return score, "High"
def create_risk_display(risk_score: float, risk_level: str) -> str:
    """Create the HTML display showing all three risk levels.

    One row is rendered per level ("Low", "Medium", "High"). Only the row
    matching ``risk_level`` is highlighted and shows ``risk_score``; the
    other rows are rendered greyed out with a score of 0.

    Args:
        risk_score: Score to show on the active row, as a percentage.
        risk_level: Which of "Low", "Medium" or "High" is active.

    Returns:
        HTML markup for the ``risk-container`` block.
    """
    colors = {
        "Low": "var(--low-color)",
        "Medium": "var(--medium-color)",
        "High": "var(--high-color)",
    }

    html_parts = ["<div class='risk-container'>"]
    for level, level_color in colors.items():
        active = level == risk_level
        score = risk_score if active else 0
        color = level_color if active else "var(--inactive-color)"
        opacity = "1" if active else "0.6"
        row_class = "risk-row active" if active else "risk-row"
        css_level = level.lower()
        # The heatmap-bar class attribute must be closed with the same quote
        # it was opened with; otherwise the style attribute is swallowed and
        # the bar renders with no width or colour.
        html_parts.append(f"""
        <div class='{row_class}'>
            <div class='risk-label risk-{css_level}'>{level} Risk</div>
            <div class='risk-score risk-{css_level}'>{score:.1f}%</div>
            <div class='heatmap-container'>
                <div class='heatmap-bar'
                     style='width: {score}%; background: {color}; opacity: {opacity}'>
                    <span class='risk-meter'>{score:.1f}%</span>
                </div>
            </div>
        </div>
        """)
    html_parts.append("</div>")
    return "\n".join(html_parts)
def format_clauses(counts: Dict[str, int]) -> str:
    """Format keyword counts as one ``clause-item`` HTML row per keyword.

    Args:
        counts: Mapping of keyword to number of occurrences.

    Returns:
        The rows joined with newlines, in the mapping's iteration order
        (an empty string for an empty mapping).
    """
    return "\n".join(
        f"<div class='clause-item'>{kw}: <strong>{count}</strong></div>"
        for kw, count in counts.items()
    )
def format_penalty_amounts(amounts: List[float], limit: int = 5) -> str:
    """Format penalty amounts as HTML rows with dollar formatting.

    Args:
        amounts: Penalty values to display.
        limit: Maximum number of amounts to show (the first ``limit`` are kept).

    Returns:
        One ``clause-item`` row per shown amount, joined with newlines, or a
        muted "none found" message when ``amounts`` is empty.
    """
    if not amounts:
        return "<div style='color: #6c757d;'>No specific penalty amounts found</div>"
    return "\n".join(
        f"<div class='clause-item'><span class='penalty-amount'>${amt:,.2f}</span></div>"
        for amt in amounts[:limit]
    )
def format_examples(sentences: List[str], limit: int = 3) -> str:
    """Format example sentences as numbered ``example-clause`` HTML blocks.

    Sentences originate from the uploaded document, so they are HTML-escaped
    before being placed in the markup.

    Args:
        sentences: Candidate sentences to display.
        limit: Maximum number of sentences to show (the first ``limit`` are kept).

    Returns:
        The blocks joined with newlines, or a muted "none found" message
        when ``sentences`` is empty.
    """
    if not sentences:
        return "<div style='color: #6c757d;'>No penalty clauses found</div>"
    return "\n".join(
        f"""
        <div class='example-clause'>
            <span class='example-number'>{number}.</span> {html.escape(sent)}
        </div>
        """
        for number, sent in enumerate(sentences[:limit], start=1)
    )
| def analyze_pdf(file_obj) -> List: | |
| """Main analysis function for Gradio interface""" | |
| try: | |
| # Extract text from the uploaded file | |
| text = extract_text_from_pdf(file_obj.name) | |
| # Define keywords to search for | |
| penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"] | |
| obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"] | |
| delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"] | |
| # Count keyword occurrences | |
| penalty_counts = count_keywords(text, penalty_keywords) | |
| obligation_counts = count_keywords(text, obligation_keywords) | |
| delay_counts = count_keywords(text, delay_keywords) | |
| # Find penalty values | |
| penalty_values = find_penalty_values(text) | |
| # Calculate total counts | |
| total_penalties = sum(penalty_counts.values()) | |
| total_obligations = sum(obligation_counts.values()) | |
| total_delays = sum(delay_counts.values()) | |
| # Calculate risk score | |
| risk_score, risk_level = calculate_risk_score( | |
| total_penalties, penalty_values, total_obligations, total_delays | |
| ) | |
| # Generate risk display | |
| risk_display = create_risk_display(risk_score, risk_level) | |
| # Find example sentences with penalties | |
| penalty_sentences = [] | |
| for sentence in re.split(r'(?<=[.!?])\s+', text): | |
| if any(kw.lower() in sentence.lower() for kw in penalty_keywords): | |
| penalty_sentences.append(sentence.strip()) | |
| # Format all results | |
| penalty_html = f""" | |
| <div class='result-section'> | |
| <div class='result-title'>π Penalty Clauses: <strong>{total_penalties}</strong> found</div> | |
| {format_clauses(penalty_counts)} | |
| </div> | |
| """ | |
| amounts_html = f""" | |
| <div class='result-section'> | |
| <div class='result-title'>π° Penalty Amounts: <strong>{len(penalty_values)}</strong> found</div> | |
| {format_penalty_amounts(penalty_values)} | |
| </div> | |
| """ | |
| obligation_html = f""" | |
| <div class='result-section'> | |
| <div class='result-title'>βοΈ Obligation Clauses: <strong>{total_obligations}</strong> found</div> | |
| {format_clauses(obligation_counts)} | |
| </div> | |
| """ | |
| delay_html = f""" | |
| <div class='result-section'> | |
| <div class='result-title'>β±οΈ Delay Clauses: <strong>{total_delays}</strong> found</div> | |
| {format_clauses(delay_counts)} | |
| </div> | |
| """ | |
| examples_html = f""" | |
| <div class='result-section'> | |
| <div class='result-title'>π Example Penalty Clauses</div> | |
| {format_examples(penalty_sentences)} | |
| </div> | |
| """ | |
| return [ | |
| risk_display, | |
| penalty_html, | |
| amounts_html, | |
| obligation_html, | |
| delay_html, | |
| examples_html | |
| ] | |
| except Exception as e: | |
| error_html = f""" | |
| <div class='result-section' style='background: #fff3cd;'> | |
| <div class='result-title'>β Error</div> | |
| <div>{str(e)}</div> | |
| </div> | |
| """ | |
| return [error_html] * 6 | |
# Create the Gradio interface: an upload column and risk summary on top, two
# columns of clause/amount cards below, then the example clauses.
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
    # The header HTML is kept flush-left inside the string because Markdown
    # treats lines indented by four or more spaces as code blocks.
    gr.Markdown(
        "\n"
        "<div style='text-align: center; margin-bottom: 30px;'>\n"
        "<h1 style='margin-bottom: 10px;'>📄 PDF Contract Risk Analyzer</h1>\n"
        "<p style='color: #6c757d;'>Upload a contract PDF to analyze penalties, obligations, and delays</p>\n"
        "</div>\n"
    )

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            submit_btn = gr.Button("Analyze Contract", variant="primary")
        with gr.Column(scale=3):
            gr.Markdown("### 📊 Risk Assessment Summary")
            risk_display = gr.HTML()

    with gr.Row():
        with gr.Column():
            penalty_count = gr.HTML()
            penalty_amounts = gr.HTML()
        with gr.Column():
            obligation_count = gr.HTML()
            delay_count = gr.HTML()

    penalty_examples = gr.HTML()

    # Output order must match the list returned by analyze_pdf.
    submit_btn.click(
        fn=analyze_pdf,
        inputs=file_input,
        outputs=[
            risk_display,
            penalty_count,
            penalty_amounts,
            obligation_count,
            delay_count,
            penalty_examples,
        ],
    )

if __name__ == "__main__":
    demo.launch()