Spaces:

ABDALLAH31
/

Heatmap_generator

Sleeping

App Files Files Community

ABDALLAH31 committed on May 10, 2025

Commit

ee8bd9f

verified ·

1 Parent(s): fb99b8d

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -15

app.py CHANGED Viewed

@@ -2,28 +2,47 @@ import gradio as gr
 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
 import os
-def generate_heatmap(results):
-    # Check the structure of the results
-    print("Results:", results)
-    # If the results are strings (e.g., just the clauses)
-    if isinstance(results, list) and isinstance(results[0], str):
-        clauses = results  # Directly use clauses
-        # For simplicity, assume all clauses are "high risk" here for testing purposes
-        risk_values = [3 for _ in clauses]  # Replace with actual risk assessment logic
-    else:
-        # Assuming results are in the format [{'clause': ..., 'risk_level': ...}, ...]
-        clauses = [r['clause'] for r in results]  # Extract clause text
-        risk_levels = {"High": 3, "Medium": 2, "Low": 1}
-        risk_values = [risk_levels.get(r['risk_level'], 1) for r in results]  # Map risk level to value
     # Plot heatmap
     fig = plt.figure(figsize=(10, 6))
     sns.heatmap([risk_values], annot=True, xticklabels=clauses, yticklabels=["Risk Levels"], cmap="YlOrRd")
-    # Save heatmap image
     heatmap_path = os.path.join(os.getcwd(), 'contract_risk_heatmap.png')
     plt.savefig(heatmap_path)

 import matplotlib.pyplot as plt
 import seaborn as sns
 import numpy as np
+import pdfplumber
 import os
+from transformers import pipeline
+# Load the zero-shot classification model
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+# Function to extract text from PDF
+def extract_text_from_pdf(file_path):
+    """Return the text of every page of the PDF at ``file_path``.
+
+    Pages are joined with a newline so the last word of one page does not
+    fuse with the first word of the next. A page whose ``extract_text()``
+    call yields nothing contributes an empty string.
+
+    Args:
+        file_path: Path to the PDF; passed straight to ``pdfplumber.open``.
+
+    Returns:
+        The concatenated page text as a single ``str`` (empty string for a
+        PDF with no pages).
+    """
+    with pdfplumber.open(file_path) as pdf:
+        # extract_text() may return None for a page with no extractable
+        # text (e.g. a scanned image); `or ""` avoids a TypeError on join.
+        page_texts = [page.extract_text() or "" for page in pdf.pages]
+    return "\n".join(page_texts)
+# Generate heatmap
+def generate_heatmap(file):
+    # Step 1: Extract text from the uploaded PDF
+    text = extract_text_from_pdf(file.name)
+    # Step 2: Split text into individual clauses (simple split by periods)
+    clauses = text.split(". ")
+    # Step 3: Define candidate labels for risk
+    labels = ["high risk", "medium risk", "low risk"]
+    # Step 4: Classify each clause and store the scores
+    scores = []
+    for clause in clauses:
+        result = classifier(clause, labels)
+        scores.append(result['scores'])
+    # Step 5: Create the heatmap data
+    risk_levels = {"High": 3, "Medium": 2, "Low": 1}
+    risk_values = [risk_levels.get(r['label'], 1) for r in result['labels']]
     # Plot heatmap
     fig = plt.figure(figsize=(10, 6))
     sns.heatmap([risk_values], annot=True, xticklabels=clauses, yticklabels=["Risk Levels"], cmap="YlOrRd")
+    # Save the heatmap as an image
     heatmap_path = os.path.join(os.getcwd(), 'contract_risk_heatmap.png')
     plt.savefig(heatmap_path)