Komal133 committed on
Commit
3bae832
·
verified ·
1 Parent(s): b75387b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -56
app.py CHANGED
@@ -1,65 +1,69 @@
1
- import os
 
2
  import json
3
- from transformers import BertTokenizer, BertForSequenceClassification
4
- import torch
5
- from flask import Flask, request, jsonify
6
- from pathlib import Path
7
- from datetime import datetime
8
 
9
- # Initialize Flask app
10
- app = Flask(__name__)
 
11
 
12
- # Load pre-trained model and tokenizer
13
- MODEL_PATH = "path/to/your/model" # Update with your Hugging Face model path
14
- tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)
15
- model = BertForSequenceClassification.from_pretrained(MODEL_PATH)
16
-
17
- # Function to process contract text and classify clauses
18
def classify_clause(contract_text):
    """Classify contract text into a risk level and report the confidence.

    The text is tokenized (truncated to at most 512 tokens) and passed
    through the module-level sequence-classification model without gradient
    tracking.

    Args:
        contract_text: Contract text to classify.

    Returns:
        A dict with ``predicted_risk`` ("low", "medium" or "high") and
        ``confidence_score`` (softmax probability of the predicted class).
    """
    # Assumes the model was trained with exactly these 3 risk levels,
    # in this index order.
    risk_labels = ["low", "medium", "high"]

    encoded = tokenizer(
        contract_text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        logits = model(**encoded).logits

    # Index of the highest-scoring class is the risk tag.
    predicted_class = torch.argmax(logits, dim=-1).item()
    predicted_risk = risk_labels[predicted_class]

    # Softmax turns the logits into per-class probabilities.
    to_probabilities = torch.nn.Softmax(dim=-1)
    class_probabilities = to_probabilities(logits).squeeze().tolist()
    confidence = class_probabilities[predicted_class]

    return {"predicted_risk": predicted_risk, "confidence_score": confidence}
34
-
35
- # Define route to handle file uploads
36
@app.route("/upload_contract", methods=["POST"])
def upload_contract():
    """Handle a contract upload and return its risk assessment as JSON.

    Expects a multipart POST carrying the contract under the ``file`` field.

    Returns:
        A JSON response with the risk assessment, or a JSON error with
        status 400 when the ``file`` field is missing, no file was selected,
        or the upload is not UTF-8 text.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Only text-based contracts are supported. Without this guard a binary
    # upload (e.g. PDF or DOCX) raises UnicodeDecodeError and surfaces as an
    # HTTP 500 instead of a client error.
    try:
        contract_text = file.read().decode('utf-8')
    except UnicodeDecodeError:
        return jsonify({"error": "File must be UTF-8 encoded text"}), 400

    # Classify the contract text
    result = classify_clause(contract_text)

    # Prepare JSON response
    response_data = {
        "contract_title": "Sample Contract",  # Placeholder, can be parsed from the file
        "overall_risk_score": result["predicted_risk"],  # Risk classification
        "high_risk_clauses": ["Termination Clause", "Penalty Clause"],  # Example (this should be dynamically extracted)
        "risk_map_url": "https://example.com/risk_map",  # Placeholder (use actual URL for visualization)
        "evaluation_date": datetime.now().strftime("%Y-%m-%d")
    }

    # Return response as JSON
    return jsonify(response_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, so it must not be on by default while listening on
    # every interface (0.0.0.0). Set FLASK_DEBUG=1 to opt in locally.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)
 
1
+ import streamlit as st
2
+ import requests
3
  import json
4
+ from transformers import pipeline
 
 
 
 
5
 
6
+ # Initialize the BERT-based NLP pipeline
7
+ model_name = "your-huggingface-model-name" # Replace this with your model
8
+ nlp_pipeline = pipeline("ner", model=model_name)
9
 
10
+ # Function to analyze contract text
11
def analyze_contract(contract_text):
    """Score a contract by counting risk-tagged entities from the NLP pipeline.

    Args:
        contract_text: Full text of the contract.

    Returns:
        A dict with ``high_risk_clauses`` (the matched words, in pipeline
        order) and ``risk_score`` (10 points per matched entity).
    """
    # Customize as per your model's tags.
    risk_tags = {"PENALTY", "OBLIGATION", "DELAY"}
    points_per_match = 10  # Example scoring logic, modify as needed

    # Nothing to analyze: skip the model call entirely.
    if not contract_text or not contract_text.strip():
        return {"high_risk_clauses": [], "risk_score": 0}

    high_risk_clauses = []
    # Run the contract through the NLP pipeline
    for result in nlp_pipeline(contract_text):
        # Token-classification pipelines report the tag under "entity"
        # ("entity_group" when aggregation is enabled) rather than "label";
        # "label" is still honoured first for pipelines that do emit it.
        tag = (
            result.get("label")
            or result.get("entity_group")
            or result.get("entity")
            or ""
        )
        # Drop the BIO prefix so "B-PENALTY" / "I-PENALTY" match "PENALTY".
        if tag[:2] in ("B-", "I-"):
            tag = tag[2:]
        if tag in risk_tags:
            high_risk_clauses.append(result["word"])

    return {
        "high_risk_clauses": high_risk_clauses,
        "risk_score": points_per_match * len(high_risk_clauses),
    }
29
 
30
+ # Streamlit UI
31
st.title("Contract Risk Analyzer")

# File upload
contract_file = st.file_uploader("Upload Contract", type=["pdf", "docx", "txt"])

if contract_file is not None:
    contract_text = ""
    decode_failed = False

    if contract_file.type == "application/pdf":
        import PyPDF2
        # Read PDF. Pages are joined with newlines so the last word of one
        # page does not fuse with the first word of the next; a page with no
        # extractable text contributes an empty string.
        pdf_reader = PyPDF2.PdfReader(contract_file)
        contract_text = "\n".join(
            page.extract_text() or "" for page in pdf_reader.pages
        )
    elif contract_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        import docx
        # Read DOCX. Paragraphs are newline-separated for the same reason.
        doc = docx.Document(contract_file)
        contract_text = "\n".join(para.text for para in doc.paragraphs)
    elif contract_file.type == "text/plain":
        try:
            contract_text = contract_file.read().decode("utf-8")
        except UnicodeDecodeError:
            # Report the problem instead of crashing the app.
            decode_failed = True
            st.error("The uploaded text file is not valid UTF-8.")

    # Analyze the contract text
    if contract_text.strip():
        analysis_results = analyze_contract(contract_text)

        # Display the high-risk clauses and risk score
        st.subheader("High Risk Clauses")
        if analysis_results["high_risk_clauses"]:
            st.write(", ".join(analysis_results["high_risk_clauses"]))
        else:
            st.write("No high-risk clauses detected.")

        st.subheader("Overall Risk Score")
        st.write(analysis_results["risk_score"])

        # Generate the risk heatmap (simplified here, you might want a more complex rendering)
        st.subheader("Risk Heatmap")
        st.write(f"Risk Score: {analysis_results['risk_score']}")
        # Visualize as per your design (here we can display a simple score)

        # Here you could add logic to save the results to Salesforce or other systems
    elif not decode_failed:
        # E.g. a scanned, image-only PDF or an empty document.
        st.warning("No readable text could be extracted from the uploaded file.")