Spaces:
Build error
Build error
File size: 1,416 Bytes
7d9d8b5 a238aa1 7d9d8b5 a238aa1 7d9d8b5 a238aa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# Hugging Face checkpoint name used for both the tokenizer and the model.
# NOTE(review): classify_clauses() compares the predicted label against
# 'POSITIVE'. Confirm this checkpoint actually emits that label -- a base
# checkpoint without a fine-tuned classification head typically reports
# LABEL_0 / LABEL_1 instead, in which case that comparison never matches.
model_name = "distilbert-base-uncased"
# Load the tokenizer and model explicitly so the pipeline below is built from
# these exact objects rather than resolving them from a name on its own.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Module-level text-classification pipeline that classify_clauses() calls once
# per clause. Truncation and the length cap are set here, at construction.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    truncation=True,  # truncate over-long inputs to max_length tokens
    max_length=512,
    # Keep only the top prediction per input; classify_clauses() reads
    # result[0]['label'] and result[0]['score'].
    # NOTE(review): this flag is reportedly deprecated in newer transformers
    # releases in favour of top_k -- verify the output nesting before migrating.
    return_all_scores=False
)
def _clause_preview(clause, limit=300):
    """Return at most *limit* leading items of *clause* for display.

    Falls back to the string form when *clause* cannot be sliced (for example
    ``None`` or a number), so building a result record never raises.
    """
    try:
        return clause[:limit]
    except (TypeError, KeyError):
        # Non-sequences reject a slice with TypeError; mappings may raise
        # KeyError instead on Python versions where slices are hashable.
        return str(clause)[:limit]


def _risk_level(risk_score):
    """Map a numeric risk score to the "High" / "Medium" / "Low" bucket."""
    if risk_score > 0.7:
        return "High"
    if risk_score > 0.4:
        return "Medium"
    return "Low"


def classify_clauses(clauses):
    """Score each clause with the module-level ``classifier`` pipeline.

    Args:
        clauses: Iterable of clause texts to classify.

    Returns:
        A list with one dict per clause, in input order. Each dict has:
        ``"clause"`` (the clause cut to 300 characters, for display only),
        ``"risk_score"`` (the classifier score when the label is
        ``'POSITIVE'``, otherwise ``1 - score``) and ``"risk_level"``
        (``"High"`` above 0.7, ``"Medium"`` above 0.4, else ``"Low"``).
        If classifying a clause raises, its dict instead carries
        ``"risk_score": 0``, ``"risk_level": "Unknown"`` and an ``"error"``
        message, and processing continues with the next clause.
    """
    results = []
    for clause in clauses:
        try:
            prediction = classifier(clause)[0]
            score = prediction['score']
            # NOTE(review): assumes the model's label names include
            # 'POSITIVE'; any other label takes the `1 - score` branch.
            # Confirm against the loaded checkpoint's label names.
            risk_score = score if prediction['label'] == 'POSITIVE' else 1 - score
            entry = {
                "clause": _clause_preview(clause),
                "risk_score": risk_score,
                "risk_level": _risk_level(risk_score),
            }
        except Exception as exc:
            # Deliberate per-clause boundary: one bad clause must not abort
            # the batch. The preview helper cannot raise on a non-sliceable
            # clause, so this handler always yields a record.
            entry = {
                "clause": _clause_preview(clause),
                "risk_score": 0,
                "risk_level": "Unknown",
                "error": str(exc),
            }
        results.append(entry)
    return results
|