Spaces:
Sleeping
Sleeping
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
# Checkpoint used for both the tokenizer and the sequence-classification model.
# NOTE(review): this is the base DistilBERT checkpoint, not one fine-tuned for
# classification. classify_clauses() compares the predicted label against
# "POSITIVE", so presumably a fine-tuned checkpoint exposing that label is
# intended here -- confirm before relying on the scores.
model_name = "distilbert-base-uncased"

# Load tokenizer and model explicitly so the very same objects are handed to
# the pipeline below.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# truncation / max_length are forwarded to the tokenizer, so inputs longer
# than 512 tokens are cut instead of failing inside the model.
# The deprecated `return_all_scores=False` argument is intentionally not
# passed: the pipeline default already returns only the top-scoring label for
# each input, so `classifier(text)[0]` is still a {"label", "score"} dict.
classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    truncation=True,
    max_length=512,
)
def _clause_preview(clause, limit=300):
    """Return at most *limit* leading items of *clause* for display purposes."""
    try:
        return clause[:limit]
    except (TypeError, KeyError):
        # Non-sliceable input (None, numbers, dicts, ...): fall back to its
        # text form so building the result entry can never raise.
        return str(clause)[:limit]


def _risk_level(risk_score):
    """Map a numeric risk score onto the "High" / "Medium" / "Low" buckets."""
    if risk_score > 0.7:
        return "High"
    if risk_score > 0.4:
        return "Medium"
    return "Low"


def classify_clauses(clauses, *, classify_fn=None):
    """Score each clause and bucket it into a risk level.

    Every clause is classified independently; a failure on one clause is
    recorded in that clause's entry and does not stop the remaining clauses
    from being processed.

    Args:
        clauses: Iterable of clause texts to classify.
        classify_fn: Optional callable taking one clause and returning a
            sequence whose first item is a mapping with "label" and "score"
            keys. Defaults to the module-level ``classifier`` pipeline.

    Returns:
        A list with one dict per clause, in input order. Successful entries
        hold "clause" (truncated to 300 characters for display only),
        "risk_score" and "risk_level" ("High" above 0.7, "Medium" above 0.4,
        otherwise "Low"). Failed entries hold "clause", a "risk_score" of 0,
        a "risk_level" of "Unknown" and the exception text under "error".
    """
    if classify_fn is None:
        classify_fn = classifier

    results = []
    for clause in clauses:
        # Computed before the try block: the error entry needs it too, and
        # slicing a non-string clause inside the handler would raise again
        # and abort the whole batch.
        preview = _clause_preview(clause)
        try:
            top = classify_fn(clause)[0]
            score = top["score"]
            # Only the exact label "POSITIVE" counts as the risky class; any
            # other label is scored as the complement.
            # NOTE(review): confirm this matches the label names the loaded
            # model actually emits.
            risk_score = score if top["label"] == "POSITIVE" else 1 - score
            entry = {
                "clause": preview,
                "risk_score": risk_score,
                "risk_level": _risk_level(risk_score),
            }
        except Exception as e:
            # Deliberate per-clause best effort: report the failure in the
            # entry instead of propagating it.
            entry = {
                "clause": preview,
                "risk_score": 0,
                "risk_level": "Unknown",
                "error": str(e),
            }
        results.append(entry)
    return results