Upload 2 files

Files changed (2) hide show

legal_inference/__init__.py ADDED Viewed


1	+ from .inference import LegalSectionRetriever
2	+

legal_inference/inference.py ADDED Viewed

+from transformers import BertTokenizer, BertForSequenceClassification
+import torch, json, os
class LegalSectionRetriever:
    """Score every known section against a query and keep those above a threshold.

    Attributes:
        model: ``BertForSequenceClassification`` loaded from ``repo_id`` and
            switched to eval mode.
        tokenizer: ``BertTokenizer`` loaded from the same ``repo_id``.
        threshold: Minimum sigmoid probability for a section to be returned.
        sections: Mapping loaded from ``sections.json``; iterated as
            ``(section id, section text)`` pairs.
    """

    def __init__(self, repo_id: str = "harsh580g/bert-query-section"):
        """Load the model, tokenizer, decision threshold and section texts.

        Args:
            repo_id: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.

        Raises:
            FileNotFoundError: If ``sections.json`` is not present next to
                this module.
            ValueError: If ``sections.json`` is not valid JSON or does not
                contain a JSON object.
        """
        # Load model + tokenizer from HF
        self.model = BertForSequenceClassification.from_pretrained(repo_id)
        self.tokenizer = BertTokenizer.from_pretrained(repo_id)
        self.model.eval()

        # The threshold is read from the model config only; 0.5 is used when
        # the config carries no ``threshold`` attribute.
        self.threshold = getattr(self.model.config, "threshold", 0.5)

        # sections.json is expected to live in the same directory as this file.
        sections_path = os.path.join(os.path.dirname(__file__), "sections.json")
        try:
            # Explicit UTF-8 so decoding does not depend on the platform locale.
            with open(sections_path, "r", encoding="utf-8") as f:
                sections = json.load(f)
        except FileNotFoundError:
            raise FileNotFoundError(
                "sections.json missing. Please download from HF repo."
            ) from None

        # Fail at load time rather than with an AttributeError on the first query.
        if not isinstance(sections, dict):
            raise ValueError(
                "sections.json must contain a JSON object mapping section ids "
                f"to section text, got {type(sections).__name__}."
            )
        self.sections = sections

    def get_relevant_sections(self, query: str) -> "list[tuple[str, float]]":
        """Return the sections whose score for ``query`` meets the threshold.

        Each section is paired with the query, passed through the model one
        pair at a time, and the logit is mapped to a probability with a
        sigmoid.

        Args:
            query: Text to compare against every section.

        Returns:
            A list of ``(section id, probability)`` tuples with
            ``probability >= self.threshold``, sorted by probability in
            descending order. Empty when nothing qualifies.
        """
        results = []
        # Inference only: one no_grad scope for the whole scan.
        with torch.no_grad():
            for sec_id, sec_text in self.sections.items():
                inputs = self.tokenizer(
                    query,
                    sec_text,
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                )
                logits = self.model(**inputs).logits.squeeze(-1)
                # NOTE(review): ``.item()`` needs exactly one element, so this
                # assumes a single-logit head (num_labels == 1) — TODO confirm
                # against the published model config.
                prob = torch.sigmoid(logits).item()
                if prob >= self.threshold:
                    results.append((sec_id, prob))
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results