Spaces:

negi2725
/

LegalLens-API

Runtime error

b24122 committed on Jul 25, 2025

Commit

9ae222c

1 Parent(s): 9844436

Add LegalBERT model loading from zip and direct files for case analysis

Implement LegalBERT model loading from a zip archive or a model directory; rename `predict_verdict` to `predictVerdict` in `LegalBertService` (updating its call site in `app/api/routes.py`) and update the documentation.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 63975d62-3d3b-48af-8685-b7e915f31f2b
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/a5a12774-3181-414d-89e4-a4da8e3fb1ca/63975d62-3d3b-48af-8685-b7e915f31f2b/i8A93Md

Files changed (3) hide show

app/api/routes.py +1 -1
app/services/legal_bert.py +109 -22
models/README.md +20 -4

app/api/routes.py CHANGED Viewed

@@ -42,7 +42,7 @@ async def analyze_case(request: CaseAnalysisRequest):
         logger.info(f"Analyzing case with text length: {len(request.caseText)}")
         # Step 1: Get initial verdict from LegalBERT
-        initial_verdict = legal_bert_service.predict_verdict(request.caseText)
         confidence = legal_bert_service.getConfidence(request.caseText)
         logger.info(f"Initial verdict: {initial_verdict}, confidence: {confidence}")

         logger.info(f"Analyzing case with text length: {len(request.caseText)}")
         # Step 1: Get initial verdict from LegalBERT
+        initial_verdict = legal_bert_service.predictVerdict(request.caseText)
         confidence = legal_bert_service.getConfidence(request.caseText)
         logger.info(f"Initial verdict: {initial_verdict}, confidence: {confidence}")

app/services/legal_bert.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from app.core.config import settings
 import logging
 import os
 logger = logging.getLogger(__name__)
@@ -11,45 +13,130 @@ class LegalBertService:
         self.model = None
         self._load_model()
     def _load_model(self):
         try:
-            if os.path.exists(settings.legal_bert_model_path):
-                logger.info(f"LegalBERT model path found: {settings.legal_bert_model_path}")
-                # TODO: Load actual model when torch/transformers are available
-                logger.info("Model loading placeholder - install torch and transformers to enable")
             else:
-                logger.warning(f"LegalBERT model path does not exist: {settings.legal_bert_model_path}")
-                logger.info("Model will be loaded when files are available")
         except Exception as e:
-            logger.error(f"Failed to load LegalBERT model: {str(e)}")
-    def predict_verdict(self, inputText: str) -> str:
         if not self.is_model_loaded():
-            # Return placeholder prediction for development
             logger.info("Using placeholder verdict prediction")
-            import hashlib
-            text_hash = int(hashlib.md5(inputText.encode()).hexdigest(), 16)
-            return "guilty" if text_hash % 2 == 1 else "not guilty"
-        # TODO: Implement actual prediction when model is loaded
-        return "not guilty"
     def getConfidence(self, inputText: str) -> float:
         if not self.is_model_loaded():
-            # Return placeholder confidence for development
             logger.info("Using placeholder confidence score")
-            import hashlib
-            text_hash = int(hashlib.md5(inputText.encode()).hexdigest(), 16)
-            return 0.5 + (text_hash % 100) / 200.0  # Returns 0.5-0.99
-        # TODO: Implement actual confidence when model is loaded
-        return 0.75
     def is_model_loaded(self) -> bool:
-        return False  # Always False until actual model is loaded
     def get_device(self) -> str:
         return str(self.device)
     def is_healthy(self) -> bool:
-        return True  # Always healthy for placeholder implementation

 from app.core.config import settings
 import logging
 import os
+import zipfile
+import hashlib
 logger = logging.getLogger(__name__)
         self.model = None
         self._load_model()
+    def _extract_model_from_zip(self, zipPath: str, extractPath: str):
+        """Extract LegalBERT model from zip file"""
+        try:
+            if not os.path.exists(zipPath):
+                logger.warning(f"Model zip file not found: {zipPath}")
+                return False
+            if not os.path.exists(extractPath):
+                os.makedirs(extractPath)
+                logger.info(f"Created model directory: {extractPath}")
+            # Check if model is already extracted
+            if os.path.exists(os.path.join(extractPath, "config.json")):
+                logger.info("Model already extracted")
+                return True
+            logger.info(f"Extracting model from {zipPath} to {extractPath}")
+            with zipfile.ZipFile(zipPath, 'r') as zipRef:
+                zipRef.extractall(extractPath)
+            logger.info("Model extraction completed")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to extract model: {str(e)}")
+            return False
     def _load_model(self):
         try:
+            # Check for zip file first
+            zipPath = os.path.join("./models", "legalbert_epoch4.zip")
+            if os.path.exists(zipPath):
+                if self._extract_model_from_zip(zipPath, settings.legal_bert_model_path):
+                    logger.info("Model zip file found and extracted")
+            # Try to load the actual model
+            if os.path.exists(settings.legal_bert_model_path) and os.path.exists(os.path.join(settings.legal_bert_model_path, "config.json")):
+                try:
+                    import torch
+                    import torch.nn.functional as F
+                    from transformers import AutoTokenizer, AutoModelForSequenceClassification
+                    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+                    logger.info(f"Loading LegalBERT model from {settings.legal_bert_model_path}")
+                    self.tokenizer = AutoTokenizer.from_pretrained(settings.legal_bert_model_path)
+                    self.model = AutoModelForSequenceClassification.from_pretrained(
+                        settings.legal_bert_model_path
+                    ).to(self.device)
+                    logger.info(f"LegalBERT model loaded successfully on {self.device}")
+                except ImportError:
+                    logger.warning("torch/transformers not installed - using placeholder mode")
+                except Exception as e:
+                    logger.error(f"Failed to load actual model: {str(e)}")
             else:
+                logger.warning(f"LegalBERT model files not found in: {settings.legal_bert_model_path}")
+                logger.info("Place your legalbert_epoch4.zip in ./models/ or model files directly in ./models/legalbert_model/")
         except Exception as e:
+            logger.error(f"Failed to initialize LegalBERT service: {str(e)}")
+    def predictVerdict(self, inputText: str) -> str:
         if not self.is_model_loaded():
             logger.info("Using placeholder verdict prediction")
+            textHash = int(hashlib.md5(inputText.encode()).hexdigest(), 16)
+            return "guilty" if textHash % 2 == 1 else "not guilty"
+        try:
+            import torch
+            import torch.nn.functional as F
+            inputs = self.tokenizer(
+                inputText,
+                return_tensors="pt",
+                truncation=True,
+                padding=True
+            ).to(self.device)
+            with torch.no_grad():
+                logits = self.model(**inputs).logits
+                probabilities = F.softmax(logits, dim=1)
+                predictedLabel = torch.argmax(probabilities, dim=1).item()
+            return "guilty" if predictedLabel == 1 else "not guilty"
+        except Exception as e:
+            logger.error(f"Error predicting verdict: {str(e)}")
+            return "not guilty"
     def getConfidence(self, inputText: str) -> float:
         if not self.is_model_loaded():
             logger.info("Using placeholder confidence score")
+            textHash = int(hashlib.md5(inputText.encode()).hexdigest(), 16)
+            return 0.5 + (textHash % 100) / 200.0
+        try:
+            import torch
+            import torch.nn.functional as F
+            inputs = self.tokenizer(
+                inputText,
+                return_tensors="pt",
+                truncation=True,
+                padding=True
+            ).to(self.device)
+            with torch.no_grad():
+                logits = self.model(**inputs).logits
+                probabilities = F.softmax(logits, dim=1)
+            return float(torch.max(probabilities).item())
+        except Exception as e:
+            logger.error(f"Error getting confidence: {str(e)}")
+            return 0.5
     def is_model_loaded(self) -> bool:
+        return self.model is not None and self.tokenizer is not None
     def get_device(self) -> str:
         return str(self.device)
     def is_healthy(self) -> bool:
+        return True

models/README.md CHANGED Viewed

@@ -2,8 +2,19 @@
 ## LegalBERT Model
-Place your LegalBERT model files in the `legalbert_model/` subdirectory:
 ```
 models/
 └── legalbert_model/
@@ -14,8 +25,6 @@ models/
     └── vocab.txt
 ```
-The model should be compatible with Hugging Face transformers library and fine-tuned for legal text classification.
 ## Installation
 Once you have the model files:
@@ -32,4 +41,11 @@ Once you have the model files:
 - Should output binary classification (guilty/not guilty)
 - Compatible with AutoModelForSequenceClassification
 - Supports text truncation and padding
-- Returns logits that can be converted to probabilities

 ## LegalBERT Model
+You can add your fine-tuned LegalBERT model in two ways:
+### Option 1: Zip File (Recommended)
+Place your zipped model in this directory, named exactly `legalbert_epoch4.zip` (the service looks for this specific file name):
+```
+models/
+└── legalbert_epoch4.zip
+```
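+The system will automatically extract it to `legalbert_model/` when the server starts. The archive must contain `config.json` at its top level (not inside a subfolder); otherwise the extracted model will not be detected.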
+### Option 2: Direct Files
+Place your LegalBERT model files directly in the `legalbert_model/` subdirectory:
 ```
 models/
 └── legalbert_model/
     └── vocab.txt
 ```
 ## Installation
 Once you have the model files:
 - Should output binary classification (guilty/not guilty)
 - Compatible with AutoModelForSequenceClassification
 - Supports text truncation and padding
+- Returns logits that can be converted to probabilities
+## Auto-Detection
+The service checks for models in this order:
+1. `legalbert_epoch4.zip` (extracted automatically, unless the model directory already contains `config.json`)
+2. `legalbert_model/` directory with model files
+3. Falls back to placeholder mode if neither is found, or if `torch`/`transformers` are not installed