Legal_AI_Agent

Build error

App Files Community

cryogenic22 committed on Dec 10, 2024

Commit

90451e9

verified ·

1 Parent(s): dc3b7e9

Update utils/case_manager.py

Browse files

Files changed (1) hide show

utils/case_manager.py +115 -318

utils/case_manager.py CHANGED Viewed

@@ -1,392 +1,189 @@
 import os
 import json
-import shutil
 from datetime import datetime
-from typing import List, Dict, Optional, Union
 from pathlib import Path
-import uuid
 class CaseManager:
     def __init__(self, base_path: str = "data/cases"):
-        """Initialize CaseManager with enhanced storage and indexing."""
         self.base_path = Path(base_path)
         self.base_path.mkdir(parents=True, exist_ok=True)
-        # Initialize indexes
-        self.case_index_path = self.base_path / "case_index.json"
-        self.document_index_path = self.base_path / "document_index.json"
-        self.cases = {}
-        self.document_index = {}
-        self._load_indexes()
-        self._verify_integrity()
-    def _load_indexes(self):
-        """Load case and document indexes with error handling."""
-        try:
-            if self.case_index_path.exists():
-                with open(self.case_index_path, 'r') as f:
-                    self.cases = json.load(f)
-            if self.document_index_path.exists():
-                with open(self.document_index_path, 'r') as f:
-                    self.document_index = json.load(f)
-        except json.JSONDecodeError as e:
-            print(f"Error loading indexes: {e}")
-            self._backup_and_reset_indexes()
-    def _backup_and_reset_indexes(self):
-        """Create backup of corrupted indexes and reset."""
-        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        if self.case_index_path.exists():
-            backup_path = self.case_index_path.with_suffix(f'.backup_{timestamp}')
-            shutil.copy2(self.case_index_path, backup_path)
-        if self.document_index_path.exists():
-            backup_path = self.document_index_path.with_suffix(f'.backup_{timestamp}')
-            shutil.copy2(self.document_index_path, backup_path)
         self.cases = {}
-        self.document_index = {}
-        self._save_indexes()
-    def _verify_integrity(self):
-        """Verify and repair case and document integrity."""
-        for case_id, case in list(self.cases.items()):
-            case_path = self.base_path / case_id
-            if not case_path.exists():
-                print(f"Case directory missing for {case_id}, removing from index")
-                del self.cases[case_id]
-                continue
-            # Verify and repair document references
-            valid_docs = []
-            for doc in case.get('documents', []):
-                doc_id = doc.get('id')
-                if doc_id in self.document_index:
-                    valid_docs.append(doc)
-                else:
-                    print(f"Document {doc_id} missing from index, removing reference")
-            case['documents'] = valid_docs
-        self._save_indexes()
-    def _save_indexes(self):
-        """Save case and document indexes atomically."""
-        # Save with temporary files first
-        temp_case_index = self.case_index_path.with_suffix('.tmp')
-        temp_doc_index = self.document_index_path.with_suffix('.tmp')
         try:
-            with open(temp_case_index, 'w') as f:
-                json.dump(self.cases, f, indent=2)
-            with open(temp_doc_index, 'w') as f:
-                json.dump(self.document_index, f, indent=2)
-            # Rename temporary files to actual files
-            if temp_case_index.exists():
-                temp_case_index.replace(self.case_index_path)
-            if temp_doc_index.exists():
-                temp_doc_index.replace(self.document_index_path)
         except Exception as e:
-            print(f"Error saving indexes: {e}")
-            # Clean up temporary files
-            temp_case_index.unlink(missing_ok=True)
-            temp_doc_index.unlink(missing_ok=True)
-            raise
-    def create_case(self, title: str, description: str, case_type: str,
-                   tags: List[str] = None, priority: str = "normal") -> str:
-        """Create a new case with enhanced metadata."""
-        case_id = str(uuid.uuid4())
         case_path = self.base_path / case_id
         case_path.mkdir(exist_ok=True)
         (case_path / 'documents').mkdir(exist_ok=True)
         case_data = {
             'id': case_id,
             'title': title,
             'description': description,
             'case_type': case_type,
-            'status': 'active',
-            'priority': priority,
-            'tags': tags or [],
             'created_at': datetime.now().isoformat(),
             'updated_at': datetime.now().isoformat(),
-            'documents': [],
-            'notes': [],
-            'statistics': {
-                'document_count': 0,
-                'total_pages': 0,
-                'last_activity': datetime.now().isoformat()
-            }
         }
         self.cases[case_id] = case_data
-        self._save_indexes()
         return case_id
-    def add_document(self, case_id: str, document_data: Dict) -> str:
-        """Add a document with enhanced metadata and validation."""
-        case = self.cases.get(case_id)
-        if not case:
-            raise ValueError(f"Case with ID {case_id} not found.")
-        # Generate document ID and enhance metadata
-        doc_id = str(uuid.uuid4())
-        document_data.update({
-            'id': doc_id,
-            'case_id': case_id,
-            'added_at': datetime.now().isoformat(),
-            'updated_at': datetime.now().isoformat(),
-            'status': document_data.get('status', 'active'),
-            'version': 1,
-            'tags': document_data.get('tags', []),
-            'metadata': {
-                **document_data.get('metadata', {}),
-                'file_type': document_data.get('file_type', 'unknown'),
-                'page_count': document_data.get('page_count', 0),
-                'word_count': document_data.get('word_count', 0)
-            }
-        })
-        # Update indexes
-        self.document_index[doc_id] = document_data
-        case['documents'].append({
-            'id': doc_id,
-            'title': document_data.get('title', 'Untitled'),
-            'added_at': document_data['added_at']
-        })
-        # Update case statistics
-        case['statistics']['document_count'] += 1
-        case['statistics']['total_pages'] += document_data['metadata']['page_count']
-        case['statistics']['last_activity'] = datetime.now().isoformat()
         case['updated_at'] = datetime.now().isoformat()
-        self._save_indexes()
-        return doc_id
-    def update_document(self, doc_id: str, updates: Dict) -> Dict:
-        """Update document metadata and content."""
-        if doc_id not in self.document_index:
-            raise ValueError(f"Document with ID {doc_id} not found.")
-        document = self.document_index[doc_id]
-        document.update(updates)
-        document['updated_at'] = datetime.now().isoformat()
-        document['version'] += 1
-        # Update case statistics if needed
-        case = self.cases.get(document['case_id'])
-        if case:
-            case['statistics']['last_activity'] = datetime.now().isoformat()
-            case['updated_at'] = datetime.now().isoformat()
-        self._save_indexes()
-        return document
-    def delete_document(self, doc_id: str) -> bool:
-        """Delete a document and update case statistics."""
-        if doc_id not in self.document_index:
             return False
-        document = self.document_index[doc_id]
-        case_id = document['case_id']
-        case = self.cases.get(case_id)
-        if case:
-            # Update case statistics
-            case['statistics']['document_count'] -= 1
-            case['statistics']['total_pages'] -= document['metadata']['page_count']
-            case['statistics']['last_activity'] = datetime.now().isoformat()
-            case['updated_at'] = datetime.now().isoformat()
-            # Remove document reference from case
-            case['documents'] = [doc for doc in case['documents'] if doc['id'] != doc_id]
-        # Delete document from index
-        del self.document_index[doc_id]
-        # Delete document files if they exist
-        doc_path = self.base_path / case_id / 'documents' / doc_id
-        if doc_path.exists():
-            shutil.rmtree(doc_path)
-        self._save_indexes()
-        return True
-    def update_case(self, case_id: str, updates: Dict) -> Dict:
-        """Update case metadata and properties."""
         if case_id not in self.cases:
             raise ValueError(f"Case with ID {case_id} not found.")
-        case = self.cases[case_id]
-        for key, value in updates.items():
-            if key not in ['id', 'created_at', 'documents']:
-                case[key] = value
         case['updated_at'] = datetime.now().isoformat()
-        self._save_indexes()
-        return case
-    def delete_case(self, case_id: str) -> bool:
-        """Delete a case and all associated documents."""
         if case_id not in self.cases:
             return False
         case = self.cases[case_id]
-        # Delete all associated documents
-        for doc in case['documents']:
-            self.delete_document(doc['id'])
-        # Delete case directory
-        case_path = self.base_path / case_id
-        if case_path.exists():
-            shutil.rmtree(case_path)
-        # Remove case from index
-        del self.cases[case_id]
-        self._save_indexes()
-        return True
-    def search(self, query: str, filters: Dict = None) -> List[Dict]:
-        """Enhanced search with filtering and sorting."""
         results = []
         query = query.lower()
         for case in self.cases.values():
-            # Apply filters if provided
-            if filters:
-                if not self._matches_filters(case, filters):
-                    continue
             # Search in case metadata
             if (query in case['title'].lower() or
                 query in case['description'].lower() or
-                query in case['case_type'].lower() or
-                any(query in tag.lower() for tag in case['tags'])):
                 results.append({
                     'type': 'case',
-                    'data': case,
-                    'relevance': self._calculate_relevance(query, case)
                 })
             # Search in documents
-            for doc_ref in case['documents']:
-                doc = self.document_index.get(doc_ref['id'])
-                if doc and (query in doc['title'].lower() or
-                           any(query in tag.lower() for tag in doc.get('tags', []))):
                     results.append({
                         'type': 'document',
-                        'data': doc,
                         'case_id': case['id'],
-                        'relevance': self._calculate_relevance(query, doc)
                     })
-        # Sort results by relevance
-        results.sort(key=lambda x: x['relevance'], reverse=True)
         return results
-    def _matches_filters(self, case: Dict, filters: Dict) -> bool:
-        """Check if case matches all specified filters."""
-        for key, value in filters.items():
-            if key == 'date_range':
-                case_date = datetime.fromisoformat(case['created_at'])
-                if not (value['start'] <= case_date <= value['end']):
-                    return False
-            elif key == 'tags':
-                if not any(tag in case['tags'] for tag in value):
-                    return False
-            elif key in case and case[key] != value:
-                return False
-        return True
-    def _calculate_relevance(self, query: str, item: Dict) -> float:
-        """Calculate search result relevance score."""
-        score = 0.0
-        # Title match
-        if query in item['title'].lower():
-            score += 1.0
-        # Tag matches
-        for tag in item.get('tags', []):
-            if query in tag.lower():
-                score += 0.5
-        # Recent items get higher score
-        days_old = (datetime.now() - datetime.fromisoformat(item['created_at'])).days
-        score += max(0, 1 - (days_old / 365))  # Decay over a year
-        return score
-    def get_case_statistics(self, case_id: str) -> Dict:
-        """Get detailed statistics for a case."""
-        case = self.cases.get(case_id)
-        if not case:
-            raise ValueError(f"Case with ID {case_id} not found.")
-        stats = case['statistics'].copy()
-        stats.update({
-            'document_types': self._count_document_types(case),
-            'activity_timeline': self._generate_activity_timeline(case),
-            'tag_distribution': self._count_tags(case)
-        })
-        return stats
-    def _count_document_types(self, case: Dict) -> Dict:
-        """Count documents by type in a case."""
-        type_counts = {}
-        for doc_ref in case['documents']:
-            doc = self.document_index.get(doc_ref['id'])
-            if doc:
-                doc_type = doc['metadata']['file_type']
-                type_counts[doc_type] = type_counts.get(doc_type, 0) + 1
-        return type_counts
-    def _generate_activity_timeline(self, case: Dict) -> List[Dict]:
-        """Generate activity timeline for a case."""
-        timeline = []
-        # Add case creation
-        timeline.append({
-            'date': case['created_at'],
-            'type': 'case_created',
-            'description': f"Case '{case['title']}' created"
-        })
-        # Add document activities
-        for doc_ref in case['documents']:
-            doc = self.document_index.get(doc_ref['id'])
-            if doc:
-                timeline.append({
-                    'date': doc['added_at'],
-                    'type': 'document_added',
-                    'description': f"Document '{doc['title']}' added"
-                })
-        # Sort timeline by date
-        timeline.sort(key=lambda x: x['date'])
-        return timeline
-    def _count_tags(self, case: Dict) -> Dict:
-        """Count tag occurrences in a case."""
-        tag_counts = {}
-        # Count case tags
-        for tag in case['tags']:
-            tag_counts[tag] = tag_counts.get(tag, 0) + 1
-        # Count document tags
-        for doc_ref in case['documents']:
-            doc = self.document_index.get(doc_ref['id'])
-            if doc:
-                for tag in doc.get('tags', []):
-                    tag_counts[tag] = tag_counts.get(tag, 0) + 1
-        return tag_counts

 import os
 import json
 from datetime import datetime
+from typing import List, Dict, Optional
 from pathlib import Path
+import shutil
 class CaseManager:
     def __init__(self, base_path: str = "data/cases"):
+        """Initialize CaseManager with a base directory to store cases."""
         self.base_path = Path(base_path)
         self.base_path.mkdir(parents=True, exist_ok=True)
         self.cases = {}
+        self._load_cases()
+    def _load_cases(self):
+        """Load existing cases from storage."""
         try:
+            for case_dir in self.base_path.iterdir():
+                if case_dir.is_dir():
+                    metadata_file = case_dir / 'metadata.json'
+                    if metadata_file.exists():
+                        with open(metadata_file, 'r') as f:
+                            case_data = json.load(f)
+                            self.cases[case_dir.name] = case_data
         except Exception as e:
+            print(f"Error loading cases: {e}")
+            self.cases = {}
+    def create_case(self, title: str, description: str, case_type: str) -> str:
+        """Create a new case and save it to storage."""
+        case_id = datetime.now().strftime('%Y%m%d_%H%M%S')
         case_path = self.base_path / case_id
+        # Create case directory structure
         case_path.mkdir(exist_ok=True)
         (case_path / 'documents').mkdir(exist_ok=True)
+        # Prepare case data
         case_data = {
             'id': case_id,
             'title': title,
             'description': description,
             'case_type': case_type,
             'created_at': datetime.now().isoformat(),
             'updated_at': datetime.now().isoformat(),
+            'status': 'active',
+            'documents': []
         }
+        # Save case metadata
+        with open(case_path / 'metadata.json', 'w') as f:
+            json.dump(case_data, f, indent=2)
         self.cases[case_id] = case_data
         return case_id
+    def get_all_cases(self) -> List[Dict]:
+        """Get a list of all cases."""
+        return list(self.cases.values())
+    def get_case(self, case_id: str) -> Optional[Dict]:
+        """Get details of a specific case."""
+        return self.cases.get(case_id)
+    def update_case(self, case_id: str, updates: Dict) -> Optional[Dict]:
+        """Update case details."""
+        if case_id not in self.cases:
+            return None
+        case = self.cases[case_id]
+        case.update(updates)
         case['updated_at'] = datetime.now().isoformat()
+        # Save updated metadata
+        with open(self.base_path / case_id / 'metadata.json', 'w') as f:
+            json.dump(case, f, indent=2)
+        return case
+    def delete_case(self, case_id: str) -> bool:
+        """Delete a case and all its files."""
+        if case_id not in self.cases:
             return False
+        try:
+            # Remove case directory and all contents
+            shutil.rmtree(self.base_path / case_id)
+            del self.cases[case_id]
+            return True
+        except Exception as e:
+            print(f"Error deleting case {case_id}: {e}")
+            return False
+    def add_document(self, case_id: str, document_data: Dict):
+        """Add a document to a case."""
         if case_id not in self.cases:
             raise ValueError(f"Case with ID {case_id} not found.")
+        # Update document metadata
+        document_data['id'] = document_data.get('id', datetime.now().strftime('%Y%m%d_%H%M%S'))
+        document_data['added_at'] = document_data.get('added_at', datetime.now().isoformat())
+        # Add document to case
+        case = self.cases[case_id]
+        case['documents'].append(document_data)
         case['updated_at'] = datetime.now().isoformat()
+        # Save updated case metadata
+        with open(self.base_path / case_id / 'metadata.json', 'w') as f:
+            json.dump(case, f, indent=2)
+    def remove_document(self, case_id: str, document_id: str) -> bool:
+        """Remove a document from a case."""
         if case_id not in self.cases:
             return False
         case = self.cases[case_id]
+        case['documents'] = [doc for doc in case['documents'] if doc['id'] != document_id]
+        case['updated_at'] = datetime.now().isoformat()
+        # Save updated case metadata
+        with open(self.base_path / case_id / 'metadata.json', 'w') as f:
+            json.dump(case, f, indent=2)
+        # Remove document files
+        try:
+            doc_path = self.base_path / case_id / 'documents' / document_id
+            if doc_path.exists():
+                shutil.rmtree(doc_path)
+            return True
+        except Exception as e:
+            print(f"Error removing document files: {e}")
+            return False
+    def list_documents(self, case_id: str) -> List[Dict]:
+        """List all documents in a case."""
+        if case_id not in self.cases:
+            raise ValueError(f"Case with ID {case_id} not found.")
+        return self.cases[case_id].get('documents', [])
+    def get_document(self, case_id: str, document_id: str) -> Optional[Dict]:
+        """Get a specific document from a case."""
+        if case_id not in self.cases:
+            return None
+        for doc in self.cases[case_id].get('documents', []):
+            if doc['id'] == document_id:
+                return doc
+        return None
+    def search(self, query: str) -> List[Dict]:
+        """Search for cases or documents."""
         results = []
         query = query.lower()
         for case in self.cases.values():
             # Search in case metadata
             if (query in case['title'].lower() or
                 query in case['description'].lower() or
+                query in case['case_type'].lower()):
                 results.append({
                     'type': 'case',
+                    'data': case
                 })
             # Search in documents
+            for doc in case.get('documents', []):
+                if query in doc.get('title', '').lower():
                     results.append({
                         'type': 'document',
                         'case_id': case['id'],
+                        'data': doc
                     })
         return results
+    def get_case_stats(self, case_id: str) -> Optional[Dict]:
+        """Get statistics for a case."""
+        if case_id not in self.cases:
+            return None
+        case = self.cases[case_id]
+        return {
+            'document_count': len(case.get('documents', [])),
+            'created_at': case['created_at'],
+            'last_updated': case['updated_at'],
+            'status': case.get('status', 'active')
+        }