cryogenic22 committed on
Commit
5219dc9
·
verified ·
1 Parent(s): 68a9ffc

Create utils/legal_prompt_generator.py

Browse files
Files changed (1) hide show
  1. utils/legal_prompt_generator.py +248 -0
utils/legal_prompt_generator.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Dict, List, Optional
3
+ from datetime import datetime
4
+
5
class LegalPromptGenerator:
    """Build system, user and follow-up prompts for legal document analysis.

    The generator is backed by an ontology loaded from JSON: a mapping whose
    ``"@graph"`` entry is a list of nodes. Nodes whose ``"@type"`` is
    ``"vocab:Jurisdiction"`` feed the jurisdiction hierarchy; nodes carrying
    an ``"rdfs:label"`` are candidate legal concepts that are matched against
    the query and the retrieved context.

    NOTE(review): this class was reconstructed from a rendered diff in which
    leading whitespace was not visible, so any indentation the original
    prompt templates carried inside their string literals could not be
    recovered. Template lines are emitted exactly as they were visible;
    confirm against the repository copy.
    """

    # Placeholder used when a chunk has no usable document type/jurisdiction.
    _UNKNOWN = "unknown"

    # Maximum number of characters of the previous answer that is echoed
    # back in a follow-up prompt.
    _SUMMARY_CHARS = 500

    def __init__(self, ontology_path: str = "data/legal_ontology.json"):
        """Initialize prompt generator with legal ontology.

        Args:
            ontology_path: Path of the ontology JSON file. A missing file is
                tolerated and results in an empty ontology.
        """
        self.ontology = self._load_ontology(ontology_path)
        self.jurisdiction_hierarchies = self._build_jurisdiction_hierarchy()

    def _load_ontology(self, path: str) -> Dict:
        """Load and validate legal ontology.

        Args:
            path: Location of the ontology JSON file.

        Returns:
            The parsed ontology, or ``{"@graph": []}`` when the file does
            not exist.

        Raises:
            ValueError: If the file parses but its top level is not a JSON
                object (every other method relies on ``dict`` access).
            json.JSONDecodeError: If the file is not valid JSON; a corrupt
                ontology is deliberately not silenced.
        """
        try:
            # JSON is UTF-8 by specification; do not depend on the locale's
            # default encoding.
            with open(path, 'r', encoding='utf-8') as f:
                ontology = json.load(f)
        except FileNotFoundError:
            print(f"Ontology file not found at {path}, using empty ontology")
            return {"@graph": []}

        if not isinstance(ontology, dict):
            raise ValueError(
                f"Ontology at {path} must be a JSON object with an '@graph' "
                f"list, got {type(ontology).__name__}"
            )
        return ontology

    def _build_jurisdiction_hierarchy(self) -> Dict:
        """Build hierarchy of jurisdictions from ontology.

        Returns:
            Mapping of parent jurisdiction id to the list of ids of its
            direct children, in ontology order. Jurisdiction nodes without a
            parent or without an ``"@id"`` are ignored.
        """
        hierarchies = {}
        for entry in self.ontology.get("@graph", []):
            if entry.get("@type") != "vocab:Jurisdiction":
                continue
            parent = entry.get("vocab:parentJurisdiction")
            jurisdiction_id = entry.get("@id")
            if parent and jurisdiction_id:
                hierarchies.setdefault(parent, []).append(jurisdiction_id)
        return hierarchies

    def generate_system_message(self,
                                context_chunks: List[Dict],
                                query: str,
                                case_metadata: Optional[Dict] = None) -> str:
        """Generate enhanced system message using legal context and ontology.

        Args:
            context_chunks: Retrieved chunks. Each may carry a ``'text'``
                string and a ``'metadata'`` dict with ``'type'`` and
                ``'jurisdiction'``; missing or empty values are reported as
                ``"unknown"``.
            query: The user's question, used for concept matching.
            case_metadata: Optional case details appended as an extra
                section when truthy.

        Returns:
            The complete system prompt. Document types and jurisdictions are
            listed in sorted order so that identical inputs always produce
            an identical prompt.
        """
        # Tolerate chunks without metadata instead of raising KeyError.
        metadata = [chunk.get('metadata') or {} for chunk in context_chunks]

        # Extract document types and jurisdictions (coerced to str so that a
        # None/numeric value cannot break the joins below).
        doc_types = {str(meta.get('type') or self._UNKNOWN) for meta in metadata}
        jurisdictions = {
            str(meta.get('jurisdiction') or self._UNKNOWN) for meta in metadata
        }

        # Get relevant legal concepts from ontology
        relevant_concepts = self._get_relevant_concepts(query, context_chunks)

        # Get related jurisdictions
        related_jurisdictions = self._get_related_jurisdictions(jurisdictions)

        # Build instruction sets based on document types
        instruction_sets = self._get_type_specific_instructions(doc_types)

        # Set iteration order varies between interpreter runs; sort for a
        # reproducible prompt.
        doc_types_text = ', '.join(sorted(doc_types))
        jurisdictions_text = ', '.join(sorted(jurisdictions))
        related_text = ', '.join(sorted(str(item) for item in related_jurisdictions))

        case_section = (
            self._get_case_specific_instructions(case_metadata)
            if case_metadata else ""
        )

        # Format the system message
        system_message = f"""You are a specialized legal AI assistant analyzing legal documents with the following context:

DOCUMENT CONTEXT:
Document Types Present: {doc_types_text}
Primary Jurisdictions: {jurisdictions_text}
Related Jurisdictions: {related_text}

LEGAL FRAMEWORK:
{self._format_legal_concepts(relevant_concepts)}

RESPONSE GUIDELINES:
1. Legal Analysis Requirements:
- Maintain formal legal language and terminology
- Cite specific sections and precedents when relevant
- Consider jurisdictional hierarchies and relationships
- Apply appropriate legal principles based on document type

2. Document-Specific Instructions:
{self._format_instructions(instruction_sets)}

3. Citation and Reference Requirements:
- Reference specific document sections with clear citations
- Link analysis to relevant legal concepts and principles
- Maintain proper legal citation format
- Include paragraph or page numbers when available

4. Jurisdictional Considerations:
- Consider jurisdictional hierarchy and precedence
- Apply jurisdiction-specific interpretations when relevant
- Note any cross-jurisdictional implications

5. Quality Standards:
- Maintain professional legal terminology
- Provide balanced analysis considering all relevant factors
- Structure response in a clear, logical manner
- Include relevant caveats and limitations

6. Ontological Integration:
- Incorporate relevant legal concepts from the ontology
- Link analysis to established legal principles
- Consider conceptual relationships and hierarchies
{case_section}
"""
        return system_message

    def generate_user_message(self, query: str, context: str) -> str:
        """Generate enhanced user message with context.

        Args:
            query: The legal question to answer.
            context: Pre-formatted text of the supporting documents.

        Returns:
            The user prompt embedding the question and the context.
        """
        return f"""Please analyze the following legal question with reference to the provided documents and legal framework.

QUESTION:
{query}

CONTEXT:
{context}

Please provide a comprehensive legal analysis that:
1. Addresses the specific question
2. References relevant document sections
3. Applies appropriate legal principles
4. Considers jurisdictional implications
5. Provides clear citations and references"""

    def _get_relevant_concepts(self, query: str, context_chunks: List[Dict]) -> List[Dict]:
        """Extract relevant legal concepts from ontology based on query and context.

        A concept is relevant when its ``"rdfs:label"`` occurs, ignoring
        case, as a substring of the query or of any chunk text.

        Args:
            query: The user's question.
            context_chunks: Retrieved chunks; a chunk without ``'text'``
                contributes nothing.

        Returns:
            One dict per matching node with the keys ``concept``, ``type``,
            ``description`` and ``relationships``, in ontology order.
        """
        chunk_text = ' '.join(str(chunk.get('text') or '') for chunk in context_chunks)
        # Lower-case the searchable text once, not once per ontology node.
        haystack = f"{query} {chunk_text}".lower()

        relevant_concepts = []
        for concept in self.ontology.get("@graph", []):
            label = concept.get("rdfs:label")
            # Skip nodes without a plain-text label. An empty label must be
            # skipped too: "" is a substring of every string and would make
            # the node match every query.
            if not isinstance(label, str) or not label.strip():
                continue

            if label.lower() in haystack:
                relevant_concepts.append({
                    "concept": label,
                    "type": concept.get("@type", "Unknown"),
                    "description": concept.get("rdfs:comment", ""),
                    "relationships": concept.get("vocab:relatedConcepts", [])
                })

        return relevant_concepts

    def _get_related_jurisdictions(self, jurisdictions: set) -> set:
        """Get related jurisdictions based on hierarchy.

        Args:
            jurisdictions: Jurisdiction ids found in the documents.

        Returns:
            The parents and siblings of the given jurisdictions, excluding
            the given jurisdictions themselves.
        """
        related = set()
        # One pass over the hierarchy: any family that contains one of the
        # requested jurisdictions contributes its parent and all children.
        for parent, children in self.jurisdiction_hierarchies.items():
            if not jurisdictions.isdisjoint(children):
                related.add(parent)
                related.update(children)
        return related - jurisdictions

    def _get_type_specific_instructions(self, doc_types: set) -> Dict[str, List[str]]:
        """Get specific instructions based on document types.

        Args:
            doc_types: Document types present in the context.

        Returns:
            Instructions for every recognised type that is present. Entries
            follow the fixed order of the table below rather than the
            arbitrary iteration order of ``doc_types``, so the rendered
            prompt is stable.
        """
        type_instructions = {
            "judgment": [
                "Analyze ratio decidendi and obiter dicta",
                "Consider precedential value",
                "Examine judicial reasoning and principles",
                "Note dissenting opinions if present"
            ],
            "legislation": [
                "Focus on statutory interpretation",
                "Consider legislative intent",
                "Note any amendments or repealed sections",
                "Examine definitions and scope"
            ],
            "contract": [
                "Analyze contractual terms and conditions",
                "Consider contract formation elements",
                "Examine rights and obligations",
                "Note any breach or performance issues"
            ],
            "regulatory": [
                "Focus on compliance requirements",
                "Consider regulatory framework",
                "Examine enforcement mechanisms",
                "Note reporting obligations"
            ]
        }

        return {
            doc_type: steps
            for doc_type, steps in type_instructions.items()
            if doc_type in doc_types
        }

    @staticmethod
    def _as_str_list(value) -> List[str]:
        """Normalise an ontology/metadata value to a list of strings.

        ``None`` becomes an empty list and a scalar becomes a one-element
        list, so a lone string is never split into characters by
        ``str.join``. A dict item that has an ``"@id"`` key is represented
        by that id; every other item is converted with ``str``.
        """
        if value is None:
            return []
        items = value if isinstance(value, (list, tuple, set, frozenset)) else [value]
        normalised = []
        for item in items:
            if isinstance(item, dict) and "@id" in item:
                item = item["@id"]
            normalised.append(str(item))
        return normalised

    def _format_legal_concepts(self, concepts: List[Dict]) -> str:
        """Format legal concepts for system message.

        Args:
            concepts: Concept dicts as produced by ``_get_relevant_concepts``.

        Returns:
            A newline-terminated bullet list, or a fixed sentence when no
            concept was identified.
        """
        if not concepts:
            return "No specific legal concepts identified."

        lines = ["Key Legal Concepts:"]
        for concept in concepts:
            lines.append(f"- {concept['concept']}")
            description = concept.get('description')
            if description:
                lines.append(f" Description: {description}")
            related = self._as_str_list(concept.get('relationships'))
            if related:
                lines.append(f" Related Concepts: {', '.join(related)}")
        return "\n".join(lines) + "\n"

    def _format_instructions(self, instruction_sets: Dict[str, List[str]]) -> str:
        """Format type-specific instructions.

        Args:
            instruction_sets: Mapping of document type to its instructions.

        Returns:
            One headed bullet list per document type, each preceded by a
            newline, or a generic sentence when the mapping is empty.
        """
        if not instruction_sets:
            return "Apply general legal analysis principles."

        sections = []
        for doc_type, instructions in instruction_sets.items():
            bullets = "\n".join(f"- {instruction}" for instruction in instructions)
            sections.append(f"\nFor {doc_type.title()} Documents:\n{bullets}")
        return "".join(sections)

    def _get_case_specific_instructions(self, case_metadata: Dict) -> str:
        """Generate case-specific instructions based on metadata.

        Args:
            case_metadata: May contain ``case_type``, ``priority``,
                ``key_parties``, ``created_at`` and ``tags``. Parties and
                tags may be a list, a single string, or ``None``.

        Returns:
            The formatted section, or ``""`` when no metadata is given.
        """
        if not case_metadata:
            return ""

        key_parties = ', '.join(self._as_str_list(case_metadata.get('key_parties')))
        tags = ', '.join(self._as_str_list(case_metadata.get('tags')))

        return f"""

CASE-SPECIFIC CONSIDERATIONS:
- Case Type: {case_metadata.get('case_type', 'Unknown')}
- Priority: {case_metadata.get('priority', 'Normal')}
- Key Parties: {key_parties}
- Timeline: Consider events from {case_metadata.get('created_at', 'case creation')}
- Tags: {tags}
"""

    def generate_follow_up_prompt(self,
                                  original_query: str,
                                  follow_up_query: str,
                                  previous_response: str,
                                  context_chunks: List[Dict]) -> str:
        """Generate prompt for follow-up questions.

        Args:
            original_query: The question that started the conversation.
            follow_up_query: The new question.
            previous_response: The earlier answer; only its first 500
                characters are included.
            context_chunks: Accepted for interface compatibility; not used
                when building the prompt.

        Returns:
            The follow-up prompt. An ellipsis follows the previous response
            only when it was actually truncated.
        """
        previous_response = previous_response or ""
        summary = previous_response[:self._SUMMARY_CHARS]
        if len(previous_response) > self._SUMMARY_CHARS:
            summary += "..."

        return f"""This is a follow-up question to a previous legal inquiry.

Original Question:
{original_query}

Previous Response Summary:
{summary}

Follow-up Question:
{follow_up_query}

Please provide a response that:
1. Maintains consistency with the previous analysis
2. Addresses the specific follow-up inquiry
3. Builds upon the established legal framework
4. Provides additional relevant context
5. References any new relevant documents or principles"""