Ashar086 committed on
Commit
e92a1df
·
verified ·
1 Parent(s): adb8308

Upload 12 files

Browse files
Files changed (12) hide show
  1. __init__.py +2 -0
  2. app (5).py +319 -0
  3. clause_agents.py +89 -0
  4. clause_tasks.py +106 -0
  5. crew.py +41 -0
  6. env.example +3 -0
  7. gitignore.txt +170 -0
  8. models.py +16 -0
  9. pre-commit-config.yaml +44 -0
  10. pyproject.toml +35 -0
  11. streamlit_app.py +118 -0
  12. tools.py +18 -0
__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
def hello() -> str:
    """Return a friendly greeting from the lawgenie package."""
    greeting = "Hello from lawgenie!"
    return greeting
app (5).py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+
5
+ import regex
6
+ import requests
7
+ from dotenv import load_dotenv
8
+ from flask import Flask, jsonify, request
9
+ from together import Together
10
+ from werkzeug.utils import secure_filename
11
+
12
+ from lawgenie.crew import get_agent_output
13
+
14
# Flask application setup and Together AI client initialisation.
app = Flask(__name__)

UPLOAD_FOLDER = "uploads"
ALLOWED_EXTENSIONS = {"pdf", "docx"}

app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER

load_dotenv()

# Fix: never print the raw API key — secrets must not leak into logs/stdout.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
client = Together(api_key=TOGETHER_API_KEY)
26
+
27
+
28
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    _stem, dot, extension = filename.rpartition(".")
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
30
+
31
+
32
def inspect_and_serialize(obj):
    """Recursively convert *obj* into a JSON-serializable structure.

    Primitives pass through; lists and dicts are walked; objects with a
    ``__dict__`` are serialized attribute-by-attribute; anything else falls
    back to ``str(obj)``.
    """
    if isinstance(obj, (str, int, float, bool, type(None))):
        return obj
    if isinstance(obj, list):
        return [inspect_and_serialize(element) for element in obj]
    if isinstance(obj, dict):
        return {name: inspect_and_serialize(entry) for name, entry in obj.items()}
    if hasattr(obj, "__dict__"):
        return inspect_and_serialize(obj.__dict__)
    return str(obj)
46
+
47
+
48
def debug_crew_output(crew_output):
    """Print the type and a pretty-printed serialization of *crew_output*."""
    print("Type of crew_output:", type(crew_output))
    print("Content of crew_output:")
    serialized = inspect_and_serialize(crew_output)
    print(json.dumps(serialized, indent=2))
52
+
53
+
54
def parse_combined_output(combined_output):
    """Parse the LLM segmentation text into ``{section: {summary, full_text}}``.

    Expects lines of the form ``Section Name: ...``, ``Summary: ...`` and
    ``Full Text: ...``; any other non-marker line under an open section is
    appended to that section's full text.
    """
    parsed = {}
    name = None
    summary_buf = ""
    text_buf = ""

    def flush():
        # Commit the currently open section (no-op before the first one).
        if name:
            parsed[name] = {
                "summary": summary_buf.strip(),
                "full_text": text_buf.strip(),
            }

    for raw_line in combined_output.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("Section Name:"):
            flush()
            name = stripped[len("Section Name:") :].strip()
            summary_buf = ""
            text_buf = ""
        elif stripped.startswith("Summary:"):
            summary_buf = stripped[len("Summary:") :].strip()
        elif stripped.startswith("Full Text:"):
            text_buf = stripped[len("Full Text:") :].strip()
        elif name:
            text_buf += stripped + " "

    flush()
    return parsed
87
+
88
+
89
def extract_json(text):
    """Return the first balanced top-level JSON object embedded in *text*.

    Uses the third-party ``regex`` module's recursive pattern ``(?R)`` to
    match nested braces. Raises ValueError when no object is found.
    """
    found = regex.search(r"\{(?:[^{}]|(?R))*\}", text, regex.DOTALL)
    if not found:
        raise ValueError("No JSON object found in the response.")
    return found.group(0)
95
+
96
+
97
def call_llama_via_together_ai(prompt):
    """Send *prompt* to Llama 3.1 8B on Together AI and return the reply text."""
    completion_params = {
        "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2048,
        "temperature": 0.0,  # deterministic output for segmentation
        "top_p": 0.7,
        "top_k": 50,
        "repetition_penalty": 1,
        "stop": ["<|eot_id|>", "<|eom_id|>"],
        "stream": False,
    }
    response = client.chat.completions.create(**completion_params)
    return response.choices[0].message.content
110
+
111
+
112
def segment_contract(contract_text):
    """Segment an NDA into named sections by prompting the LLM chunk-by-chunk.

    Splits the text into 16k-character chunks, prompts the model once per
    chunk, and concatenates the raw responses (failed chunks are skipped).
    """
    print("In segment")
    print(f"Contract text length: {len(contract_text)}")

    chunk_size = 16000
    chunks = [
        contract_text[start : start + chunk_size]
        for start in range(0, len(contract_text), chunk_size)
    ]
    total = len(chunks)
    print(f"Number of chunks: {total}")
    aggregated = ""

    for part_number, chunk in enumerate(chunks, start=1):
        prompt = f"""
        Analyze the following part {part_number}/{total} of a Non-Disclosure Agreement (NDA) and segment it into key sections.

        Focus on identifying these common NDA sections:

        1. Parties
        2. Definition of Confidential Information
        3. Obligations of Receiving Party
        4. Exclusions from Confidential Information
        5. Term and Termination
        6. Return of Confidential Information
        7. Remedies

        For each identified section, provide:

        - Section Name: [Name of the section]
        - Summary: [A brief summary of what the section covers (1-2 sentences)]
        - Full Text: [The full text of the section. Do not skip any text within each section.]

        If a section is not present in this part, ignore it.

        If you find additional important sections not listed above, include them as well.

        Output Format:

        For each section, output in the following format:

        Section Name: [Name] Summary: [Summary] Full Text: [Full text]

        Do not include any additional text outside this format.

        NDA text part {part_number}/{total}:
        {chunk}
        """
        try:
            print(f"Processing chunk {part_number}/{total}")
            response_content = call_llama_via_together_ai(prompt)
            print(f"Received response for chunk {part_number}")
            aggregated += response_content + "\n"
        except Exception as e:
            # Best-effort: skip failing chunks rather than aborting the whole run.
            print(f"An error occurred while processing chunk {part_number}: {e}")
            continue

    print("Segmentation complete.")
    return aggregated
170
+
171
+
172
def parse_contract(file_path):
    """Extract plain text from a contract using the Upstage Document Parse API.

    Returns the extracted text (possibly "" when nothing could be extracted).
    Raises when the UPSTAGE_API_KEY is missing or the API responds non-200.
    """
    api_key = os.getenv("UPSTAGE_API_KEY")
    if not api_key:
        raise Exception("API key is missing")
    # Fix: the raw API key was previously printed here — secrets must never
    # be written to logs/stdout.

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {"Authorization": f"Bearer {api_key}"}

    try:
        with open(file_path, "rb") as file:
            files = {"document": file}
            data = {"ocr": "auto", "coordinates": "false", "output_formats": "['text']"}
            print("Sending request to Document Parse API...")
            response = requests.post(url, headers=headers, files=files, data=data)

        if response.status_code == 200:
            result = response.json()
            print(f"API Response: {json.dumps(result, indent=2)}")

            contract_text = ""
            if "content" in result and "text" in result["content"]:
                contract_text = result["content"]["text"]
            else:
                print("Warning: 'content' or 'text' not found in API response")
                # Fallback: stitch together page-level text elements.
                for page in result.get("pages", []):
                    for element in page.get("elements", []):
                        if element.get("category") == "text":
                            contract_text += element.get("text", "") + "\n"

            print(f"Extracted text length: {len(contract_text)}")
            print(f"First 500 characters of extracted text: {contract_text[:500]}")

            if len(contract_text) == 0:
                print("Warning: No text extracted from the document")
                print("API Response structure:")
                print(json.dumps(result, indent=2))

            return contract_text
        else:
            raise Exception(
                f"Error in Document Parse API: {response.status_code}, {response.text}"
            )
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
218
+
219
+
220
def segment_clauses(text):
    """Split contract *text* into clauses on blank-line boundaries."""
    parts = re.split(r"\n\n|\r\n\r\n", text)
    return [stripped for part in parts if (stripped := part.strip())]
224
+
225
+
226
def generate_recommendation(clause, analysis):
    """Return recommendations for topics mentioned in *analysis*.

    *clause* is currently unused but kept for API compatibility with callers.
    Falls back to a standard message when no known keyword matches.
    """
    advice_by_keyword = [
        ("payment", "Ensure payment terms are clearly defined and favorable."),
        ("deadline", "Review deadlines to ensure they are realistic and include buffer time."),
        ("confidentiality", "Verify that confidentiality clauses protect your interests adequately."),
        ("termination", "Check termination conditions and ensure they are fair to both parties."),
    ]
    recommendations = [tip for keyword, tip in advice_by_keyword if keyword in analysis]
    if not recommendations:
        return ["No specific recommendations. The clause appears standard."]
    return recommendations
250
+
251
+
252
def _analyze_clause(clause):
    """Best-effort keyword scan of a clause; returns the topics it touches.

    Fix: the route previously called an undefined ``analyze_clause`` (the
    author had even suppressed the lint error with ``# noqa: F821``), so
    every request raised NameError. The keywords mirror those used by
    generate_recommendation so /analyze output feeds /recommend directly.
    """
    lowered = clause.lower()
    topics = [
        kw
        for kw in ("payment", "deadline", "confidentiality", "termination")
        if kw in lowered
    ]
    return topics if topics else ["general"]


@app.route("/analyze", methods=["POST"])
def analyze_contract():
    """Analyze raw contract text posted as JSON ``{"text": ...}``."""
    data = request.json
    contract_text = data.get("text", "")
    clauses = segment_clauses(contract_text)

    analysis = [
        {"clause": clause, "analysis": _analyze_clause(clause)}
        for clause in clauses
    ]
    return jsonify({"analysis": analysis})
263
+
264
+
265
@app.route("/upload", methods=["POST"])
def upload_file():
    """Accept an NDA upload, parse + segment it, then run the agent crew.

    Returns the segmented contract and the serialized crew analysis, or a
    JSON error with an appropriate HTTP status code.
    """
    # Guard clauses: validate the multipart request before touching disk.
    if "file" not in request.files:
        return jsonify({"error": "No file part"}), 400
    upload = request.files["file"]
    if upload.filename == "":
        return jsonify({"error": "No selected file"}), 400
    if not (upload and allowed_file(upload.filename)):
        return jsonify({"error": "File type not allowed"}), 400

    safe_name = secure_filename(upload.filename)
    file_path = os.path.join(app.config["UPLOAD_FOLDER"], safe_name)
    upload.save(file_path)
    print(f"File saved: {file_path}")
    print(f"File size: {os.path.getsize(file_path)} bytes")
    try:
        print("Parsing contract...")
        contract_text = parse_contract(file_path)
        print(f"Parsed contract text length: {len(contract_text)}")
        print("Contract parsed. Starting segmentation...")
        combined_output = segment_contract(contract_text)
        print("Parsing combined output...")
        segmented_contract = parse_combined_output(combined_output)
        print("Segmentation complete.")

        crew_output = get_agent_output(segmented_contract)
        debug_crew_output(crew_output)

        response_data = {
            "message": "File uploaded and processed successfully",
            "segmented_contract": segmented_contract,
            "crew_analysis": inspect_and_serialize(crew_output),
        }
        print("Response Data:", response_data)

        return jsonify(response_data)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        import traceback

        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
306
+
307
+
308
@app.route("/recommend", methods=["POST"])
def recommend():
    """Generate recommendations for a clause from its analysis terms."""
    payload = request.json
    clause_text = payload.get("clause", "")
    analysis_terms = payload.get("analysis", [])
    tips = generate_recommendation(clause_text, " ".join(analysis_terms))
    return jsonify({"recommendations": tips})
315
+
316
+
317
if __name__ == "__main__":
    # Make sure the upload destination exists before accepting requests.
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    # NOTE(review): debug=True and host 0.0.0.0 are development settings —
    # confirm they are not used in a deployed environment.
    app.run(host="0.0.0.0", debug=True, port=5002)
clause_agents.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from crewai import Agent
4
+ from dotenv import load_dotenv
5
+ from langchain_openai import ChatOpenAI
6
+
7
+ from lawgenie.tools import rag_query_tools, rag_tools
8
+
9
+ load_dotenv()
10
+
11
# LLM configuration shared by every clause agent below.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")  # read but unused below
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")  # read but unused below
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    # openai_api_base=OPENAI_API_BASE,
    model_name="gpt-4o-mini",  # hard-coded; OPENAI_MODEL_NAME above is ignored
)


# Ingestion agent: has file-system + RAG ingestion tools (rag_tools), unlike
# the clause specialists below which only get query tools.
corporate_lawyer_agent = Agent(
    role="Corporate Lawyer",
    goal="Use the documents you're given and the tools you have to build a knowledge base of NDAs that you can refer later. First, check if the documents have already been added.",
    backstory="""You are a corporate lawyer who has vast knowledge of NDAs, different sections within them, and how they are supposed to work.
    You also have the ability to call the RAG tool to ingest new documents that using the paths of files given to you and building a knowledge base of NDAs.""",
    tools=rag_tools,
    verbose=True,
    llm=llm,
)

### Clause agents and tasks here - try to check if one agent can be used for multiple clauses, since handoff between agents takes time
# Specialist: "Parties" clause comparison against the RAG benchmark corpus.
parties_corporate_lawyer = Agent(
    role="Parties Corporate Lawyer",
    goal="To compare the current NDA parties clause to the ones in our RAG database and identify how good it is.",
    backstory="""You are a corporate lawyer who specialises in identifying who the parties in a certain NDA are.
    There's no one who does it as well as you do. Things that others miss, you don't.""",
    tools=rag_query_tools,
    verbose=True,
    llm=llm,
)

# obligations of receiving party
obligation_information_lawyer = Agent(
    role="Obligations of Receiving Party Lawyer",
    goal="To compare the current NDA obligations of receiving party clause to the ones in our RAG database and identify how good it is.",
    backstory="""You are an obligations of receiving party lawyer who is an expert in identifying what the obligations of receiving party is in a certain NDA.
    You have never failed to identify obligations of receiving party in an NDA. You are a lawyer with many years of experience and know how to identify obligations of receiving party.
    """,
    tools=rag_query_tools,
    verbose=True,
    llm=llm,
)


# terms and termination
terms_and_termination_lawyer = Agent(
    role="Terms and Termination Lawyer",
    goal="To compare the current NDA terms and termination clause to the ones in our RAG database and identify how good it is.",
    backstory="""You are a terms and termination lawyer who is an expert in identifying what the terms and termination is in a certain NDA.
    Terms and terminatioin are in your DNA. When given an NDA, you're eyes first go to terms and termination clause and you can identify fallacies well.
    """,
    tools=rag_query_tools,
    verbose=True,
    llm=llm,
)

# remedies
remedies_lawyer = Agent(
    role="Remedies Lawyer",
    goal="To compare the current NDA remedies clause to the ones in our RAG database and identify how good it is.",
    backstory="""You are a remedies lawyer who is an expert in identifying what the remedies is in a certain NDA.
    You craft perfect remedies in an NDA in the case of breach or conflict. You are the go to person for remedies in an NDA.
    """,
    tools=rag_query_tools,
    verbose=True,
    llm=llm,
)

# additional important information
additional_information_lawyer = Agent(
    role="Additional Important Information Lawyer",
    goal="To compare the current NDA additional important information clause to the ones in our RAG database and identify how good it is.",
    backstory="""You are an additional important information lawyer who is an expert in identifying what the additional important information is in a certain NDA.
    You identify up all the missing information in an NDA. You carefully craft perfect additional important information in an NDA.
    """,
    tools=rag_query_tools,
    verbose=True,
    llm=llm,
)
clause_tasks.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Task
2
+ from dotenv import load_dotenv
3
+
4
+ from lawgenie.clause_agents import (
5
+ additional_information_lawyer,
6
+ corporate_lawyer_agent,
7
+ obligation_information_lawyer,
8
+ parties_corporate_lawyer,
9
+ remedies_lawyer,
10
+ terms_and_termination_lawyer,
11
+ )
12
+ from lawgenie.models import AgentOutput
13
+
14
+ load_dotenv()
15
+
16
# Shared instruction appended to every task: forces a two-key JSON shape
# (`analysis` + `recommendation`) matching the AgentOutput model.
EXPECTED_TASK_OUTPUT = """
A JSON that has two keys: an `analysis` of the current clause in laymen terms as a paragraph as well as a `recommendation` of how the current clause deviates from the benchmark clauses (in short, numbered points)."""


def create_accumulating_task(original_task, key):
    """Wrap *original_task* so its result is stored under *key* in a shared
    ``accumulated_results`` dict carried on the task context.

    NOTE(review): this relies on ``original_task.function`` and the
    ``Task(function=...)`` keyword, which do not appear in the documented
    crewai Task API — confirm against the pinned crewai release. The
    fallback branch in crew.py's get_agent_output suggests this wrapping
    may not take effect at runtime.
    """

    def accumulating_task(agent, context):
        # Run the original task body, then fold its result into the shared dict.
        result = original_task.function(agent, context)
        if "accumulated_results" not in context:
            context["accumulated_results"] = {}
        context["accumulated_results"][key] = result
        return context["accumulated_results"]

    # Rebuild the task with the wrapped function; all other fields copied as-is.
    return Task(
        description=original_task.description,
        agent=original_task.agent,
        function=accumulating_task,
        expected_output=original_task.expected_output,
        output_pydantic=original_task.output_pydantic,
        context=original_task.context,
    )
36
+
37
+
38
def get_tasks(input_document):
    """Build the ordered task list for the NDA-review crew.

    *input_document* (the segmented contract) is interpolated directly into
    each clause task's description so the agent sees the clause text.
    Returns the tasks in execution order for a sequential process: ingestion
    first, then one comparison task per clause specialist.
    """
    tasks = []

    # 1) Populate the RAG knowledge base with benchmark NDAs before any
    #    comparison task runs.
    ingest_documents_task = Task(
        description="""Ingest benchmark NDAs that will be used as a yardstick to compare NDAs we will judge later.
        Check all the files with NDA in their title in the ndas folder inside the current directory and ingest all the documents using the RAG tool.
        Don't bother with the files inside the uploads folder.
        Only ingest files with docx, doc, and pdf extensions. You don't need to analyze these documents.
        If you pass the path of the documents to the RAG tool, it should be able to parse the documents.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=corporate_lawyer_agent,
    )
    tasks.append(create_accumulating_task(ingest_documents_task, "ingest_documents"))

    # 2) Per-clause comparison tasks, one per specialist agent.
    identify_parties = Task(
        description=f"""Take the current parties clause, which is inside this: `{input_document}`, and compare it with similar clauses in our RAG database to check how good it is.
        Your task is to identify the parties in our NDA, and see if the current NDA clause abides by all the best practices of similar clauses.
        There is a party that offers services, and there's a party that consumes services. This should be well defined within the clauses.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=parties_corporate_lawyer,
        output_pydantic=AgentOutput,
    )
    tasks.append(create_accumulating_task(identify_parties, "identify_parties"))

    identify_obligations_of_receiving_party = Task(
        description=f"""Take the current obligations of receiving party clause, which is inside this: `{input_document}`, and compare it with similar clauses in our RAG database to check how good it is.
        Your task is to identify the obligations of receiving party in our NDA, and see if the current NDA clause abides by all the best practices of similar clauses.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=obligation_information_lawyer,
        output_pydantic=AgentOutput,
    )
    tasks.append(
        create_accumulating_task(identify_obligations_of_receiving_party, "obligations")
    )

    identify_terms_and_termination = Task(
        description=f"""Take the current terms and termination clause, which is inside this: `{input_document}`, and compare it with similar clauses in our RAG database to check how good it is.
        Your task is to identify the terms and termination in our NDA, and see if the current NDA clause abides by all the best practices of similar clauses.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=terms_and_termination_lawyer,
        output_pydantic=AgentOutput,
    )
    tasks.append(
        create_accumulating_task(
            identify_terms_and_termination, "terms_and_termination"
        )
    )

    identify_remedies = Task(
        description=f"""Take the current remedies clause, which is inside this: `{input_document}`, and compare it with similar clauses in our RAG database to check how good it is.
        Your task is to identify the remedies in our NDA, and see if the current NDA clause abides by all the best practices of similar clauses.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=remedies_lawyer,
        output_pydantic=AgentOutput,
    )
    tasks.append(create_accumulating_task(identify_remedies, "remedies"))

    identify_additional_information = Task(
        description=f"""Take the current additional important information clause, which is inside this: `{input_document}`, and compare it with similar clauses in our RAG database to check how good it is.
        Your task is to identify the additional important information in our NDA, and see if the current NDA clause abides by all the best practices of similar clauses.""",
        expected_output=EXPECTED_TASK_OUTPUT,
        agent=additional_information_lawyer,
        output_pydantic=AgentOutput,
    )
    tasks.append(
        create_accumulating_task(identify_additional_information, "additional_info")
    )

    return tasks
crew.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Crew, Process
2
+ from dotenv import load_dotenv
3
+
4
+ from lawgenie.clause_agents import (
5
+ additional_information_lawyer,
6
+ corporate_lawyer_agent,
7
+ obligation_information_lawyer,
8
+ remedies_lawyer,
9
+ terms_and_termination_lawyer,
10
+ )
11
+ from lawgenie.clause_tasks import get_tasks
12
+
13
+ load_dotenv()
14
+
15
+
16
def get_crew(input_doc):
    """Assemble the sequential NDA-review crew for *input_doc*.

    The agents list must include every agent referenced by get_tasks.
    Fix: parties_corporate_lawyer was missing here even though the
    identify_parties task (see clause_tasks.get_tasks) is assigned to it.
    """
    crew = Crew(
        agents=[
            corporate_lawyer_agent,
            parties_corporate_lawyer,
            obligation_information_lawyer,
            terms_and_termination_lawyer,
            remedies_lawyer,
            additional_information_lawyer,
        ],
        tasks=get_tasks(input_doc),
        process=Process.sequential,
        verbose=True,
    )

    return crew
31
+
32
+
33
def get_agent_output(document_from_frontend):
    """Run the review crew on the document and return per-clause results."""
    outcome = get_crew(document_from_frontend).kickoff()

    if isinstance(outcome, dict) and "accumulated_results" in outcome:
        return outcome["accumulated_results"]
    # Fallback in case the modification didn't work as expected
    return {"final_recommendation": outcome}
env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ OPENAI_API_KEY='your-api-key'
2
+ OPENAI_API_BASE='https://api.aimlapi.com/v1'
3
+ OPENAI_MODEL_NAME='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo'
gitignore.txt ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ .vscode/
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
111
+ .pdm.toml
112
+ .pdm-python
113
+ .pdm-build/
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+
165
+ *.lock
166
+ *.doc
167
+ *.docx
168
+ *.pdf
169
+
170
+ db
models.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class AgentOutput(BaseModel):
    """Structured output produced by each clause agent."""

    # Plain-language explanation of the clause.
    analysis: str = Field(description="An analysis of the section in laymen terms")
    # Short, numbered deviations from the benchmark NDAs.
    recommendation: str = Field(
        description="How the current clause deviates from the benchmark documents"
    )
13
+
14
+
15
class FinalOutput(BaseModel):
    """Mapping of clause/section name to that agent's structured output."""

    data: Dict[str, AgentOutput]
pre-commit-config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.6.0
4
+ hooks:
5
+ - id: check-yaml
6
+ - id: end-of-file-fixer
7
+ - id: trailing-whitespace
8
+ - id: check-added-large-files
9
+ args: ['--maxkb=100']
10
+ - id: check-json
11
+ - id: check-merge-conflict
12
+ - id: check-toml
13
+ - id: detect-private-key
14
+ - id: forbid-submodules
15
+ - id: name-tests-test
16
+ - id: no-commit-to-branch
17
+ - id: pretty-format-json
18
+ - repo: https://github.com/psf/black
19
+ rev: 24.8.0
20
+ hooks:
21
+ - id: black
22
+ - repo: https://github.com/astral-sh/ruff-pre-commit
23
+ # Ruff version.
24
+ rev: v0.6.4
25
+ hooks:
26
+ # Run the linter.
27
+ - id: ruff
28
+ types_or: [ python, pyi ]
29
+ args: [ --fix ]
30
+ # Run the formatter.
31
+ - id: ruff-format
32
+ types_or: [ python, pyi ]
33
+ - repo: https://github.com/standard/standard
34
+ rev: v17.1.1
35
+ hooks:
36
+ - id: standard
37
+ - repo: https://github.com/pycqa/isort
38
+ rev: 5.13.2
39
+ hooks:
40
+ - id: isort
41
+ - repo: https://github.com/thlorenz/doctoc
42
+ rev: v2.2.0
43
+ hooks:
44
+ - id: doctoc
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "lawgenie"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "requests>=2.32.3",
9
+ "flask>=3.0.3",
10
+ "werkzeug>=3.0.4",
11
+ "pdfminer>=20191125",
12
+ "openai>=1.45.0",
13
+ "docx>=0.2.4",
14
+ "python-dotenv>=1.0.1",
15
+ "streamlit>=1.38.0",
16
+ "langgraph>=0.2.21",
17
+ "pre-commit>=3.8.0",
18
+ "crewai>=0.55.2",
19
+ "composio-crewai>=0.5.23",
20
+ "e2b>=0.17.2a37",
21
+ "pydantic>=2.9.1",
22
+ "langchain-openai>=0.1.25",
23
+ "together>=1.2.12",
24
+ "watchdog>=4.0.2",
25
+ ]
26
+
27
+ [build-system]
28
+ requires = ["hatchling"]
29
+ build-backend = "hatchling.build"
30
+
31
+ [tool.black]
32
+ line-length = 88
33
+
34
+ [tool.isort]
35
+ profile = "black"
streamlit_app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import streamlit as st
3
+ from together import Together
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+
9
+ # Initialize Together AI client
10
+ TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
11
+ client = Together(api_key=TOGETHER_API_KEY)
12
+
13
def call_llama_for_response(clauses_data):
    """Draft a reply to the contract drafter from per-clause decisions.

    *clauses_data* is a list of dicts with keys ``agent``, ``analysis``,
    ``recommendation``, ``action`` and (for "Negotiate") ``negotiation_points``.
    """
    sections = [
        "As an AI assistant specializing in contract analysis, draft a professional and courteous response to a contract drafter based on the following clause analyses and decisions:\n\n"
    ]
    for clause in clauses_data:
        sections.append(f"Clause: {clause['agent']}\n")
        sections.append(f"Analysis: {clause['analysis']}\n")
        sections.append(f"Recommendation: {clause['recommendation']}\n")
        sections.append(f"Decision: {clause['action']}\n")
        if clause['action'] == 'Negotiate':
            sections.append(f"Negotiation points: {clause['negotiation_points']}\n")
        sections.append("\n")
    sections.append(
        "Draft a response that addresses each clause, explaining our position on acceptance, rejection, or negotiation. The tone should be professional, courteous, and constructive."
    )
    prompt = "".join(sections)

    response = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=2048,
        temperature=0.3,
        top_p=0.8,
        top_k=50,
        repetition_penalty=1,
        stop=["<|eot_id|>", "<|eom_id|>"],
        stream=False
    )
    return response.choices[0].message.content
39
+
40
st.title("Contract Negotiation Assistant")

# Use session state to store the uploaded file and analysis results
# (survives Streamlit's rerun-on-interaction model).
if 'uploaded_file' not in st.session_state:
    st.session_state.uploaded_file = None
if 'analysis_results' not in st.session_state:
    st.session_state.analysis_results = None

# File uploader
uploaded_file = st.file_uploader("Upload Contract", type=["pdf", "docx"])

# If a new file is uploaded, update the session state and clear previous results
if uploaded_file is not None and uploaded_file != st.session_state.uploaded_file:
    st.session_state.uploaded_file = uploaded_file
    st.session_state.analysis_results = None

# If we have an uploaded file, process it
if st.session_state.uploaded_file is not None:
    # Only call the API if we don't have analysis results yet
    # (avoids re-running the expensive backend pipeline on every rerun).
    if st.session_state.analysis_results is None:
        files = {"file": st.session_state.uploaded_file}
        # Backend Flask app (app.py) listening on port 5002.
        response = requests.post("http://localhost:5002/upload", files=files)
        if response.status_code == 200:
            st.write("Contract uploaded successfully. Analyzing...")
            st.session_state.analysis_results = response.json()
        else:
            st.error("Failed to analyze the contract. Please try again.")

    # If we have analysis results, display them and allow user interaction
    if st.session_state.analysis_results is not None:
        data = st.session_state.analysis_results
        segmented_contract = data.get("segmented_contract", {})
        crew_analysis = data.get("crew_analysis", {})

        # Extract the tasks_output from the nested structure
        # NOTE(review): assumes the backend hit get_agent_output's fallback
        # branch ({"final_recommendation": ...}) — confirm the response shape.
        tasks_output = crew_analysis.get("final_recommendation", {}).get("tasks_output", [])

        clauses_data = []
        for task in tasks_output:
            agent = task.get("agent", "")
            # Only tasks with structured (pydantic) output are shown.
            if task.get("pydantic"):
                clause_analysis = task["pydantic"].get("analysis", "")
                recommendation = task["pydantic"].get("recommendation", "")

                st.subheader(f"Clause: {agent}")
                st.write("Analysis:")
                st.write(clause_analysis)
                st.write("Recommendation:")
                st.write(recommendation)

                # Per-clause decision; keyed by agent name so widget state
                # persists across reruns.
                action = st.selectbox(
                    f"Action for {agent}",
                    ["Accept", "Negotiate", "Reject"],
                    key=f"action_{agent}"
                )
                negotiation_points = ""
                if action == "Negotiate":
                    negotiation_points = st.text_area("Enter your negotiation points:", key=f"negotiate_{agent}")

                clauses_data.append({
                    "agent": agent,
                    "analysis": clause_analysis,
                    "recommendation": recommendation,
                    "action": action,
                    "negotiation_points": negotiation_points
                })

                st.markdown("---")  # Add a separator between clauses

        # Finalize Contract button
        if st.button("Finalize Contract"):
            with st.spinner("Generating response..."):
                response_to_drafter = call_llama_for_response(clauses_data)
                st.subheader("Response to Contract Drafter:")
                st.text_area("", response_to_drafter, height=400)
                st.success("Contract negotiation completed. Response generated for review.")

else:
    st.write("Please upload a contract to begin the analysis.")
tools.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from composio_crewai import Action, App, ComposioToolSet
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+
6
# Shared Composio toolset; presumably reads its API key from the environment
# loaded by load_dotenv above — confirm the required env vars.
tool_set = ComposioToolSet()

# Tools for the ingestion agent: RAG app plus file-system actions so the
# agent can locate NDA files before ingesting them.
rag_tools = tool_set.get_tools(
    apps=[App.RAGTOOL],
    actions=[
        Action.FILETOOL_LIST_FILES,
        Action.FILETOOL_CHANGE_WORKING_DIRECTORY,
        Action.FILETOOL_FIND_FILE,
    ],
)

# Query-only RAG tools for the clause-comparison agents.
rag_query_tools = tool_set.get_tools(
    apps=[App.RAGTOOL],
)