omm7 committed on
Commit
9a4e8af
·
verified ·
1 Parent(s): ea4d61f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +30 -24
app.py CHANGED
@@ -8,13 +8,17 @@ from pathlib import Path
8
  from sentence_transformers import SentenceTransformer
9
  from huggingface_hub import CommitScheduler
10
  from openai import OpenAI
 
11
 
12
  # Load embedding model
13
  embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
14
 
15
- # Load ChromaDB client and collection
16
  chroma_client = chromadb.PersistentClient(path="./clause_index")
17
- collection = chroma_client.get_collection("legal_clauses")
 
 
 
18
 
19
  # Setup OpenAI client
20
  client = OpenAI(
@@ -23,8 +27,8 @@ client = OpenAI(
23
  )
24
 
25
  # Prompt template
26
- system_message = """You are a legal AI assistant. You answer legal questions using contract clauses retrieved from the database.
27
- If no specific context is found, use your legal knowledge to infer the most likely answer based on standard contract practices."""
28
  user_template = """
29
  ### Context:
30
  {context}
@@ -48,25 +52,27 @@ scheduler = CommitScheduler(
48
  def predict(question):
49
  try:
50
  query_embedding = embed_model.encode([question], normalize_embeddings=True)[0]
51
-
52
- # Retrieve documents from vector DB
53
- results = collection.query(
54
- query_embeddings=[query_embedding.tolist()],
55
- n_results=3
56
- )
57
-
58
- documents = results["documents"][0]
59
- metadatas = results["metadatas"][0]
60
-
61
- # If nothing is found, simulate default context
62
- if not documents:
63
- context = "No directly relevant clauses were retrieved from the contract database."
64
- else:
65
- context_parts = [
66
- f"[Clause Type: {m['clause_type']}] {doc}"
67
- for doc, m in zip(documents, metadatas)
68
- ]
69
- context = "\n\n".join(context_parts)
 
 
70
 
71
  prompt = [
72
  {"role": "system", "content": system_message},
@@ -106,7 +112,7 @@ demo = gr.Interface(
106
  inputs=gr.Textbox(label="Enter your legal question:", lines=4),
107
  outputs=gr.Textbox(label="Answer"),
108
  title="⚖️ GL_LegalMind",
109
- description="Ask legal contract-related questions. Answers are grounded in ChromaDB or inferred using legal AI knowledge."
110
  )
111
 
112
  demo.queue()
 
8
  from sentence_transformers import SentenceTransformer
9
  from huggingface_hub import CommitScheduler
10
  from openai import OpenAI
11
+ from chromadb.errors import NotFoundError
12
 
13
  # Load embedding model
14
  embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
15
 
16
# Open the on-disk ChromaDB index and fetch the clause collection.
# If the index has not been built yet, leave `collection` as None so
# the retrieval step can be skipped gracefully at query time.
chroma_client = chromadb.PersistentClient(path="./clause_index")
collection = None
try:
    collection = chroma_client.get_collection("legal_clauses")
except NotFoundError:
    # No "legal_clauses" collection exists in this persistent store yet.
    pass
22
 
23
  # Setup OpenAI client
24
  client = OpenAI(
 
27
  )
28
 
29
# Prompt template
# System prompt: positions the model as a CUAD-trained legal assistant and
# tells it to fall back on general contract knowledge when retrieval is empty.
system_message = (
    "You are a legal AI assistant trained on contract clause examples from the CUAD dataset.\n"
    "If no clauses are retrieved from the database, infer the answer using your understanding of common contractual standards."
)
32
  user_template = """
33
  ### Context:
34
  {context}
 
52
  def predict(question):
53
  try:
54
  query_embedding = embed_model.encode([question], normalize_embeddings=True)[0]
55
+ context = "No relevant clauses were found in the database. Please answer using your legal understanding from the CUAD dataset."
56
+
57
+ if collection:
58
+ try:
59
+ results = collection.query(
60
+ query_embeddings=[query_embedding.tolist()],
61
+ n_results=3
62
+ )
63
+ documents = results["documents"][0]
64
+ metadatas = results["metadatas"][0]
65
+
66
+ if documents:
67
+ context_parts = [
68
+ f"[Clause Type: {m['clause_type']}] {doc}"
69
+ for doc, m in zip(documents, metadatas)
70
+ ]
71
+ context = "\n\n".join(context_parts)
72
+
73
+ except Exception as e:
74
+ # Log internal error, but let LLM proceed with generic context
75
+ context = "Due to an internal retrieval issue, please answer based on your legal knowledge from CUAD dataset."
76
 
77
  prompt = [
78
  {"role": "system", "content": system_message},
 
112
  inputs=gr.Textbox(label="Enter your legal question:", lines=4),
113
  outputs=gr.Textbox(label="Answer"),
114
  title="⚖️ GL_LegalMind",
115
+ description="Ask legal contract-related questions. Answers are based on ChromaDB if available or inferred using CUAD-based legal knowledge."
116
  )
117
 
118
  demo.queue()