Upload app.py with huggingface_hub
app.py CHANGED
@@ -7,34 +7,29 @@ import numpy as np
 from pathlib import Path
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import CommitScheduler
-from openai import OpenAI
 from chromadb.errors import NotFoundError
+from openai import OpenAI
 
 # Load embedding model
 embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
 
-# Load ChromaDB client
+# Load ChromaDB client
 chroma_client = chromadb.PersistentClient(path="./clause_index")
 try:
     collection = chroma_client.get_collection("legal_clauses")
 except NotFoundError:
     collection = None
 
-# Setup OpenAI client
+# Setup OpenAI/Hugging Face client
 client = OpenAI(
     base_url="https://router.huggingface.co/featherless-ai/v1",
     api_key=os.getenv("HF_TOKEN"),
 )
 
 # Prompt template
-system_message = """
-
-If no relevant clause is retrieved, infer the answer using your legal reasoning based on common contractual standards."""
-
+system_message = """You are a legal AI assistant trained on contract clause examples from the CUAD dataset.
+If no clauses are retrieved from the database, infer the answer using your understanding of common contractual standards, and report that no clause was retrieved."""
 user_template = """
-
-
-
 ### Context:
 {context}
 
@@ -42,7 +37,7 @@ user_template = """
 {question}
 """
 
-# Setup
+# Setup logging
 log_file = Path("logs/") / f"query_{uuid.uuid4()}.json"
 log_file.parent.mkdir(exist_ok=True)
 scheduler = CommitScheduler(
@@ -53,12 +48,16 @@ scheduler = CommitScheduler(
     every=2
 )
 
-# Main
+# Main QA function
 def predict(question):
     try:
+        # Encode query
         query_embedding = embed_model.encode([question], normalize_embeddings=True)[0]
+
+        # Default fallback context
         context = "No relevant clauses were found in the database. Please answer using your legal understanding from the CUAD dataset."
 
+        # If collection exists, try retrieval
        if collection:
            try:
                results = collection.query(
@@ -69,21 +68,20 @@ def predict(question):
                 metadatas = results["metadatas"][0]
 
                 if documents:
-                    context = "\n\n".join(
+                    context = "\n\n".join(
                         f"[Clause Type: {m['clause_type']}] {doc}"
                         for doc, m in zip(documents, metadatas)
-                    )
-
-
-            except Exception as e:
-                # Log internal error, but let LLM proceed with generic context
+                    )
+            except Exception:
                 context = "Due to an internal retrieval issue, please answer based on your legal knowledge from CUAD dataset."
 
+        # Construct prompt
         prompt = [
             {"role": "system", "content": system_message},
             {"role": "user", "content": user_template.format(context=context, question=question)}
         ]
 
+        # Generate response
         stream = client.chat.completions.create(
             model="mistralai/Mistral-7B-Instruct-v0.2",
             messages=prompt,
@@ -94,13 +92,12 @@ def predict(question):
 
         output = ""
         for chunk in stream:
-            delta = chunk.choices[0].delta.content or ""
-            output += delta
+            output += chunk.choices[0].delta.content or ""
 
     except Exception as e:
         output = f"An internal error occurred while generating the response: {str(e)}"
 
-    # Log
+    # Log to file
     with scheduler.lock:
         with log_file.open("a") as f:
             f.write(json.dumps({
@@ -111,13 +108,13 @@ def predict(question):
 
     return output
 
-#
+# Gradio UI
 demo = gr.Interface(
     fn=predict,
     inputs=gr.Textbox(label="Enter your legal question:", lines=4),
     outputs=gr.Textbox(label="Answer"),
     title="⚖️ GL_LegalMind",
-    description="Ask
+    description="Ask contract-related legal questions. Answers are based on retrieved clauses or inferred from CUAD knowledge."
 )
 
 demo.queue()
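Note: app.py assumes a "legal_clauses" collection already exists under ./clause_index; when it is missing, collection stays None and every request falls back to the generic CUAD context. Below is a minimal sketch of an indexing script that would satisfy that assumption. The script name, the sample clauses data, and the ID scheme are illustrative assumptions, not part of this commit:

    # build_index.py -- hypothetical indexing script, not part of this commit.
    # Builds the ./clause_index collection that app.py expects to find.
    import chromadb
    from sentence_transformers import SentenceTransformer

    embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
    chroma_client = chromadb.PersistentClient(path="./clause_index")

    # get_or_create_collection avoids the NotFoundError branch in app.py.
    collection = chroma_client.get_or_create_collection("legal_clauses")

    # Placeholder examples; real entries would come from the CUAD dataset.
    clauses = [
        {"text": "This Agreement shall be governed by the laws of the State of New York.",
         "clause_type": "Governing Law"},
        {"text": "Either party may terminate this Agreement upon thirty (30) days written notice.",
         "clause_type": "Termination"},
    ]

    # normalize_embeddings=True mirrors the query-side encoding in predict().
    embeddings = embed_model.encode([c["text"] for c in clauses], normalize_embeddings=True)

    collection.add(
        ids=[str(i) for i in range(len(clauses))],
        embeddings=embeddings.tolist(),
        documents=[c["text"] for c in clauses],
        metadatas=[{"clause_type": c["clause_type"]} for c in clauses],
    )

The clause_type metadata key matches what predict() reads when it formats retrieved documents into the prompt context.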