File size: 3,718 Bytes
e810b6d 3705a5d e810b6d 9a4e8af 8ea81be e810b6d ea4d61f 3705a5d 8ea81be 3705a5d 9a4e8af 2d14e21 8ea81be e810b6d 9d00612 e810b6d ea4d61f 8ea81be e810b6d ea4d61f e810b6d ea4d61f e810b6d 8ea81be 3705a5d 8ea81be e810b6d 3705a5d 8ea81be 3705a5d 8ea81be 9a4e8af 8ea81be 9a4e8af 8ea81be 9a4e8af 8ea81be 9a4e8af ea4d61f 8ea81be 3705a5d 8ea81be e810b6d ea4d61f e810b6d ea4d61f e810b6d ea4d61f e810b6d 8ea81be 3705a5d e810b6d ea4d61f e810b6d 8ea81be e810b6d 8b6e435 e810b6d 8ea81be e810b6d 8ea81be e810b6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import os
import json
import uuid
import gradio as gr
import chromadb
import numpy as np
from pathlib import Path
from sentence_transformers import SentenceTransformer
from huggingface_hub import CommitScheduler
from chromadb.errors import NotFoundError
from openai import OpenAI
# Load embedding model
# Query embedder used by predict(); normalize_embeddings=True is passed at
# encode time, so similarity in the Chroma index is effectively cosine.
embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
# Load ChromaDB client
# Opens (or creates) the persistent index directory next to this script.
chroma_client = chromadb.PersistentClient(path="./clause_index")
try:
    collection = chroma_client.get_collection("legal_clauses")
except NotFoundError:
    # No prebuilt index: predict() falls back to the model's own knowledge.
    collection = None
# Setup OpenAI/Hugging Face client
# OpenAI-compatible client pointed at the Hugging Face inference router;
# HF_TOKEN must be set in the environment (e.g. a Space secret).
client = OpenAI(
    base_url="https://router.huggingface.co/featherless-ai/v1",
    api_key=os.getenv("HF_TOKEN"),
)
# Prompt template
# System instruction sent with every request; tells the model to fall back
# to general contract knowledge (and say so) when retrieval returns nothing.
system_message = """You are a legal AI assistant trained on contract clause examples from the CUAD dataset.
If no clauses are retrieved from the database, infer the answer using your understanding of common contractual standards. and report that no clause retrieved"""
# User-turn template; filled via .format(context=..., question=...) in predict().
user_template = """
### Context:
{context}
### Question:
{question}
"""
# Setup logging: one JSON-lines file per process run (unique name via uuid so
# concurrent replicas never clobber each other), pushed to a Hub dataset repo
# by a background CommitScheduler.
log_file = Path("logs/") / f"query_{uuid.uuid4()}.json"
# parents=True makes this robust if the log path is ever nested deeper;
# exist_ok=True keeps restarts idempotent (original omitted parents=True).
log_file.parent.mkdir(parents=True, exist_ok=True)
scheduler = CommitScheduler(
    repo_id="legal-rag-output",
    repo_type="dataset",
    folder_path=log_file.parent,
    path_in_repo="logs",
    every=2  # NOTE(review): CommitScheduler's `every` is in minutes — confirm intended cadence
)
# Main QA function
def predict(question):
    """Answer a contract-law question, grounding the response in retrieved clauses.

    Encodes the question, retrieves the top-3 most similar clauses from the
    ChromaDB collection (when one was loaded), formats them into the prompt
    templates, and streams a completion from the chat model. Every
    question/context/answer triple is appended to the JSON-lines log file.

    Parameters
    ----------
    question : str
        The user's legal question (from the Gradio textbox).

    Returns
    -------
    str
        The generated answer, or an error message if generation failed.
    """
    # Default fallback context and output, assigned BEFORE the try block so the
    # logging code below can always reference them. (Previously `context` was
    # first assigned inside the try, so an early failure — e.g. in encoding —
    # made the log write crash with NameError instead of recording the error.)
    context = "No relevant clauses were found in the database. Please answer using your legal understanding from the CUAD dataset."
    output = ""
    try:
        # Encode query with normalized embeddings (cosine-style similarity)
        query_embedding = embed_model.encode([question], normalize_embeddings=True)[0]
        # If collection exists, try retrieval
        if collection:
            try:
                results = collection.query(
                    query_embeddings=[query_embedding.tolist()],
                    n_results=3
                )
                # Chroma returns one result list per query embedding; we sent one.
                documents = results["documents"][0]
                metadatas = results["metadatas"][0]
                if documents:
                    context = "\n\n".join(
                        f"[Clause Type: {m['clause_type']}] {doc}"
                        for doc, m in zip(documents, metadatas)
                    )
            except Exception:
                # Best-effort retrieval: on any failure, fall back to model knowledge
                context = "Due to an internal retrieval issue, please answer based on your legal knowledge from CUAD dataset."
        # Construct prompt from the module-level templates
        prompt = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_template.format(context=context, question=question)}
        ]
        # Generate response (streamed; chunks concatenated into one string)
        stream = client.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            messages=prompt,
            temperature=0.4,
            top_p=0.7,
            stream=True
        )
        for chunk in stream:
            # Delta content can be None on keep-alive/final chunks
            output += chunk.choices[0].delta.content or ""
    except Exception as e:
        output = f"An internal error occurred while generating the response: {str(e)}"
    # Log to file; the scheduler lock serializes writes against background commits
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps({
                "question": question,
                "context": context,
                "response": output
            }) + "\n")
    return output
# Gradio UI
# Single text-in / text-out interface wired to predict().
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter your legal question:", lines=4),
    outputs=gr.Textbox(label="Answer"),
    title="⚖️ GL_LegalMind",
    description="Ask contract-related legal questions. Answers are based on retrieved clauses or inferred from CUAD knowledge."
)
# queue() enables request queuing so concurrent users are handled serially.
demo.queue()
demo.launch()
|