# Spaces:

# Kan05
# /

# Clause-AI

# Sleeping

# File size: 5,592 Bytes

# 87553a7
import os
import operator
import json
from typing import Annotated, List, TypedDict, Union
from dotenv import load_dotenv
from supabase import create_client
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
from langchain_huggingface import HuggingFaceEmbeddings

# Load environment variables from a .env file (when one is found) before the
# os.getenv calls below read them.
load_dotenv()

# Runtime configuration. os.getenv returns None for an unset variable, and no
# validation happens here, so a missing value reaches the constructors as None.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
MODEL_NAME = os.getenv("MODEL_NAME")

# Supabase client; retrieve_node uses it to call the "match_parent_documents" RPC.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Chat model shared by the guardrail, triage and draft nodes.
llm = ChatGroq(
    temperature=0.1,
    model_name=MODEL_NAME,
    api_key=GROQ_API_KEY
)

# Embedding model; retrieve_node uses it to embed the user query.
# NOTE(review): "trust_remote_code": True presumably lets the model repository
# run its own Python code at load time - confirm this is intended.
embeddings = HuggingFaceEmbeddings(
    model_name="jinaai/jina-embeddings-v2-base-en",
    model_kwargs={"device": "cpu", "trust_remote_code": True},
    encode_kwargs={"normalize_embeddings": True}
)


class AgentState(TypedDict, total=False):
    """State dictionary read and updated by the node functions in this module.

    ``total=False`` makes every key optional; each node returns a partial
    dict holding only the keys it sets.
    """

    # Raw user input; read by the guardrail, triage, retrieve and draft nodes.
    query: str
    # Chat history. Annotated with operator.add - presumably a reducer so that
    # updates are concatenated rather than replaced (confirm against the graph
    # framework that consumes this state).
    messages: Annotated[List[Union[HumanMessage, AIMessage]], operator.add]
    # Reference text for drafting; set by retrieve_node, read by draft_node,
    # and reset to "" by guardrail_node on chat turns.
    context: str
    # Clause text returned by retrieval, or "None found." when nothing matched.
    reference_clause: str
    # Drafted clause produced by draft_node; reset to "" on chat turns.
    final_draft: str
    # Stage marker; the nodes here set "chat", "legal", "planning" or "drafting".
    phase: str
    # Missing details listed by triage_node (at most five; empty when ready).
    missing_info: List[str]
    # Message for the user; set by guardrail_node (chat) and triage_node (planning).
    clarification_question: str
    # "chat" or "legal", as decided by guardrail_node.
    intent: str


_DEFAULT_GREETING = "Hello. I am Clause.ai. How can I help with legal drafting today?"


def _chat_reply(message: str) -> dict:
    """Build the state update for a non-legal (chat) turn carrying *message*."""
    return {
        "intent": "chat",
        "phase": "chat",
        "final_draft": "",
        "context": "",
        "reference_clause": "",
        "clarification_question": message,
    }


def guardrail_node(state: AgentState):
    """Classify the user's query and route it to the chat or legal path.

    The LLM is asked for a JSON object with ``classification`` and
    ``response`` keys. The outermost ``{...}`` span of the reply is parsed so
    that prose or code fences around the JSON do not break parsing.

    Args:
        state: Agent state; only ``state["query"]`` is read.

    Returns:
        ``{"intent": "legal", "phase": "legal"}`` for a legal request.
        Otherwise a chat update that clears ``final_draft``, ``context`` and
        ``reference_clause`` and puts the reply in ``clarification_question``.
        The default greeting is used when the LLM reply cannot be parsed or
        carries no usable ``response`` text.
    """
    prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            """

You are the gatekeeper for Clause.ai.



Classify the user input into exactly one category.



GREETING

OFF_TOPIC

LEGAL_REQUEST



Return ONLY valid JSON.



Format:

{{

  "classification": "GREETING | OFF_TOPIC | LEGAL_REQUEST",

  "response": "string"

}}



Rules:

GREETING gets a polite intro.

OFF_TOPIC gets a refusal.

LEGAL_REQUEST response must be empty.

"""
        ),
        ("human", "{query}")
    ])

    raw = (prompt | llm).invoke({"query": state["query"]}).content.strip()

    try:
        start = raw.index("{")
        end = raw.rindex("}") + 1
        data = json.loads(raw[start:end])
    except ValueError:
        # str.index/rindex raise ValueError when no brace is present, and
        # json.JSONDecodeError is a ValueError subclass, so this covers every
        # "reply is not usable JSON" case without hiding unrelated errors.
        return _chat_reply(_DEFAULT_GREETING)

    # Normalise case and whitespace so "legal_request" or " LEGAL_REQUEST "
    # is not silently treated as chat.
    classification = str(data.get("classification") or "").strip().upper()
    if classification == "LEGAL_REQUEST":
        return {
            "intent": "legal",
            "phase": "legal"
        }

    # Never hand the user an empty reply: fall back to the greeting when the
    # model omitted the response or returned null / blank / non-string data.
    response = data.get("response")
    if not isinstance(response, str) or not response.strip():
        response = _DEFAULT_GREETING
    return _chat_reply(response)


def triage_node(state: AgentState):
    """Decide whether the request can be drafted now or needs more detail.

    The LLM answers either ``READY`` or a list of missing variables. The list
    is expected to be comma separated, but newline-separated bullets are
    accepted too. Bullet and emphasis markers (``-``, ``*``) are stripped
    from the ends of each item only, so hyphenated terms such as
    "non-compete" stay intact.

    Args:
        state: Agent state; only ``state["query"]`` is read.

    Returns:
        ``{"phase": "drafting", "missing_info": []}`` when the reply contains
        ``READY``. Otherwise a planning update with up to five non-empty
        ``missing_info`` items and a fixed ``clarification_question``.
    """
    prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            """

You are a Legal Intake AI.



If the user provided any concrete parameters, output READY.



If vague, output 3 to 5 critical missing variables as a comma separated list.

"""
        ),
        ("human", "{query}")
    ])

    result = (prompt | llm).invoke({"query": state["query"]}).content.strip()

    if "READY" in result:
        return {
            "phase": "drafting",
            "missing_info": []
        }

    # Treat newlines as separators too, then trim list/markdown markers from
    # the edges of every piece. Emptiness is checked after cleaning so a bare
    # "-" or "*" cannot produce an empty entry.
    cleaned = (
        piece.strip(" \t\r-*")
        for piece in result.replace("\n", ",").split(",")
    )
    missing_items = [item for item in cleaned if item][:5]

    return {
        "phase": "planning",
        "missing_info": missing_items,
        "clarification_question": "I can draft that. Please confirm or skip to use defaults."
    }


def retrieve_node(state: AgentState):
    """Look up the stored clause that best matches the user's query.

    Embeds ``state["query"]`` and calls the ``match_parent_documents`` RPC
    with a threshold of 0.5 and a match count of 1.

    Returns:
        A dict whose ``context`` and ``reference_clause`` both hold the
        ``content`` of the first returned row, or fixed fallback strings when
        the RPC returns no rows.
    """
    rpc_params = {
        "query_embedding": embeddings.embed_query(state["query"]),
        "match_threshold": 0.5,
        "match_count": 1,
    }
    rows = supabase.rpc("match_parent_documents", rpc_params).execute().data

    # Guard clause: nothing matched, so hand back the generic defaults.
    if not rows:
        return {
            "context": "Standard commercial terms apply.",
            "reference_clause": "None found.",
        }

    best_match = rows[0]["content"]
    return {"context": best_match, "reference_clause": best_match}

def draft_node(state: AgentState):
    """Draft the final clause from the user's query and the reference context.

    Prints a progress line, sends the system prompt (formatting rules plus
    ``state["context"]``) and the human query to the LLM, and returns the
    reply's content.

    Returns:
        ``{"final_draft": <LLM reply content>}``.
    """
    print("✍️ Drafting Clause...")

    # Kept verbatim (including indentation and blank lines): this text is
    # sent to the model as-is.
    system_text = """

        You are a Senior Legal Drafter. 

        Draft a high-quality legal clause based on the User Request and the Reference Context.

        

        STRICT FORMATTING RULES (CRITICAL):

        1. **HEADERS:** Use **Bold Uppercase** for all Section Headings (e.g., **1. DEFINITIONS**).

        2. **SPACING:** Add a blank line between every paragraph.

        3. **LISTS:** Use proper Markdown lists for subsections:

           (a) First item...

           (b) Second item...

        4. **NO CODE BLOCKS:** Do NOT wrap the output in ```markdown or ```. Return raw text only.

        5. **NO SEPARATORS:** Do NOT use horizontal rules (---) or long lines of dashes (________________). They break the PDF renderer.

        6. **DEFAULTS:** If a detail is missing in the request, use a reasonable market standard default.

        

        [REFERENCE CONTEXT]:

        {context}

        """

    template = ChatPromptTemplate.from_messages([
        ("system", system_text),
        ("human", "{query}"),
    ])
    chain = template | llm

    inputs = {"context": state["context"], "query": state["query"]}
    reply = chain.invoke(inputs)
    return {"final_draft": reply.content}