Spaces:

deepthi6
/

clausewise_full_project_v2

Sleeping

File size: 7,400 Bytes

import streamlit as st
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from multilingual import UI_TEXT, translate_text
from util import extract_text, split_into_clauses, simplify_clause, chat_with_model

# ---------------------------------------------------
# ✅ PAGE CONFIG
# ---------------------------------------------------
st.set_page_config(
    page_title="ClauseWise – NDA Assistant",
    layout="wide"
)

st.markdown(
    "<h2 style='text-align:center;'>ClauseWise – Multilingual NDA Legal Assistant</h2>",
    unsafe_allow_html=True
)

# ---------------------------------------------------
# ✅ LANGUAGE HANDLING
# ---------------------------------------------------
LANGUAGES = {
    "English": "en",
    "हिन्दी (Hindi)": "hi",
    "தமிழ் (Tamil)": "ta",
    "తెలుగు (Telugu)": "te",
    "ಕನ್ನಡ (Kannada)": "kn"
}

if "lang" not in st.session_state:
    st.session_state.lang = "en"

selected_label = st.selectbox("🌐 Language", list(LANGUAGES.keys()))
st.session_state.lang = LANGUAGES[selected_label]
T = {k: v[st.session_state.lang] for k, v in UI_TEXT.items()}

# ---------------------------------------------------
# ✅ LOAD CHAT MODEL (DistilGPT2 – HF SAFE)
# ---------------------------------------------------
@st.cache_resource
def load_chat_model():
    model_name = "distilgpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

    return model, tokenizer


model, tokenizer = load_chat_model()

# Chat history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []


# ---------------------------------------------------
# ✅ FILE UPLOAD
# ---------------------------------------------------
st.subheader(T["upload_title"])
uploaded = st.file_uploader(T["upload_instruction"], type=["pdf", "txt", "docx"])

if uploaded:
    st.info("⏳ Reading file...")
    text = extract_text(uploaded)

    # ---------------------------------------------------
    # ✅ STRICT NDA DETECTION
    # ---------------------------------------------------
    NDA_KEYWORDS = [
        "non-disclosure", "non disclosure", "nda",
        "confidential information", "disclosing party",
        "receiving party", "confidentiality",
        "confidential materials", "protected information"
    ]

    if len(text) < 50 or not any(k.lower() in text.lower() for k in NDA_KEYWORDS):
        st.error(T["error_not_nda"])
        st.stop()

    st.success(T["success_nda"])

    # ---------------------------------------------------
    # ✅ ANALYSIS TABS
    # ---------------------------------------------------
    st.subheader(T["analysis_title"])
    tabs = st.tabs([
        T["tab_clauses"],
        T["tab_risks"],
        T["tab_fairness"],
        T["tab_entities"],
        T["tab_alternatives"],
        T["tab_chat"],
    ])

    # ===================================================
    # ✅ TAB 1 — CLAUSE SIMPLIFICATION
    # ===================================================
    with tabs[0]:
        st.markdown(f"### {T['clause_simplify']}")

        mode = st.radio(
            T["choose_mode"],
            [("eli5", T["eli5"]), ("simple", T["simple"]), ("pro", T["pro"])],
            format_func=lambda x: x[1]
        )[0]

        clauses = split_into_clauses(text)

        for i, c in enumerate(clauses):
            with st.expander(f"Clause {i+1}"):
                st.write("**Original:**")
                st.write(c)

                st.write("**Explanation:**")
                st.write(simplify_clause(c, mode))

    # ===================================================
    # ✅ TAB 2 — RISK ANALYSIS
    # ===================================================
    with tabs[1]:
        st.markdown(f"### {T['risk_title']}")

        # Simple risk detector
        RISK_PATTERNS = {
            "Broad confidentiality definition": ["broad", "all information", "any information"],
            "Unlimited liability": ["unlimited", "full liability", "all damages"],
            "One-sided obligations": ["shall not", "only the receiving party"],
            "Long duration (>5 years)": ["5 years", "7 years", "perpetual"],
            "No termination rights": ["cannot terminate", "no termination"]
        }

        risks_found = []

        for clause in clauses:
            lower_c = clause.lower()
            for risk_label, kws in RISK_PATTERNS.items():
                if any(k in lower_c for k in kws):
                    risks_found.append(risk_label)

        risks_found = list(dict.fromkeys(risks_found))[:5]  # top 5

        if not risks_found:
            st.success("✅ No major risks detected.")
        else:
            for r in risks_found:
                st.error("⚠️ " + r)

    # ===================================================
    # ✅ TAB 3 — FAIRNESS METER
    # ===================================================
    with tabs[2]:
        st.markdown(f"### {T['fairness_title']}")

        fairness_score = max(20, min(90, 50 - len(risks_found) * 7))

        st.write(f"**{T['your_position']}:** {fairness_score}%")
        st.write(f"**{T['company_position']}:** {100 - fairness_score}%")

        st.progress(fairness_score / 100)

    # ===================================================
    # ✅ TAB 4 — ENTITIES
    # ===================================================
    with tabs[3]:
        st.markdown(f"### {T['entities_title']}")

        parties = []
        dates = []
        money = []

        import re

        for clause in clauses:
            if "party" in clause.lower():
                parties.append(clause[:80] + "...")

            money.extend(re.findall(r"\$[\d,]+", clause))
            dates.extend(re.findall(r"\b(?:\d{1,2}\/\d{1,2}\/\d{2,4}|20\d{2})\b", clause))

        st.write("**Parties:**", list(set(parties)))
        st.write("**Dates:**", list(set(dates)))
        st.write("**Amounts:**", list(set(money)))

    # ===================================================
    # ✅ TAB 5 — ALTERNATIVE CLAUSES
    # ===================================================
    with tabs[4]:
        st.markdown(f"### {T['alt_title']}")

        ALTS = [
            "A mutual confidentiality clause where both parties share equal protection.",
            "A time-limited confidentiality period of 2–3 years.",
            "Liability capped at a fixed reasonable amount."
        ]

        for alt in ALTS:
            st.info(alt)

    # ===================================================
    # ✅ TAB 6 — LEGAL CHAT ASSISTANT
    # ===================================================
    with tabs[5]:
        st.markdown(f"### {T['chat_title']}")

        user_input = st.text_input(T["chat_placeholder"])

        if user_input:
            reply = chat_with_model(model, tokenizer, user_input, st.session_state.chat_history)

            st.session_state.chat_history.append(("User", user_input))
            st.session_state.chat_history.append(("AI", reply))

        for role, msg in st.session_state.chat_history[-10:]:
            if role == "User":
                st.markdown(f"🧑 **You:** {msg}")
            else:
                st.markdown(f"🤖 **ClauseWise:** {msg}")