Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import os | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from multilingual import UI_TEXT, translate_text | |
| from util import extract_text, split_into_clauses, simplify_clause, chat_with_model | |
| # --------------------------------------------------- | |
| # ✅ PAGE CONFIG | |
| # --------------------------------------------------- | |
# Browser-tab title and full-width layout for the app.
st.set_page_config(page_title="ClauseWise – NDA Assistant", layout="wide")

# Centered page heading; raw HTML is enabled so the inline text-align style applies.
heading_html = (
    "<h2 style='text-align:center;'>ClauseWise – Multilingual NDA Legal Assistant</h2>"
)
st.markdown(heading_html, unsafe_allow_html=True)
| # --------------------------------------------------- | |
| # ✅ LANGUAGE HANDLING | |
| # --------------------------------------------------- | |
# Display label shown in the selector -> language code used to index UI_TEXT.
LANGUAGES = {
    "English": "en",
    "हिन्दी (Hindi)": "hi",
    "தமிழ் (Tamil)": "ta",
    "తెలుగు (Telugu)": "te",
    "ಕನ್ನಡ (Kannada)": "kn",
}

# Seed the session with English the first time the script runs.
if "lang" not in st.session_state:
    st.session_state["lang"] = "en"

# The selector's current choice always overwrites the stored language code.
selected_label = st.selectbox("🌐 Language", list(LANGUAGES))
st.session_state["lang"] = LANGUAGES[selected_label]

# Flatten UI_TEXT to {text_key: string in the active language}.
T = {}
for text_key, translations in UI_TEXT.items():
    T[text_key] = translations[st.session_state["lang"]]
| # --------------------------------------------------- | |
| # ✅ LOAD CHAT MODEL (DistilGPT2 – HF SAFE) | |
| # --------------------------------------------------- | |
@st.cache_resource
def load_chat_model():
    """Load the DistilGPT2 causal language model and its tokenizer.

    Streamlit re-executes this script from the top on every widget
    interaction. The function is wrapped in ``st.cache_resource`` so the
    model and tokenizer are created once and the same objects are reused on
    later reruns instead of being loaded again each time.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model_name = "distilgpt2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Reuse the end-of-sequence token for padding, on both the tokenizer and
    # the model config, so the two agree on the pad id.
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
    return model, tokenizer


model, tokenizer = load_chat_model()
# Chat history lives in session state so it survives script reruns;
# create the empty list only when it is not there yet.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
| # --------------------------------------------------- | |
| # ✅ FILE UPLOAD | |
| # --------------------------------------------------- | |
import re

st.subheader(T["upload_title"])
uploaded = st.file_uploader(T["upload_instruction"], type=["pdf", "txt", "docx"])

# Nothing below makes sense without a document, so end this script run here
# until the user uploads one.
if not uploaded:
    st.stop()

st.info("⏳ Reading file...")
# Treat a failed/empty extraction as empty text so the length check below
# rejects it instead of raising on len(None).
text = extract_text(uploaded) or ""

# ---------------------------------------------------
# STRICT NDA DETECTION
# ---------------------------------------------------
NDA_KEYWORDS = [
    "non-disclosure", "non disclosure", "nda",
    "confidential information", "disclosing party",
    "receiving party", "confidentiality",
    "confidential materials", "protected information",
]

# Match keywords as whole words/phrases only. A plain substring test lets
# "nda" match inside ordinary words such as "standard", "Monday" or
# "mandatory", which made almost any document pass as an NDA. The optional
# trailing "s" keeps simple plurals such as "NDAs" matching.
nda_pattern = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in NDA_KEYWORDS) + r")s?\b",
    re.IGNORECASE,
)

# Reject documents that are too short to analyse or mention no NDA terms.
if len(text) < 50 or not nda_pattern.search(text):
    st.error(T["error_not_nda"])
    st.stop()

st.success(T["success_nda"])
| # --------------------------------------------------- | |
| # ✅ ANALYSIS TABS | |
| # --------------------------------------------------- | |
st.subheader(T["analysis_title"])

# UI_TEXT keys of the tab labels, in display order; later sections address
# the tabs by position (tabs[0] .. tabs[5]).
tab_label_keys = (
    "tab_clauses",
    "tab_risks",
    "tab_fairness",
    "tab_entities",
    "tab_alternatives",
    "tab_chat",
)
tabs = st.tabs([T[label_key] for label_key in tab_label_keys])
| # =================================================== | |
| # ✅ TAB 1 — CLAUSE SIMPLIFICATION | |
| # =================================================== | |
with tabs[0]:
    st.markdown(f"### {T['clause_simplify']}")

    # Each option is (mode id, translated label); the radio shows the label
    # and the mode id is what gets passed to simplify_clause.
    mode_options = [("eli5", T["eli5"]), ("simple", T["simple"]), ("pro", T["pro"])]
    chosen_option = st.radio(
        T["choose_mode"],
        mode_options,
        format_func=lambda option: option[1],
    )
    mode = chosen_option[0]

    # `clauses` is reused by the risk and entity sections further down.
    clauses = split_into_clauses(text)

    # One collapsible panel per clause: original text, then its explanation.
    for clause_number, clause_text in enumerate(clauses, start=1):
        with st.expander(f"Clause {clause_number}"):
            st.write("**Original:**")
            st.write(clause_text)
            st.write("**Explanation:**")
            st.write(simplify_clause(clause_text, mode))
| # =================================================== | |
| # ✅ TAB 2 — RISK ANALYSIS | |
| # =================================================== | |
with tabs[1]:
    st.markdown(f"### {T['risk_title']}")

    # Keyword heuristic: a risk label is reported when any of its phrases
    # occurs (case-insensitively) in at least one clause.
    RISK_PATTERNS = {
        "Broad confidentiality definition": ["broad", "all information", "any information"],
        "Unlimited liability": ["unlimited", "full liability", "all damages"],
        "One-sided obligations": ["shall not", "only the receiving party"],
        "Long duration (>5 years)": ["5 years", "7 years", "perpetual"],
        "No termination rights": ["cannot terminate", "no termination"],
    }

    # Dict keys act as an insertion-ordered set, so each label is kept once,
    # in the order it was first detected.
    detected = {}
    for clause in clauses:
        lowered_clause = clause.lower()
        for label, phrases in RISK_PATTERNS.items():
            if any(phrase in lowered_clause for phrase in phrases):
                detected.setdefault(label, None)

    # Show at most five risks; `risks_found` also feeds the fairness meter.
    risks_found = list(detected)[:5]

    if risks_found:
        for risk in risks_found:
            st.error(f"⚠️ {risk}")
    else:
        st.success("✅ No major risks detected.")
| # =================================================== | |
| # ✅ TAB 3 — FAIRNESS METER | |
| # =================================================== | |
with tabs[2]:
    st.markdown(f"### {T['fairness_title']}")

    # Start at 50 and subtract 7 points per detected risk, then clamp the
    # result to the 20..90 range.
    unclamped_score = 50 - 7 * len(risks_found)
    fairness_score = max(20, min(90, unclamped_score))
    company_score = 100 - fairness_score

    st.write(f"**{T['your_position']}:** {fairness_score}%")
    st.write(f"**{T['company_position']}:** {company_score}%")

    # The progress bar is given the score as a 0.0-1.0 fraction.
    st.progress(fairness_score / 100)
| # =================================================== | |
| # ✅ TAB 4 — ENTITIES | |
| # =================================================== | |
| with tabs[3]: | |
| st.markdown(f"### {T['entities_title']}") | |
| parties = [] | |
| dates = [] | |
| money = [] | |
| import re | |
| for clause in clauses: | |
| if "party" in clause.lower(): | |
| parties.append(clause[:80] + "...") | |
| money.extend(re.findall(r"\$[\d,]+", clause)) | |
| dates.extend(re.findall(r"\b(?:\d{1,2}\/\d{1,2}\/\d{2,4}|20\d{2})\b", clause)) | |
| st.write("**Parties:**", list(set(parties))) | |
| st.write("**Dates:**", list(set(dates))) | |
| st.write("**Amounts:**", list(set(money))) | |
| # =================================================== | |
| # ✅ TAB 5 — ALTERNATIVE CLAUSES | |
| # =================================================== | |
| with tabs[4]: | |
| st.markdown(f"### {T['alt_title']}") | |
| ALTS = [ | |
| "A mutual confidentiality clause where both parties share equal protection.", | |
| "A time-limited confidentiality period of 2–3 years.", | |
| "Liability capped at a fixed reasonable amount." | |
| ] | |
| for alt in ALTS: | |
| st.info(alt) | |
| # =================================================== | |
| # ✅ TAB 6 — LEGAL CHAT ASSISTANT | |
| # =================================================== | |
| with tabs[5]: | |
| st.markdown(f"### {T['chat_title']}") | |
| user_input = st.text_input(T["chat_placeholder"]) | |
| if user_input: | |
| reply = chat_with_model(model, tokenizer, user_input, st.session_state.chat_history) | |
| st.session_state.chat_history.append(("User", user_input)) | |
| st.session_state.chat_history.append(("AI", reply)) | |
| for role, msg in st.session_state.chat_history[-10:]: | |
| if role == "User": | |
| st.markdown(f"🧑 **You:** {msg}") | |
| else: | |
| st.markdown(f"🤖 **ClauseWise:** {msg}") | |