deepthi6 committed on
Commit
7bfb077
·
verified ·
1 Parent(s): 23ac016

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -85
app.py CHANGED
@@ -1,86 +1,172 @@
1
- import re
2
  from pypdf import PdfReader
3
- import docx
4
- import io
5
- from transformers import AutoTokenizer, AutoModelForCausalLM
6
-
7
-
8
- import torch
9
-
10
- # -------------------------------------------------------------
11
- # ✅ Extract text from files
12
- # -------------------------------------------------------------
13
def extract_text(uploaded_file):
    """Return the plain text of an uploaded .txt, .pdf or .docx file.

    Args:
        uploaded_file: Object exposing ``.name`` and ``.read()``; it is also
            handed directly to ``PdfReader`` / ``docx.Document``.

    Returns:
        The extracted text, or ``""`` when the extension is not supported.
    """
    # Tolerate a missing name instead of crashing on ``None.lower()``.
    name = (uploaded_file.name or "").lower()

    if name.endswith(".txt"):
        # Undecodable bytes are dropped rather than aborting the upload.
        return uploaded_file.read().decode("utf-8", errors="ignore")

    if name.endswith(".pdf"):
        reader = PdfReader(uploaded_file)
        # ``or ""`` covers pages for which no text could be extracted.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if name.endswith(".docx"):
        doc = docx.Document(uploaded_file)
        return "\n".join(p.text for p in doc.paragraphs)

    return ""
31
-
32
- # -------------------------------------------------------------
33
- # ✅ Clause splitting (very stable)
34
- # -------------------------------------------------------------
35
def split_into_clauses(text, max_clauses=15, min_length=40):
    """Split contract text into clause-sized chunks.

    The text is cut at numbered items ("1. "), dash bullets ("- ") and blank
    lines. A marker at the very start of the text is recognised too, so the
    first clause does not keep its "1. " prefix.

    Args:
        text: Raw document text; ``None`` or ``""`` yields an empty list.
        max_clauses: Maximum number of clauses returned (keeps the app fast
            on CPU-only hosting).
        min_length: Chunks whose stripped length is not greater than this
            are discarded as noise (headings, stray bullets).

    Returns:
        A list of stripped clause strings, in document order.
    """
    if not text:
        return []

    parts = re.split(r"(?:^|\n)\s*\d+\.\s+|(?:^|\n)\s*-\s+|\n{2,}", text)
    stripped = (part.strip() for part in parts)
    clauses = [part for part in stripped if len(part) > min_length]
    return clauses[:max_clauses]
39
-
40
- # -------------------------------------------------------------
41
- # ✅ Clause simplifier (dummy logic)
42
- # -------------------------------------------------------------
43
def _shorten(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only if cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def simplify_clause(clause, mode):
    """Produce a placeholder "simplified" rendering of a clause.

    Args:
        clause: Clause text; surrounding whitespace is stripped.
        mode: ``"eli5"`` (first 120 characters), ``"simple"`` (first 150
            characters) or ``"pro"`` (full text). Any other value returns
            the stripped clause unchanged.

    Returns:
        The prefixed clause text. An ellipsis is appended only when the
        clause was actually truncated.
    """
    clause = clause.strip()

    if mode == "eli5":
        return f"This clause basically means: {_shorten(clause, 120)}"

    if mode == "simple":
        return f"Simplified meaning: {_shorten(clause, 150)}"

    if mode == "pro":
        return f"Professional interpretation: {clause}"

    return clause
56
-
57
- # -------------------------------------------------------------
58
- # ✅ Chat with DistilGPT2 (working HF CPU-safe chat)
59
- # -------------------------------------------------------------
60
def chat_with_model(model, tokenizer, prompt, history, max_new_tokens=200):
    """Generate the assistant's next reply for a running conversation.

    Args:
        model: Causal language model exposing ``generate(**inputs, ...)``.
        tokenizer: Matching tokenizer; called on the prompt and used to
            decode the generated ids.
        prompt: The new user message.
        history: Sequence of ``(role, text)`` pairs; only the last six are
            placed in the prompt.
        max_new_tokens: Budget for the reply itself. The previous
            ``max_length=200`` also counted the prompt tokens, so a long
            history left little or no room for an answer.

    Returns:
        The text following the last "AI:" marker in the decoded output.
    """
    # Build the conversation context from the most recent messages.
    turns = [f"{role}: {text}\n" for role, text in history[-6:]]
    full_prompt = "".join(turns) + f"User: {prompt}\nAI:"

    inputs = tokenizer(full_prompt, return_tensors="pt")

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            num_beams=1,
            no_repeat_ngram_size=2,
            pad_token_id=tokenizer.eos_token_id,
        )

    result = tokenizer.decode(output[0], skip_special_tokens=True)

    # The decoded text still contains the prompt; keep only the last answer.
    if "AI:" in result:
        result = result.split("AI:")[-1].strip()

    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  from pypdf import PdfReader
3
+ from docx import Document
4
+ import re
5
+ from multilingual import UI_TEXT, translate_text
6
+ from backup_features import extract_entities, extract_clauses, get_risks, fairness_score, alternative_clauses
7
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
8
+
9
+ # -------------------------------
10
+ # Load Local T5 Chat Model
11
+ # -------------------------------
12
@st.cache_resource
def load_chat_model():
    """Load the ``t5-small`` tokenizer and model used by the chat tab.

    Wrapped in ``st.cache_resource`` so the pair is reused rather than
    reloaded on each script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "t5-small"
    t5_tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    t5_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    return t5_tokenizer, t5_model


# Module-level handles shared by chat_response().
tokenizer, chat_model = load_chat_model()
19
+
20
def chat_response(user_msg, lang):
    """Answer an NDA question with the local T5 model.

    Args:
        user_msg: The question typed by the user.
        lang: Language code forwarded to ``translate_text``.

    Returns:
        The decoded model output after it has been passed through
        ``translate_text(reply, lang)``.
    """
    question = f"Legal NDA Question: {user_msg}\nProvide educational information only."
    encoded = tokenizer(question, return_tensors="pt", truncation=True)
    generated = chat_model.generate(**encoded, max_length=150)
    answer = tokenizer.decode(generated[0], skip_special_tokens=True)
    return translate_text(answer, lang)
26
+
27
+ # -------------------------------
28
+ # Utility Functions
29
+ # -------------------------------
30
def is_nda(text):
    """Return True when *text* appears to be a non-disclosure agreement.

    The check looks for the standalone word "NDA"/"NDAs" or for
    "non-disclosure" (also spelled "non disclosure" / "nondisclosure"),
    case-insensitively. A plain substring test for "nda" is not used
    because it also fires on ordinary words such as "standard", "agenda"
    or "mandatory".

    Args:
        text: Document text; ``None`` or ``""`` is treated as not an NDA.

    Returns:
        ``True`` if one of the markers is present, otherwise ``False``.
    """
    if not text:
        return False
    pattern = r"\bndas?\b|\bnon[-\u2010\u2011\s]?disclosure"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None
32
+
33
def read_pdf(file):
    """Extract the text of every page of a PDF, joined by newlines.

    Args:
        file: Path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The page texts separated by ``"\\n"``.
    """
    reader = PdfReader(file)
    # ``or ""`` keeps join() from failing if a page yields no text
    # (the earlier version of this app carried the same guard).
    return "\n".join(page.extract_text() or "" for page in reader.pages)
36
+
37
def read_docx(file):
    """Return the paragraphs of a .docx document joined by newlines.

    Args:
        file: Path or binary file-like object accepted by ``Document``.
    """
    paragraphs = Document(file).paragraphs
    return "\n".join([para.text for para in paragraphs])
40
+
41
def read_txt(file):
    """Decode an uploaded text file to ``str``.

    Args:
        file: Binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text. A leading UTF-8 BOM is stripped, and bytes that
        are not valid UTF-8 become U+FFFD instead of raising
        ``UnicodeDecodeError`` and aborting the upload.
    """
    return file.read().decode("utf-8-sig", errors="replace")
43
+
44
+ # -------------------------------
45
+ # Streamlit UI
46
+ # -------------------------------
47
# Page flow: pick a language, upload a document, verify it is an NDA, then
# render six analysis tabs followed by a disclaimer.
# NOTE(review): some Streamlit releases require set_page_config() to be the
# first Streamlit command; the cached model load earlier in the file may run
# before it — confirm against the deployed Streamlit version.
st.set_page_config(page_title="ClauseWise", layout="wide")

# Language Selector
lang = st.sidebar.selectbox("🌐 Language", ["en", "hi", "ta", "te", "kn"])

# Title
st.title("📄 ClauseWise – Multilingual NDA Analyzer")

uploaded = st.file_uploader(UI_TEXT["upload_title"][lang], type=["pdf", "txt", "docx"])

if uploaded:
    # Lower-case the extension so "FILE.PDF" is handled like "file.pdf".
    ext = uploaded.name.rsplit(".", 1)[-1].lower()

    if ext == "pdf":
        text = read_pdf(uploaded)
    elif ext == "txt":
        text = read_txt(uploaded)
    elif ext == "docx":
        text = read_docx(uploaded)
    else:
        # NOTE(review): reuses the "not an NDA" message for an unsupported
        # file type; a dedicated UI_TEXT entry would be clearer.
        st.error(UI_TEXT["error_not_nda"][lang])
        st.stop()

    if not is_nda(text):
        st.error(UI_TEXT["error_not_nda"][lang])
        st.stop()

    st.success(UI_TEXT["success_nda"][lang])

    tabs = st.tabs([
        UI_TEXT["tab_clauses"][lang],
        UI_TEXT["tab_risks"][lang],
        UI_TEXT["tab_fairness"][lang],
        UI_TEXT["tab_entities"][lang],
        UI_TEXT["tab_alternatives"][lang],
        UI_TEXT["tab_chat"][lang],
    ])

    # -----------------------------
    # TAB 1 — CLAUSE SIMPLIFICATION
    # -----------------------------
    with tabs[0]:
        st.header(UI_TEXT["clause_simplify"][lang])

        clauses = extract_clauses(text)

        mode = st.radio(
            UI_TEXT["choose_mode"][lang],
            [
                UI_TEXT["eli5"][lang],
                UI_TEXT["simple"][lang],
                UI_TEXT["pro"][lang],
            ],
        )

        for c in clauses:
            st.subheader(c["title"])
            if mode == UI_TEXT["eli5"][lang]:
                st.write(translate_text(c["eli5"], lang))
            elif mode == UI_TEXT["simple"][lang]:
                st.write(translate_text(c["simple"], lang))
            else:
                # Fixed: the original line had a stray "]" (syntax error).
                st.write(translate_text(c["pro"], lang))

    # -----------------------------
    # TAB 2 — RISKS
    # -----------------------------
    with tabs[1]:
        st.header(UI_TEXT["risk_title"][lang])
        risks = get_risks(text)
        for r in risks:
            st.error("⚠️ " + translate_text(r, lang))

    # -----------------------------
    # TAB 3 — FAIRNESS
    # -----------------------------
    with tabs[2]:
        st.header(UI_TEXT["fairness_title"][lang])
        score = fairness_score(text)

        st.write(f"**{UI_TEXT['your_position'][lang]}:** {score['user']}%")
        st.write(f"**{UI_TEXT['company_position'][lang]}:** {score['company']}%")

        # st.progress rejects values outside 0.0-1.0, so clamp the ratio.
        st.progress(min(max(score["user"] / 100, 0.0), 1.0))

    # -----------------------------
    # TAB 4 — ENTITIES
    # -----------------------------
    with tabs[3]:
        st.header(UI_TEXT["entities_title"][lang])
        ents = extract_entities(text)

        st.write("**Parties:**", ents["parties"])
        st.write("**Dates:**", ents["dates"])
        st.write("**Amounts:**", ents["money"])

    # -----------------------------
    # TAB 5 — ALTERNATIVES
    # -----------------------------
    with tabs[4]:
        st.header(UI_TEXT["alt_title"][lang])
        alts = alternative_clauses(text)
        for a in alts:
            st.info(translate_text(a, lang))

    # -----------------------------
    # TAB 6 — LEGAL CHAT
    # -----------------------------
    with tabs[5]:
        st.header(UI_TEXT["chat_title"][lang])

        # The history lives in session_state so it survives script reruns.
        if "chat_history" not in st.session_state:
            st.session_state.chat_history = []

        for msg in st.session_state.chat_history:
            st.chat_message(msg["role"]).write(msg["text"])

        user_msg = st.chat_input(UI_TEXT["chat_placeholder"][lang])

        if user_msg:
            st.session_state.chat_history.append({"role": "user", "text": user_msg})
            st.chat_message("user").write(user_msg)

            bot_reply = chat_response(user_msg, lang)
            st.session_state.chat_history.append({"role": "assistant", "text": bot_reply})

            st.chat_message("assistant").write(bot_reply)

# Footer Disclaimer
st.info("⚠️ ClauseWise provides **educational legal insights only** — this is **NOT legal advice**.")