deepthi6 committed on
Commit
6edbcf8
·
verified ·
1 Parent(s): 36b1906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -81
app.py CHANGED
@@ -19,19 +19,19 @@ from io import BytesIO
19
  import spacy
20
 
21
  # -----------------------------
22
- # PAGE CONFIG
23
  # -----------------------------
24
- st.set_page_config(page_title="⚖️ ClauseWise: Multilingual Legal AI Assistant", page_icon="⚖️", layout="wide")
25
 
26
  st.title("⚖️ ClauseWise: Multilingual Legal AI Assistant")
27
  st.markdown("""
28
- ClauseWise helps you **simplify, translate, and understand legal documents** in your preferred language.
29
- Upload contracts, extract clauses, check fairness, and chat with your AI legal assistant — all multilingual and with audio output.
30
  ---
31
  """)
32
 
33
  # -----------------------------
34
- # LANGUAGE MAP
35
  # -----------------------------
36
  LANG_MAP = {
37
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
@@ -41,10 +41,10 @@ LANG_MAP = {
41
  LANG_NAMES = list(LANG_MAP.keys())
42
 
43
  # -----------------------------
44
- # LOAD MODELS
45
  # -----------------------------
46
  @st.cache_resource
47
- def load_all_models():
48
  simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
49
  tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
50
  simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
@@ -53,7 +53,7 @@ def load_all_models():
53
  gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
54
  gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
55
 
56
- # ✅ Auto-download SpaCy model if missing
57
  try:
58
  nlp = spacy.load("en_core_web_sm")
59
  except OSError:
@@ -66,14 +66,16 @@ def load_all_models():
66
 
67
  return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
68
 
69
- tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_all_models()
70
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
71
  gen_model.to(DEVICE)
72
 
73
  # -----------------------------
74
- # UTILS
75
  # -----------------------------
76
  def extract_text(file):
 
 
77
  name = file.name.lower()
78
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
79
  tmp.write(file.read())
@@ -83,16 +85,16 @@ def extract_text(file):
83
  if name.endswith(".pdf"):
84
  reader = PdfReader(tmp_path)
85
  for page in reader.pages:
86
- t = page.extract_text()
87
- if t:
88
- text += t + "\n"
89
  elif name.endswith(".docx"):
90
  doc = Document(tmp_path)
91
  text = "\n".join([p.text for p in doc.paragraphs])
92
  else:
93
  text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
94
  except Exception as e:
95
- st.error(f"Failed to read file: {e}")
96
  finally:
97
  os.remove(tmp_path)
98
  return text.strip()
@@ -108,15 +110,15 @@ def translate_text(text, target_lang):
108
  return f"(Translation unavailable for {target_lang})"
109
 
110
  def text_to_speech(text, lang):
111
- lang_code = LANG_MAP[lang]
112
  try:
 
113
  tts = gTTS(text=text, lang=lang_code)
114
  audio_fp = BytesIO()
115
  tts.write_to_fp(audio_fp)
116
  audio_fp.seek(0)
117
  return audio_fp
118
  except Exception:
119
- st.warning("Speech generation failed for this language.")
120
  return None
121
 
122
  def clause_simplification(text, mode):
@@ -139,63 +141,53 @@ def fairness_score_visual(text, lang):
139
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
140
  "Score": [100 - score, score // 2, score]
141
  })
142
- fig = px.bar(
143
- fairness_df, x="Score", y="Aspect", orientation="h",
144
- color="Aspect", text="Score", title="Fairness Score Representation"
145
- )
146
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
147
  st.plotly_chart(fig, use_container_width=True)
148
-
149
- translated_info = translate_text(f"Fairness Score: {score}% (Educational Estimate Only)", lang)
150
- st.info(translated_info)
151
 
152
  def chat_response(prompt, lang):
153
  inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
154
- outputs = gen_model.generate(**inputs, max_new_tokens=350, do_sample=True, temperature=0.7, top_p=0.9)
155
- resp = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
156
- return translate_text(resp, lang)
157
 
158
  # -----------------------------
159
- # MAIN TABS
160
  # -----------------------------
161
- tab1, tab2, tab3, tab4 = st.tabs(["📄 Analyzer", "🌐 Translate & Audio", "💬 Chatbot", "⚙️ About"])
162
 
163
- # -----------------------------
164
- # TAB 1: Analyzer
165
- # -----------------------------
166
  with tab1:
167
- st.subheader("πŸ“ Upload or Paste Legal Document")
168
- lang = st.selectbox("Select Working Language:", LANG_NAMES, index=0)
169
- file = st.file_uploader("Upload Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
170
  text_input = st.text_area("Or Paste Text Here:", height=200)
171
 
172
  if file or text_input:
173
  text = extract_text(file) if file else text_input
174
- st.markdown("---")
175
- col1, col2 = st.columns(2)
176
- with col1:
177
- mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
178
- if st.button("🧾 Simplify Clauses"):
179
- with st.spinner("Simplifying..."):
180
- simplified = clause_simplification(text, mode)
181
- translated_output = translate_text(simplified, lang)
182
- st.success(translated_output)
183
- audio_data = text_to_speech(translated_output, lang)
184
- if audio_data:
185
- st.audio(audio_data, format="audio/mp3")
186
-
187
- with col2:
188
- if st.button("⚖️ Fairness Analysis"):
189
- fairness_score_visual(text, lang)
190
 
191
- # -----------------------------
192
- # TAB 2: Translate & Audio
193
- # -----------------------------
 
 
 
 
 
 
 
 
 
 
 
 
194
  with tab2:
195
- st.subheader("🌐 Translate & Hear Content")
196
- text_input = st.text_area("Enter text to translate or listen:", height=200)
197
- lang = st.selectbox("Choose Translation Language:", LANG_NAMES, index=4)
198
- if st.button("Translate Text"):
 
199
  translated = translate_text(text_input, lang)
200
  st.success(translated)
201
  if st.button("🎧 Generate Audio"):
@@ -203,39 +195,31 @@ with tab2:
203
  if audio_data:
204
  st.audio(audio_data, format="audio/mp3")
205
 
206
- # -----------------------------
207
- # TAB 3: Chatbot
208
- # -----------------------------
209
  with tab3:
210
- st.subheader("💬 ClauseWise Multilingual Chatbot")
211
- lang = st.selectbox("Chatbot Language:", LANG_NAMES, index=4)
212
- st.markdown("Ask questions about contract clauses, fairness, or legal basics. *(Educational only — not legal advice.)*")
213
- query = st.text_area("Your question:", height=150)
214
- if st.button("Ask ClauseWise"):
215
  with st.spinner("Thinking..."):
216
- response = chat_response(f"Answer this like a legal assistant: {query}", lang)
217
  st.success(response)
218
  audio_data = text_to_speech(response, lang)
219
  if audio_data:
220
  st.audio(audio_data, format="audio/mp3")
221
 
222
- # -----------------------------
223
- # TAB 4: About
224
- # -----------------------------
225
  with tab4:
226
  st.markdown("""
227
- ### 🌍 About ClauseWise
228
- ClauseWise is an **AI-powered multilingual legal document assistant** that helps users:
229
- - Simplify complex legal clauses
230
- - Translate and listen in **10+ languages**
231
- - Analyze fairness visually
232
- - Ask questions interactively in any supported language
233
-
234
- **Supported Languages:**
235
  English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
236
 
237
- **Disclaimer:**
238
- ClauseWise provides educational insights only and does not offer legal advice.
239
  """)
240
-
241
- st.markdown("<p style='text-align:center; color:gray;'>© 2025 ClauseWise | Multilingual Legal AI Assistant</p>", unsafe_allow_html=True)
 
19
  import spacy
20
 
21
  # -----------------------------
22
+ # STREAMLIT CONFIG
23
  # -----------------------------
24
+ st.set_page_config(page_title="⚖️ ClauseWise", page_icon="⚖️", layout="wide")
25
 
26
  st.title("⚖️ ClauseWise: Multilingual Legal AI Assistant")
27
  st.markdown("""
28
+ Simplify, translate, and analyze legal documents in **10+ languages**.
29
+ ClauseWise helps you understand clauses, fairness, and contract structure — plus chat with an AI legal assistant.
30
  ---
31
  """)
32
 
33
  # -----------------------------
34
+ # LANGUAGE SUPPORT
35
  # -----------------------------
36
  LANG_MAP = {
37
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
 
41
  LANG_NAMES = list(LANG_MAP.keys())
42
 
43
  # -----------------------------
44
+ # LOAD ALL MODELS
45
  # -----------------------------
46
  @st.cache_resource
47
+ def load_models():
48
  simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
49
  tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
50
  simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
 
53
  gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
54
  gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
55
 
56
+ # ✅ Safe SpaCy load
57
  try:
58
  nlp = spacy.load("en_core_web_sm")
59
  except OSError:
 
66
 
67
  return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
68
 
69
+ tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
70
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
71
  gen_model.to(DEVICE)
72
 
73
  # -----------------------------
74
+ # UTILITIES
75
  # -----------------------------
76
  def extract_text(file):
77
+ if not file:
78
+ return ""
79
  name = file.name.lower()
80
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
81
  tmp.write(file.read())
 
85
  if name.endswith(".pdf"):
86
  reader = PdfReader(tmp_path)
87
  for page in reader.pages:
88
+ page_text = page.extract_text()
89
+ if page_text:
90
+ text += page_text + "\n"
91
  elif name.endswith(".docx"):
92
  doc = Document(tmp_path)
93
  text = "\n".join([p.text for p in doc.paragraphs])
94
  else:
95
  text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
96
  except Exception as e:
97
+ st.error(f"Error reading file: {e}")
98
  finally:
99
  os.remove(tmp_path)
100
  return text.strip()
 
110
  return f"(Translation unavailable for {target_lang})"
111
 
112
  def text_to_speech(text, lang):
 
113
  try:
114
+ lang_code = LANG_MAP[lang]
115
  tts = gTTS(text=text, lang=lang_code)
116
  audio_fp = BytesIO()
117
  tts.write_to_fp(audio_fp)
118
  audio_fp.seek(0)
119
  return audio_fp
120
  except Exception:
121
+ st.warning("Audio unavailable for this language.")
122
  return None
123
 
124
  def clause_simplification(text, mode):
 
141
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
142
  "Score": [100 - score, score // 2, score]
143
  })
144
+ fig = px.bar(fairness_df, x="Score", y="Aspect", orientation="h", text="Score", color="Aspect")
 
 
 
145
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
146
  st.plotly_chart(fig, use_container_width=True)
147
+ st.info(translate_text(f"Fairness Score: {score}% (Approximate)", lang))
 
 
148
 
149
  def chat_response(prompt, lang):
150
  inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
151
+ outputs = gen_model.generate(**inputs, max_new_tokens=300, temperature=0.7, top_p=0.9, do_sample=True)
152
+ response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
153
+ return translate_text(response, lang)
154
 
155
  # -----------------------------
156
+ # APP INTERFACE
157
  # -----------------------------
158
+ tab1, tab2, tab3, tab4 = st.tabs(["📄 Analyzer", "🌐 Translate & Audio", "💬 Chatbot", "ℹ️ About"])
159
 
160
+ # TAB 1: ANALYZER
 
 
161
  with tab1:
162
+ st.subheader("πŸ“ Upload or Paste Document")
163
+ lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
164
+ file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
165
  text_input = st.text_area("Or Paste Text Here:", height=200)
166
 
167
  if file or text_input:
168
  text = extract_text(file) if file else text_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
+ mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
171
+
172
+ if st.button("🧾 Simplify Clauses"):
173
+ with st.spinner("Simplifying..."):
174
+ simplified = clause_simplification(text, mode)
175
+ translated = translate_text(simplified, lang)
176
+ st.success(translated)
177
+ audio_data = text_to_speech(translated, lang)
178
+ if audio_data:
179
+ st.audio(audio_data, format="audio/mp3")
180
+
181
+ if st.button("⚖️ Fairness Analysis"):
182
+ fairness_score_visual(text, lang)
183
+
184
+ # TAB 2: TRANSLATION + AUDIO
185
  with tab2:
186
+ st.subheader("🌐 Translate & Listen")
187
+ text_input = st.text_area("Enter text:", height=200)
188
+ lang = st.selectbox("Translate to:", LANG_NAMES, index=4)
189
+
190
+ if st.button("Translate"):
191
  translated = translate_text(text_input, lang)
192
  st.success(translated)
193
  if st.button("🎧 Generate Audio"):
 
195
  if audio_data:
196
  st.audio(audio_data, format="audio/mp3")
197
 
198
+ # TAB 3: CHATBOT
 
 
199
  with tab3:
200
+ st.subheader("💬 Chat with ClauseWise (Multilingual)")
201
+ lang = st.selectbox("Chat Language:", LANG_NAMES, index=4)
202
+ query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150)
203
+ if st.button("Ask"):
 
204
  with st.spinner("Thinking..."):
205
+ response = chat_response(f"You are a legal assistant. Answer helpfully: {query}", lang)
206
  st.success(response)
207
  audio_data = text_to_speech(response, lang)
208
  if audio_data:
209
  st.audio(audio_data, format="audio/mp3")
210
 
211
+ # TAB 4: ABOUT
 
 
212
  with tab4:
213
  st.markdown("""
214
+ ### ⚖️ About ClauseWise
215
+ ClauseWise is a multilingual AI-powered legal assistant that helps users:
216
+ - Simplify complex clauses
217
+ - Translate and listen in 10+ languages
218
+ - Assess fairness visually
219
+ - Chat interactively
220
+
221
+ **Languages Supported:**
222
  English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
223
 
224
+ **Disclaimer:** Educational purposes only, not legal advice.
 
225
  """)