deepthi6 commited on
Commit
2248103
·
verified ·
1 Parent(s): df5688a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -108
app.py CHANGED
@@ -17,22 +17,14 @@ from docx import Document
17
  from gtts import gTTS
18
  from io import BytesIO
19
  import spacy
20
- import subprocess
21
 
22
  # -----------------------------
23
- # Hugging Face fix: ensure Streamlit runs properly
24
- # -----------------------------
25
- if __name__ == "__main__" and os.environ.get("SYSTEM") == "spaces":
26
- subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"])
27
- exit()
28
-
29
- # -----------------------------
30
- # Page config
31
  # -----------------------------
32
  st.set_page_config(page_title="βš–οΈ ClauseWise", page_icon="βš–οΈ", layout="wide")
33
 
34
  # -----------------------------
35
- # Language Map
36
  # -----------------------------
37
  LANG_MAP = {
38
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
@@ -42,38 +34,49 @@ LANG_MAP = {
42
  LANG_NAMES = list(LANG_MAP.keys())
43
 
44
  # -----------------------------
45
- # Model Loading (cached)
46
  # -----------------------------
47
  @st.cache_resource
48
  def load_models():
49
- simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
50
- tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
51
- simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
52
-
53
- gen_model_id = "microsoft/phi-2"
54
- gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id, trust_remote_code=True)
55
- gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id, trust_remote_code=True)
56
-
57
- # βœ… Load SpaCy
58
  try:
59
- nlp = spacy.load("en_core_web_sm")
60
- except OSError:
61
- from spacy.cli import download
62
- download("en_core_web_sm")
63
- nlp = spacy.load("en_core_web_sm")
64
-
65
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
66
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
69
 
 
 
 
 
70
 
71
- tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
72
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
73
- gen_model.to(DEVICE)
 
74
 
75
  # -----------------------------
76
- # Utility Functions
77
  # -----------------------------
78
  def extract_text(file):
79
  if not file:
@@ -111,36 +114,54 @@ def translate_text(text, target_lang):
111
  if lang_code == "en":
112
  return text
113
  try:
 
114
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang_code}")
115
- return translator(text[:1000])[0]["translation_text"]
116
- except Exception:
 
 
117
  return text
118
 
119
 
120
  def text_to_speech(text, lang):
 
 
121
  try:
122
  lang_code = LANG_MAP.get(lang, "en")
123
- tts = gTTS(text=text[:1000], lang=lang_code)
124
  audio_fp = BytesIO()
125
  tts.write_to_fp(audio_fp)
126
  audio_fp.seek(0)
127
  return audio_fp
128
- except Exception:
 
129
  return None
130
 
131
 
132
  def clause_simplification(text, mode):
133
- prefix = {
 
 
134
  "Simplified": "simplify: ",
135
  "Explain like I'm 5": "explain like I'm 5: ",
136
  "Professional": "rephrase professionally: "
137
- }.get(mode, "simplify: ")
138
- inputs = tokenizer_simplify(prefix + text[:500], return_tensors="pt", truncation=True, max_length=512)
139
- outputs = simplify_model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
140
- return tokenizer_simplify.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
141
 
142
 
143
  def fairness_score_visual(text, lang):
 
 
 
144
  pos = len(re.findall(r"\b(mutual|both parties|shared|equal|fair|balanced)\b", text, re.I))
145
  neg = len(re.findall(r"\b(sole|unilateral|exclusive right|one-sided|only)\b", text, re.I))
146
  score = max(0, min(100, 50 + (pos * 5) - (neg * 5)))
@@ -150,116 +171,126 @@ def fairness_score_visual(text, lang):
150
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
151
  "Score": [max(0, 100 - score), score, min(100, score)]
152
  })
153
- fig = px.bar(
154
- fairness_df, x="Score", y="Aspect", orientation="h", text="Score", color="Aspect",
155
- color_discrete_sequence=["#ff6b6b", "#4ecdc4", "#95e1d3"]
156
- )
157
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="", height=300)
158
  st.plotly_chart(fig, use_container_width=True)
159
- st.info(translate_text(f"Fairness Score: {score}% (Approximate)", lang))
 
 
160
 
161
 
162
- def chat_response(prompt, lang, history):
163
- """Persistent memory chat"""
164
- # Combine chat history context
165
- context = "\n".join([f"User: {u}\nAI: {a}" for u, a in history[-3:]]) # Keep last 3
166
- full_prompt = f"You are a helpful multilingual legal assistant. {context}\nUser: {prompt}\nAI:"
167
- inputs = gen_tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
168
- outputs = gen_model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9, do_sample=True)
169
- response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
170
- if "AI:" in response:
171
- response = response.split("AI:")[-1].strip()
172
- return translate_text(response, lang)
 
 
 
 
 
 
173
 
174
 
175
  # -----------------------------
176
- # Main Streamlit App
177
  # -----------------------------
178
  def main():
179
  st.title("βš–οΈ ClauseWise: Multilingual Legal AI Assistant")
180
- st.markdown("Simplify, translate, and analyze legal documents with AI β€” in your language.")
181
- st.divider()
182
 
183
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“„ Analyzer", "🌐 Translate & Audio", "πŸ’¬ Chatbot", "ℹ️ About"])
184
 
 
185
  with tab1:
186
  st.subheader("πŸ“ Upload or Paste Legal Document")
187
- lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
188
  file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
189
- text_input = st.text_area("Or Paste Text Here:", height=200)
190
 
191
  if file or text_input:
192
  text = extract_text(file) if file else text_input
193
- if not text:
194
- st.warning("No content found.")
195
- else:
196
  mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
197
  if st.button("🧾 Simplify Clauses"):
198
  with st.spinner("Simplifying..."):
199
  simplified = clause_simplification(text, mode)
200
  translated = translate_text(simplified, lang)
201
  st.success(translated)
202
- audio = text_to_speech(translated, lang)
203
- if audio:
204
- st.audio(audio, format="audio/mp3")
205
-
206
  if st.button("βš–οΈ Fairness Analysis"):
207
- fairness_score_visual(text, lang)
 
 
 
208
 
 
209
  with tab2:
210
  st.subheader("🌐 Translate & Listen")
211
- text_input = st.text_area("Enter text:", height=200)
212
- lang = st.selectbox("Translate to:", LANG_NAMES, index=4)
213
  if st.button("Translate"):
214
- translated = translate_text(text_input, lang)
215
- st.success(translated)
 
 
 
 
216
  if st.button("🎧 Generate Audio"):
217
- audio = text_to_speech(text_input, lang)
218
- if audio:
219
- st.audio(audio, format="audio/mp3")
 
 
 
 
220
 
 
221
  with tab3:
222
- st.subheader("πŸ’¬ Chat with ClauseWise (Memory Enabled)")
223
- lang = st.selectbox("Chat Language:", LANG_NAMES, index=0)
224
- query = st.text_area("Ask your question:", height=150)
225
-
226
- # Maintain persistent conversation
227
- if "chat_history" not in st.session_state:
228
- st.session_state.chat_history = []
229
-
230
  if st.button("Ask"):
231
  if query.strip():
232
  with st.spinner("Thinking..."):
233
- response = chat_response(query, lang, st.session_state.chat_history)
234
- st.session_state.chat_history.append((query, response))
235
  st.success(response)
236
- audio = text_to_speech(response, lang)
237
- if audio:
238
- st.audio(audio, format="audio/mp3")
239
-
240
- # Display conversation history
241
- if st.session_state.chat_history:
242
- st.markdown("### 🧠 Chat History")
243
- for q, a in st.session_state.chat_history[-5:]:
244
- st.markdown(f"**You:** {q}")
245
- st.markdown(f"**ClauseWise:** {a}")
246
-
247
- if st.button("Clear Chat"):
248
- st.session_state.chat_history = []
249
- st.info("Chat cleared.")
250
 
 
251
  with tab4:
252
  st.markdown("""
253
  ### βš–οΈ About ClauseWise
254
  ClauseWise is a multilingual AI-powered legal assistant that helps users:
255
- - Simplify legal language
256
- - Translate and listen in 10+ languages
257
- - Assess fairness visually
258
- - Chat interactively with memory
259
- ---
260
- **Disclaimer:** Educational use only β€” not legal advice.
261
- """)
262
 
 
 
263
 
 
 
 
 
 
 
264
  if __name__ == "__main__":
265
  main()
 
17
  from gtts import gTTS
18
  from io import BytesIO
19
  import spacy
 
20
 
21
  # -----------------------------
22
+ # STREAMLIT PAGE CONFIG
 
 
 
 
 
 
 
23
  # -----------------------------
24
  st.set_page_config(page_title="βš–οΈ ClauseWise", page_icon="βš–οΈ", layout="wide")
25
 
26
  # -----------------------------
27
+ # LANGUAGE MAP
28
  # -----------------------------
29
  LANG_MAP = {
30
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
 
34
  LANG_NAMES = list(LANG_MAP.keys())
35
 
36
  # -----------------------------
37
+ # MODEL LOADING (with caching)
38
  # -----------------------------
39
  @st.cache_resource
40
  def load_models():
41
+ """Load all required models with error handling"""
 
 
 
 
 
 
 
 
42
  try:
43
+ simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
44
+ tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
45
+ simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
46
+
47
+ gen_model_id = "microsoft/phi-2"
48
+ gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id, trust_remote_code=True)
49
+ gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id, trust_remote_code=True)
50
+
51
+ # ✅ Auto-download the spaCy English model if it is missing
52
+ try:
53
+ nlp = spacy.load("en_core_web_sm")
54
+ except OSError:
55
+ from spacy.cli import download
56
+ download("en_core_web_sm")
57
+ nlp = spacy.load("en_core_web_sm")
58
+
59
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
60
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
61
+
62
+ return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
63
+ except Exception as e:
64
+ st.error(f"Error loading models: {e}")
65
+ return None, None, None, None, None, None, None
66
 
 
67
 
68
+ model_data = load_models()
69
+ if model_data[0] is None:
70
+ st.error("Failed to load models. Please check your internet connection and try again.")
71
+ st.stop()
72
 
73
+ tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = model_data
74
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
75
+ if gen_model is not None:
76
+ gen_model.to(DEVICE)
77
 
78
  # -----------------------------
79
+ # UTILITIES
80
  # -----------------------------
81
  def extract_text(file):
82
  if not file:
 
114
  if lang_code == "en":
115
  return text
116
  try:
117
+ text_to_translate = text[:500]
118
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang_code}")
119
+ result = translator(text_to_translate, max_length=512)
120
+ return result[0]["translation_text"]
121
+ except Exception as e:
122
+ st.warning(f"Translation unavailable for {target_lang}: {str(e)}")
123
  return text
124
 
125
 
126
  def text_to_speech(text, lang):
127
+ if not text:
128
+ return None
129
  try:
130
  lang_code = LANG_MAP.get(lang, "en")
131
+ tts = gTTS(text=text[:1000], lang=lang_code, slow=False)
132
  audio_fp = BytesIO()
133
  tts.write_to_fp(audio_fp)
134
  audio_fp.seek(0)
135
  return audio_fp
136
+ except Exception as e:
137
+ st.warning(f"Audio generation unavailable: {str(e)}")
138
  return None
139
 
140
 
141
  def clause_simplification(text, mode):
142
+ if not text or simplify_model is None:
143
+ return text
144
+ prefix_map = {
145
  "Simplified": "simplify: ",
146
  "Explain like I'm 5": "explain like I'm 5: ",
147
  "Professional": "rephrase professionally: "
148
+ }
149
+ prefix = prefix_map.get(mode, "simplify: ")
150
+ try:
151
+ text_to_process = text[:500]
152
+ inputs = tokenizer_simplify(prefix + text_to_process, return_tensors="pt",
153
+ truncation=True, max_length=512)
154
+ outputs = simplify_model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
155
+ return tokenizer_simplify.decode(outputs[0], skip_special_tokens=True)
156
+ except Exception as e:
157
+ st.error(f"Simplification error: {e}")
158
+ return text
159
 
160
 
161
  def fairness_score_visual(text, lang):
162
+ if not text:
163
+ st.warning("No text to analyze.")
164
+ return
165
  pos = len(re.findall(r"\b(mutual|both parties|shared|equal|fair|balanced)\b", text, re.I))
166
  neg = len(re.findall(r"\b(sole|unilateral|exclusive right|one-sided|only)\b", text, re.I))
167
  score = max(0, min(100, 50 + (pos * 5) - (neg * 5)))
 
171
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
172
  "Score": [max(0, 100 - score), score, min(100, score)]
173
  })
174
+ fig = px.bar(fairness_df, x="Score", y="Aspect", orientation="h", text="Score",
175
+ color="Aspect", color_discrete_sequence=["#ff6b6b", "#4ecdc4", "#95e1d3"])
 
 
176
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="", height=300)
177
  st.plotly_chart(fig, use_container_width=True)
178
+ fairness_text = f"Fairness Score: {score}% (Approximate - based on keyword analysis)"
179
+ translated_result = translate_text(fairness_text, lang)
180
+ st.info(translated_result)
181
 
182
 
183
+ def chat_response(prompt, lang):
184
+ if not prompt or gen_model is None:
185
+ return "Unable to generate response. Please try again."
186
+ try:
187
+ full_prompt = f"You are a helpful legal assistant. Answer the following question: {prompt}\n\nAnswer:"
188
+ inputs = gen_tokenizer(full_prompt, return_tensors="pt", truncation=True,
189
+ max_length=512).to(DEVICE)
190
+ outputs = gen_model.generate(**inputs, max_new_tokens=200, temperature=0.7,
191
+ top_p=0.9, do_sample=True,
192
+ pad_token_id=gen_tokenizer.eos_token_id)
193
+ response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
194
+ if "Answer:" in response:
195
+ response = response.split("Answer:")[-1].strip()
196
+ return translate_text(response, lang)
197
+ except Exception as e:
198
+ st.error(f"Chat error: {e}")
199
+ return "I'm having trouble generating a response. Please try rephrasing your question."
200
 
201
 
202
  # -----------------------------
203
+ # MAIN APP
204
  # -----------------------------
205
  def main():
206
  st.title("βš–οΈ ClauseWise: Multilingual Legal AI Assistant")
207
+ st.markdown("**Simplify**, **translate**, and **analyze** legal documents with AI β€” in your language.\n---")
 
208
 
209
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“„ Analyzer", "🌐 Translate & Audio", "πŸ’¬ Chatbot", "ℹ️ About"])
210
 
211
+ # TAB 1: ANALYZER
212
  with tab1:
213
  st.subheader("πŸ“ Upload or Paste Legal Document")
214
+ lang = st.selectbox("Select Language:", LANG_NAMES, index=0, key="analyzer_lang")
215
  file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
216
+ text_input = st.text_area("Or Paste Text Here:", height=200, key="analyzer_text")
217
 
218
  if file or text_input:
219
  text = extract_text(file) if file else text_input
220
+ if text.strip():
 
 
221
  mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
222
  if st.button("🧾 Simplify Clauses"):
223
  with st.spinner("Simplifying..."):
224
  simplified = clause_simplification(text, mode)
225
  translated = translate_text(simplified, lang)
226
  st.success(translated)
227
+ audio_data = text_to_speech(translated, lang)
228
+ if audio_data:
229
+ st.audio(audio_data, format="audio/mp3")
 
230
  if st.button("βš–οΈ Fairness Analysis"):
231
+ with st.spinner("Analyzing fairness..."):
232
+ fairness_score_visual(text, lang)
233
+ else:
234
+ st.warning("Please provide some text to analyze.")
235
 
236
+ # TAB 2: TRANSLATION + AUDIO
237
  with tab2:
238
  st.subheader("🌐 Translate & Listen")
239
+ text_input = st.text_area("Enter text:", height=200, key="translate_text")
240
+ lang = st.selectbox("Translate to:", LANG_NAMES, index=4, key="translate_lang")
241
  if st.button("Translate"):
242
+ if text_input.strip():
243
+ with st.spinner("Translating..."):
244
+ translated = translate_text(text_input, lang)
245
+ st.success(translated)
246
+ else:
247
+ st.warning("Please enter some text to translate.")
248
  if st.button("🎧 Generate Audio"):
249
+ if text_input.strip():
250
+ with st.spinner("Generating audio..."):
251
+ audio_data = text_to_speech(text_input, lang)
252
+ if audio_data:
253
+ st.audio(audio_data, format="audio/mp3")
254
+ else:
255
+ st.warning("Please enter some text for audio generation.")
256
 
257
+ # TAB 3: CHATBOT
258
  with tab3:
259
+ st.subheader("πŸ’¬ Chat with ClauseWise (Multilingual)")
260
+ lang = st.selectbox("Chat Language:", LANG_NAMES, index=0, key="chat_lang")
261
+ query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150, key="chat_query")
 
 
 
 
 
262
  if st.button("Ask"):
263
  if query.strip():
264
  with st.spinner("Thinking..."):
265
+ response = chat_response(query, lang)
 
266
  st.success(response)
267
+ audio_data = text_to_speech(response, lang)
268
+ if audio_data:
269
+ st.audio(audio_data, format="audio/mp3")
270
+ else:
271
+ st.warning("Please enter a question.")
 
 
 
 
 
 
 
 
 
272
 
273
+ # TAB 4: ABOUT
274
  with tab4:
275
  st.markdown("""
276
  ### βš–οΈ About ClauseWise
277
  ClauseWise is a multilingual AI-powered legal assistant that helps users:
278
+ - **Simplify complex clauses** into easy-to-understand language
279
+ - **Translate and listen** in 10+ languages
280
+ - **Assess fairness** visually
281
+ - **Chat interactively** about legal concepts
282
+
283
+ **Languages Supported:**
284
+ English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
285
 
286
+ **Technologies Used:**
287
+ Hugging Face Transformers (T5, Phi-2, BART), SpaCy, gTTS, Plotly
288
 
289
+ ⚠️ *Disclaimer:* Educational use only β€” not legal advice.
290
+ """)
291
+
292
+ # -----------------------------
293
+ # ✅ Standard script entry point (on Hugging Face Spaces, Streamlit runs app.py directly —
294
+ # no subprocess relaunch is needed; see the removed launcher block in the old version)
295
  if __name__ == "__main__":
296
  main()