bhoomi19 committed on
Commit
7dd21c8
·
verified ·
1 Parent(s): e631f5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -343
app.py CHANGED
@@ -1,361 +1,265 @@
1
- import streamlit as st
2
  import os
3
  import re
4
- import json
5
- from typing import List, Dict
6
- import torch
7
- from transformers import pipeline
8
- from pypdf import PdfReader
9
- import docx
10
  import io
11
-
12
- # Set page config FIRST - this is critical for Streamlit
13
- st.set_page_config(
14
- page_title="ClauseWise Legal Assistant",
15
- page_icon="βš–οΈ",
16
- layout="wide"
 
 
 
 
17
  )
 
 
 
 
 
 
18
 
19
- # Use a small, reliable model
20
- MODEL_ID = "microsoft/DialoGPT-small" # 334M parameters - fits in Spaces memory
 
 
 
 
21
 
22
- @st.cache_resource(show_spinner=False)
23
- def load_model():
24
- """Load a small model that actually works on Spaces"""
25
- try:
26
- # Use a simple pipeline - much more memory efficient
27
- generator = pipeline(
28
- "text-generation",
29
- model=MODEL_ID,
30
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
31
- device_map="auto" if torch.cuda.is_available() else None,
32
- max_length=512
33
- )
34
- return generator
35
- except Exception as e:
36
- st.error(f"Model loading failed: {e}")
37
- return None
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- def simple_llm_generate(prompt: str, max_length=200) -> str:
40
- """Simple generation with error handling"""
41
- generator = load_model()
42
- if generator is None:
43
- return "Model not available. Using demo mode."
44
-
45
  try:
46
- result = generator(
47
- prompt,
48
- max_length=max_length,
49
- num_return_sequences=1,
50
- temperature=0.7,
51
- do_sample=True,
52
- pad_token_id=50256
53
- )
54
- generated = result[0]['generated_text']
55
- # Remove the prompt from the response
56
- if generated.startswith(prompt):
57
- return generated[len(prompt):].strip()
58
- return generated.strip()
59
- except Exception as e:
60
- return f"Generation error: {str(e)}"
61
 
62
- # Document loading functions
63
- def load_text_from_pdf(file_obj):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  try:
65
- # Read the file content
66
- file_content = file_obj.read()
67
- file_obj.seek(0) # Reset file pointer
68
- reader = PdfReader(io.BytesIO(file_content))
69
- text = ""
70
- for page in reader.pages:
71
- page_text = page.extract_text()
72
- if page_text:
73
- text += page_text + "\n"
74
- return text.strip()
 
 
75
  except Exception as e:
76
- return f"Error reading PDF: {str(e)}"
 
 
 
 
 
77
 
78
- def load_text_from_docx(file_obj):
 
 
 
 
 
79
  try:
80
- file_content = file_obj.read()
81
- file_obj.seek(0)
82
- doc = docx.Document(io.BytesIO(file_content))
83
- return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
84
- except Exception as e:
85
- return f"Error reading DOCX: {str(e)}"
86
 
87
- def load_text_from_txt(file_obj):
88
  try:
89
- file_content = file_obj.read()
90
- file_obj.seek(0)
91
- if isinstance(file_content, bytes):
92
- return file_content.decode('utf-8', errors='ignore')
93
- return str(file_content)
94
- except Exception as e:
95
- return f"Error reading TXT: {str(e)}"
 
96
 
97
- def load_document(file):
98
- """Universal document loader"""
99
- if not file:
100
- return ""
101
-
102
- filename = file.name.lower()
103
- if filename.endswith('.pdf'):
104
- return load_text_from_pdf(file)
105
- elif filename.endswith('.docx'):
106
- return load_text_from_docx(file)
107
- elif filename.endswith('.txt'):
108
- return load_text_from_txt(file)
109
- else:
110
- # Try all formats
111
- for loader in [load_text_from_pdf, load_text_from_docx, load_text_from_txt]:
112
- try:
113
- result = loader(file)
114
- if result and not result.startswith("Error"):
115
- return result
116
- except:
117
- continue
118
- return "Could not read document"
119
-
120
- # FIXED regex patterns - simple and working
121
- def extract_clauses_simple(text: str) -> List[str]:
122
- """Simple clause extraction using reliable regex"""
123
- if not text:
124
- return []
125
-
126
- # Multiple splitting strategies
127
- clauses = []
128
-
129
- # Strategy 1: Split by common clause separators
130
- clauses1 = re.split(r'[.;!?]\s+', text)
131
-
132
- # Strategy 2: Split by line breaks followed by numbers or bullets
133
- clauses2 = re.split(r'\n\s*(?:\d+\.|\*|\-)\s+', text)
134
-
135
- # Strategy 3: Split by section markers
136
- clauses3 = re.split(r'\n\s*[A-Z][A-Za-z\s]+\:', text)
137
-
138
- # Combine all strategies and clean up
139
- all_clauses = clauses1 + clauses2 + clauses3
140
- cleaned_clauses = []
141
-
142
- for clause in all_clauses:
143
- clause = clause.strip()
144
- # Only keep meaningful clauses
145
- if (len(clause) > 30 and
146
- len(clause) < 1000 and
147
- not clause.isspace()):
148
- # Simple deduplication
149
- simple_clause = re.sub(r'\s+', ' ', clause.lower())
150
- if simple_clause not in [re.sub(r'\s+', ' ', c.lower()) for c in cleaned_clauses]:
151
- cleaned_clauses.append(clause)
152
-
153
- return cleaned_clauses[:20] # Limit to 20 clauses
154
-
155
- def rule_based_analysis(text):
156
- """Rule-based analysis without AI"""
157
- results = {}
158
-
159
- # Basic statistics
160
- results['character_count'] = len(text)
161
- results['word_count'] = len(text.split())
162
-
163
- # Clause analysis
164
- clauses = extract_clauses_simple(text)
165
- results['clauses_found'] = len(clauses)
166
-
167
- # Risk word detection
168
- risk_words = {
169
- 'high_risk': ['liable', 'indemnify', 'damages', 'breach', 'termination', 'penalty'],
170
- 'medium_risk': ['confidential', 'proprietary', 'warranty', 'obligation'],
171
- 'low_risk': ['agree', 'party', 'contract', 'term']
172
- }
173
-
174
- found_risks = {}
175
- text_lower = text.lower()
176
- for risk_level, words in risk_words.items():
177
- found = [word for word in words if word in text_lower]
178
- if found:
179
- found_risks[risk_level] = found
180
-
181
- results['risk_terms'] = found_risks
182
-
183
- # Simple document type detection
184
- text_lower = text.lower()
185
- doc_type_scores = {
186
- "Non-Disclosure Agreement": len(re.findall(r'confidential|non.?disclosure|nda', text_lower)),
187
- "Employment Contract": len(re.findall(r'employ|salary|duties|terminat', text_lower)),
188
- "Lease Agreement": len(re.findall(r'lease|tenant|rent|property', text_lower)),
189
- "Service Agreement": len(re.findall(r'service|provider|client|deliverable', text_lower)),
190
- "Sales Agreement": len(re.findall(r'sale|purchase|price|payment', text_lower))
191
- }
192
-
193
- best_type = max(doc_type_scores.items(), key=lambda x: x[1])
194
- results['doc_type'] = best_type[0] if best_type[1] > 0 else "General Contract"
195
- results['doc_type_confidence'] = min(100, best_type[1] * 20) # Simple confidence score
196
-
197
- return results, clauses
198
-
199
- # Initialize session state
200
- if 'text_data' not in st.session_state:
201
- st.session_state.text_data = ""
202
- if 'analysis_results' not in st.session_state:
203
- st.session_state.analysis_results = {}
204
- if 'clauses' not in st.session_state:
205
- st.session_state.clauses = []
206
-
207
- # UI Layout
208
- st.title("βš–οΈ ClauseWise Legal Assistant")
209
- st.markdown("**Lightweight legal document analysis**")
210
-
211
- # Sidebar
212
- with st.sidebar:
213
- st.header("πŸ“ Document Input")
214
-
215
- uploaded_file = st.file_uploader(
216
- "Upload Document",
217
- type=["pdf", "docx", "txt"],
218
- help="Supported formats: PDF, Word, Text"
219
  )
220
-
221
- pasted_text = st.text_area("Or paste text below:", height=150, placeholder="Paste your legal text here...")
222
-
223
- process_btn = st.button("πŸ“Š Analyze Document", type="primary", use_container_width=True)
224
-
225
- if process_btn:
226
- if uploaded_file:
227
- with st.spinner("Reading document..."):
228
- st.session_state.text_data = load_document(uploaded_file)
229
- elif pasted_text.strip():
230
- st.session_state.text_data = pasted_text.strip()
231
- else:
232
- st.error("Please upload a file or paste some text")
233
-
234
- if st.session_state.text_data and not st.session_state.text_data.startswith("Error"):
235
- st.success(f"βœ… Loaded {len(st.session_state.text_data)} characters")
236
-
237
- with st.spinner("Analyzing content..."):
238
- st.session_state.analysis_results, st.session_state.clauses = rule_based_analysis(st.session_state.text_data)
239
- else:
240
- st.error("Failed to load document text")
241
-
242
- # Main content area
243
- if st.session_state.text_data and not st.session_state.text_data.startswith("Error"):
244
- # Document preview
245
- with st.expander("πŸ“„ Document Preview", expanded=False):
246
- preview_text = st.session_state.text_data
247
- if len(preview_text) > 1500:
248
- st.text_area("", preview_text[:1500] + "...", height=200, label_visibility="collapsed")
249
- st.caption(f"Preview truncated. Full document: {len(preview_text)} characters")
250
- else:
251
- st.text_area("", preview_text, height=200, label_visibility="collapsed")
252
-
253
- # Analysis results
254
- if st.session_state.analysis_results:
255
- results = st.session_state.analysis_results
256
-
257
- st.subheader("πŸ“Š Analysis Results")
258
-
259
- # Key metrics
260
- col1, col2, col3, col4 = st.columns(4)
261
-
262
- with col1:
263
- st.metric("Document Type", results['doc_type'])
264
-
265
- with col2:
266
- st.metric("Confidence", f"{results['doc_type_confidence']}%")
267
-
268
- with col3:
269
- st.metric("Clauses Found", results['clauses_found'])
270
-
271
- with col4:
272
- st.metric("Word Count", results['word_count'])
273
-
274
- # Risk analysis
275
- if results['risk_terms']:
276
- st.subheader("⚠️ Risk Analysis")
277
-
278
- for risk_level, terms in results['risk_terms'].items():
279
- risk_display = risk_level.replace('_', ' ').title()
280
- color = {
281
- 'high_risk': 'red',
282
- 'medium_risk': 'orange',
283
- 'low_risk': 'green'
284
- }.get(risk_level, 'gray')
285
-
286
- st.write(f"**{risk_display}**: {', '.join(terms)}")
287
-
288
- # Clauses display
289
- if st.session_state.clauses:
290
- st.subheader(f"πŸ“‘ Extracted Clauses ({len(st.session_state.clauses)})")
291
-
292
- for i, clause in enumerate(st.session_state.clauses[:10], 1):
293
- with st.expander(f"Clause {i} ({len(clause)} chars)"):
294
- st.write(clause)
295
-
296
- if len(st.session_state.clauses) > 10:
297
- st.info(f"Showing first 10 of {len(st.session_state.clauses)} clauses")
298
-
299
- # AI Analysis Section (optional)
300
- st.subheader("πŸ€– AI Analysis (Optional)")
301
-
302
- if st.button("Generate AI Summary", key="ai_summary"):
303
- if len(st.session_state.text_data) > 100:
304
- with st.spinner("AI is analyzing..."):
305
- prompt = f"Provide a concise summary of this legal document:\n\n{st.session_state.text_data[:1000]}"
306
- ai_summary = simple_llm_generate(prompt, max_length=300)
307
- st.write(ai_summary)
308
  else:
309
- st.warning("Document too short for AI analysis")
310
-
311
- else:
312
- # Welcome screen
313
- st.markdown("""
314
- ## πŸ‘‹ Welcome to ClauseWise!
315
-
316
- A lightweight legal document analyzer optimized for Hugging Face Spaces.
317
-
318
- ### πŸš€ How to use:
319
- 1. **Upload a document** (PDF, DOCX, TXT) in the sidebar **OR**
320
- 2. **Paste your legal text** in the text area
321
- 3. Click **"Analyze Document"** to process
322
- 4. Review the automated analysis results
323
-
324
- ### πŸ“‹ What it analyzes:
325
- - **Document type** (NDA, Employment, Lease, etc.)
326
- - **Risk terms** and potential issues
327
- - **Clause extraction** and organization
328
- - **Basic statistics** and metrics
329
-
330
- ### πŸ§ͺ Try this sample text:
331
- ```
332
- This Non-Disclosure Agreement (the "Agreement") is entered into between
333
- Company ABC ("Disclosing Party") and John Smith ("Receiving Party").
334
- The Receiving Party agrees to maintain the confidentiality of all
335
- proprietary information disclosed under this Agreement for a period
336
- of three years following termination. Any breach of this Agreement
337
- may result in legal action and liability for damages.
338
- ```
339
-
340
- ### ⚠️ Important Notes:
341
- - Uses rule-based analysis for reliability
342
- - Optional AI features use small, fast models
343
- - Works best with clear legal text
344
- - Free and open source
345
- """)
346
-
347
- # Footer
348
- st.markdown("---")
349
- st.caption("πŸ”’ ClauseWise Demo | Optimized for Hugging Face Spaces | No data stored")
350
-
351
- # Add some custom CSS to make it look nicer
352
- st.markdown("""
353
- <style>
354
- .main .block-container {
355
- padding-top: 2rem;
356
- }
357
- .stButton button {
358
- width: 100%;
359
- }
360
- </style>
361
- """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import re
 
 
 
 
 
 
3
  import io
4
+ import tempfile
5
+ import torch
6
+ import pandas as pd
7
+ import plotly.express as px
8
+ import streamlit as st
9
+ from transformers import (
10
+ AutoTokenizer,
11
+ AutoModelForCausalLM,
12
+ AutoModelForSeq2SeqLM,
13
+ pipeline
14
  )
15
+ from PyPDF2 import PdfReader
16
+ from docx import Document
17
+ from gtts import gTTS
18
+ from io import BytesIO
19
+ import spacy
20
+ import subprocess
21
 
22
# -----------------------------
# Hugging Face fix: ensure Streamlit runs properly
# -----------------------------
# If this file is started as a plain Python program while the SYSTEM
# environment variable is "spaces", re-launch it through `streamlit run`.
#
# Streamlit itself executes the script with __name__ == "__main__", so the
# guard must also check that no Streamlit runtime is active; otherwise every
# script run would try to spawn another server.
from streamlit import runtime as _st_runtime

if (
    __name__ == "__main__"
    and os.environ.get("SYSTEM") == "spaces"
    and not _st_runtime.exists()
):
    import sys

    # Block on the server instead of Popen + exit(): the launching process
    # stays alive for as long as Streamlit does and forwards its exit code.
    sys.exit(
        subprocess.call(
            [
                "streamlit", "run", "app.py",
                "--server.port", "7860",
                "--server.address", "0.0.0.0",
            ]
        )
    )
28
 
29
# -----------------------------
# Page config
# -----------------------------
# Browser-tab title/icon and a full-width layout for the whole app.
st.set_page_config(
    page_title="βš– ClauseWise",
    page_icon="βš–",
    layout="wide",
)
33
+
34
# -----------------------------
# Language Map
# -----------------------------
# Display name -> language code. The code is used as the suffix of the
# translation model name and as the gTTS voice language.
LANG_MAP = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Hindi": "hi",
    "Tamil": "ta",
    "Telugu": "te",
    "Kannada": "kn",
    "Marathi": "mr",
    "Gujarati": "gu",
    "Bengali": "bn",
}
# Display names in insertion order; the select boxes index into this list.
LANG_NAMES = list(LANG_MAP)
43
+
44
+ # -----------------------------
45
+ # Model Loading (cached)
46
+ # -----------------------------
47
@st.cache_resource
def load_models():
    """Load every model the app needs and return them as one tuple.

    Decorated with ``st.cache_resource`` so the loading work is not repeated
    on each script run.

    Returns:
        A 7-tuple ``(tokenizer_simplify, simplify_model, gen_tokenizer,
        gen_model, nlp, classifier, summarizer)``.
    """

    def _spacy_english():
        # Fetch the small English pipeline on first use if it is not installed.
        try:
            return spacy.load("en_core_web_sm")
        except OSError:
            from spacy.cli import download

            download("en_core_web_sm")
            return spacy.load("en_core_web_sm")

    # Seq2seq checkpoint used for clause simplification.
    t5_checkpoint = "mrm8488/t5-small-finetuned-text-simplification"
    simplify_tok = AutoTokenizer.from_pretrained(t5_checkpoint)
    simplify_net = AutoModelForSeq2SeqLM.from_pretrained(t5_checkpoint)

    # Causal LM used by the chatbot.
    # NOTE(review): trust_remote_code=True allows code shipped with the
    # checkpoint to run locally - confirm it is still required.
    phi_checkpoint = "microsoft/phi-2"
    chat_tok = AutoTokenizer.from_pretrained(phi_checkpoint, trust_remote_code=True)
    chat_net = AutoModelForCausalLM.from_pretrained(phi_checkpoint, trust_remote_code=True)

    spacy_nlp = _spacy_english()

    # NOTE(review): nlp, classifier and summarizer are returned but not
    # referenced by the other functions visible in this file - confirm usage.
    zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    summariser = pipeline("summarization", model="facebook/bart-large-cnn")

    return (
        simplify_tok,
        simplify_net,
        chat_tok,
        chat_net,
        spacy_nlp,
        zero_shot,
        summariser,
    )
 
 
 
 
 
69
 
70
+
71
# Unpack the cached models into module-level names used by the helpers below.
(
    tokenizer_simplify,
    simplify_model,
    gen_tokenizer,
    gen_model,
    nlp,
    classifier,
    summarizer,
) = load_models()

# Prefer the GPU when torch reports one; only the chat model is moved to it.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"
gen_model.to(DEVICE)
74
+
75
+ # -----------------------------
76
+ # Utility Functions
77
+ # -----------------------------
78
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX or text file.

    The upload is parsed entirely in memory. The stream is rewound first so
    the same file object can be read again, and no copy of the user's
    document is written to disk.

    Args:
        file: File-like object with a ``name`` attribute and ``read()``.
            A falsy value is treated as "no upload".

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        when nothing was provided. If parsing fails, the error is shown with
        ``st.error`` and whatever text was collected so far is returned.
    """
    if not file:
        return ""
    name = file.name.lower()
    parts = []
    try:
        # Rewind: an earlier read would otherwise leave the cursor at EOF.
        if hasattr(file, "seek"):
            file.seek(0)
        payload = file.read()
        if isinstance(payload, str):
            payload = payload.encode("utf-8")

        if name.endswith(".pdf"):
            reader = PdfReader(io.BytesIO(payload))
            for page in reader.pages:
                page_text = page.extract_text()
                if page_text:
                    parts.append(page_text + "\n")
        elif name.endswith(".docx"):
            doc = Document(io.BytesIO(payload))
            parts.append("\n".join(p.text for p in doc.paragraphs if p.text.strip()))
        else:
            # Any other extension is read as UTF-8 text; undecodable bytes are
            # dropped and line endings normalised, as with open(path, "r").
            with io.TextIOWrapper(
                io.BytesIO(payload), encoding="utf-8", errors="ignore"
            ) as handle:
                parts.append(handle.read())
    except Exception as e:
        st.error(f"Error reading file: {e}")
    return "".join(parts).strip()
105
+
106
 
107
@st.cache_resource(show_spinner=False, max_entries=3)
def _load_translator(lang_code):
    """Build the English -> ``lang_code`` translation pipeline.

    Cached so the model is loaded once per language rather than on every
    call; ``max_entries`` bounds how many translation models stay in memory.
    A failed load raises and is therefore not cached.
    """
    return pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang_code}")


def translate_text(text, target_lang):
    """Translate English ``text`` into ``target_lang`` (best effort).

    Args:
        text: Text to translate; only the first 1000 characters are sent to
            the model.
        target_lang: Display name looked up in ``LANG_MAP``; unknown names
            fall back to English.

    Returns:
        ``""`` for empty input, the input unchanged when the target is
        English, the translation otherwise, or the untranslated input if the
        model cannot be loaded or the translation fails.
    """
    if not text:
        return ""
    lang_code = LANG_MAP.get(target_lang, "en")
    if lang_code == "en":
        return text
    try:
        translator = _load_translator(lang_code)
        return translator(text[:1000])[0]["translation_text"]
    except Exception:
        # Deliberate best effort: keep the UI usable without a translation.
        return text
118
+
 
119
 
120
def text_to_speech(text, lang):
    """Synthesize speech for ``text`` with gTTS and return it as a byte stream.

    Args:
        text: Text to speak; only the first 1000 characters are used.
        lang: Display name looked up in ``LANG_MAP`` (unknown names use "en").

    Returns:
        A ``BytesIO`` positioned at the start of the audio data, or ``None``
        if anything goes wrong while generating it.
    """
    try:
        voice = LANG_MAP.get(lang, "en")
        speech = gTTS(text=text[:1000], lang=voice)
        buffer = BytesIO()
        speech.write_to_fp(buffer)
        # Rewind so the caller reads the audio from the beginning.
        buffer.seek(0)
    except Exception:
        return None
    return buffer
130
 
131
+
132
def clause_simplification(text, mode):
    """Rewrite ``text`` with the simplification model in the requested style.

    Args:
        text: Source text; only the first 500 characters are used.
        mode: One of "Simplified", "Explain like I'm 5" or "Professional";
            any other value is handled like "Simplified".

    Returns:
        The decoded model output with special tokens removed.
    """
    instructions = {
        "Simplified": "simplify: ",
        "Explain like I'm 5": "explain like I'm 5: ",
        "Professional": "rephrase professionally: ",
    }
    instruction = instructions.get(mode, "simplify: ")
    model_input = instruction + text[:500]

    encoded = tokenizer_simplify(
        model_input,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    generated = simplify_model.generate(
        **encoded,
        max_length=256,
        num_beams=4,
        early_stopping=True,
    )
    best_sequence = generated[0]
    return tokenizer_simplify.decode(best_sequence, skip_special_tokens=True)
141
+
142
+
143
def fairness_score_visual(text, lang):
    """Render a keyword-based fairness chart and score for ``text``.

    The score starts at 50, gains 5 for each "balanced" keyword match, loses
    5 for each "one-sided" keyword match, and is clamped to 0-100. It is
    drawn as a horizontal bar chart followed by an info box whose message is
    translated into ``lang``.

    Args:
        text: Document text to scan (case-insensitive keyword search).
        lang: Display language for the summary message.
    """
    balanced_hits = re.findall(
        r"\b(mutual|both parties|shared|equal|fair|balanced)\b", text, re.I
    )
    one_sided_hits = re.findall(
        r"\b(sole|unilateral|exclusive right|one-sided|only)\b", text, re.I
    )
    raw_score = 50 + (len(balanced_hits) * 5) - (len(one_sided_hits) * 5)
    score = max(0, min(100, raw_score))

    st.subheader("βš– Fairness Balance Meter")

    # NOTE(review): "Balanced" and "Party B Favored" both plot `score`, so the
    # two bars are always identical - confirm the intended formula.
    chart_rows = {
        "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
        "Score": [max(0, 100 - score), score, min(100, score)],
    }
    fairness_df = pd.DataFrame(chart_rows)

    fig = px.bar(
        fairness_df,
        x="Score",
        y="Aspect",
        orientation="h",
        text="Score",
        color="Aspect",
        color_discrete_sequence=["#ff6b6b", "#4ecdc4", "#95e1d3"],
    )
    fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="", height=300)
    st.plotly_chart(fig, use_container_width=True)

    summary = f"Fairness Score: {score}% (Approximate)"
    st.info(translate_text(summary, lang))
160
+
161
+
162
def chat_response(prompt, lang, history):
    """Generate the assistant's reply to ``prompt`` using recent chat history.

    Args:
        prompt: The user's current question.
        lang: Display language the reply is translated into.
        history: List of ``(user_message, ai_reply)`` pairs; only the last
            three are included as context.

    Returns:
        The reply text, passed through ``translate_text``.
    """
    # Combine the last three exchanges into the conversational context.
    context = "\n".join(f"User: {u}\nAI: {a}" for u, a in history[-3:])
    full_prompt = (
        f"You are a helpful multilingual legal assistant. {context}\nUser: {prompt}\nAI:"
    )
    inputs = gen_tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
    outputs = gen_model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # Explicit padding id for tokenizers that do not define a pad token.
        pad_token_id=gen_tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on the last "AI:" would pick up a follow-up turn invented by the model
    # (or an "AI:" typed by the user) instead of the actual answer.
    prompt_length = inputs["input_ids"].shape[-1]
    response = gen_tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Stop at the first point where the model starts writing the user's turn.
    response = response.split("User:", 1)[0].strip()
    return translate_text(response, lang)
173
+
174
+
175
+ # -----------------------------
176
+ # Main Streamlit App
177
+ # -----------------------------
178
def _render_analyzer_tab():
    """Analyzer tab: take a document, then simplify it or chart its fairness."""
    st.subheader("πŸ“ Upload or Paste Legal Document")
    lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
    file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
    text_input = st.text_area("Or Paste Text Here:", height=200)

    if not (file or text_input):
        return

    # An upload takes precedence over pasted text. Pasted text is stripped so
    # whitespace-only input is reported as empty instead of reaching the model.
    text = extract_text(file) if file else text_input.strip()
    if not text:
        st.warning("No content found.")
        return

    mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
    if st.button("🧾 Simplify Clauses"):
        with st.spinner("Simplifying..."):
            simplified = clause_simplification(text, mode)
            translated = translate_text(simplified, lang)
        st.success(translated)
        audio = text_to_speech(translated, lang)
        if audio:
            st.audio(audio, format="audio/mp3")

    if st.button("βš– Fairness Analysis"):
        fairness_score_visual(text, lang)


def _render_translate_tab():
    """Translate tab: translate free text and/or play it as audio."""
    st.subheader("🌐 Translate & Listen")
    text_input = st.text_area("Enter text:", height=200)
    # index=4 preselects the fifth entry of LANG_NAMES.
    lang = st.selectbox("Translate to:", LANG_NAMES, index=4)

    if st.button("Translate"):
        if text_input.strip():
            st.success(translate_text(text_input, lang))
        else:
            # Avoid rendering an empty success box for blank input.
            st.warning("No content found.")

    if st.button("🎧 Generate Audio"):
        # NOTE(review): this speaks the text as typed, not its translation -
        # confirm that is the intended behaviour.
        audio = text_to_speech(text_input, lang)
        if audio:
            st.audio(audio, format="audio/mp3")


def _render_chat_tab():
    """Chatbot tab: question box, reply with audio, and recent history."""
    st.subheader("πŸ’¬ Chat with ClauseWise (Memory Enabled)")
    lang = st.selectbox("Chat Language:", LANG_NAMES, index=0)
    query = st.text_area("Ask your question:", height=150)

    # Conversation is kept in session state so it survives script reruns.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    if st.button("Ask"):
        if query.strip():
            with st.spinner("Thinking..."):
                response = chat_response(query, lang, st.session_state.chat_history)
                st.session_state.chat_history.append((query, response))
            st.success(response)
            audio = text_to_speech(response, lang)
            if audio:
                st.audio(audio, format="audio/mp3")

    # Reserve the history's place above the button but fill it afterwards, so
    # a "Clear Chat" click empties the list before anything is drawn.
    history_area = st.container()

    if st.button("Clear Chat"):
        st.session_state.chat_history = []
        st.info("Chat cleared.")

    if st.session_state.chat_history:
        with history_area:
            st.markdown("### 🧠 Chat History")
            # Only the five most recent exchanges are shown.
            for q, a in st.session_state.chat_history[-5:]:
                st.markdown(f"*You:* {q}")
                st.markdown(f"*ClauseWise:* {a}")


def _render_about_tab():
    """About tab: static description and disclaimer."""
    about_lines = [
        "### βš– About ClauseWise",
        "ClauseWise is a multilingual AI-powered legal assistant that helps users:",
        "- Simplify legal language",
        "- Translate and listen in 10+ languages",
        "- Assess fairness visually",
        "- Chat interactively with memory",
        "---",
        "*Disclaimer:* Educational use only β€” not legal advice.",
    ]
    st.markdown("\n".join(about_lines))


def main():
    """Draw the page header and the four application tabs."""
    st.title("βš– ClauseWise: Multilingual Legal AI Assistant")
    st.markdown("Simplify, translate, and analyze legal documents with AI β€” in your language.")
    st.divider()

    tab1, tab2, tab3, tab4 = st.tabs(["πŸ“„ Analyzer", "🌐 Translate & Audio", "πŸ’¬ Chatbot", "β„Ή About"])

    with tab1:
        _render_analyzer_tab()
    with tab2:
        _render_translate_tab()
    with tab3:
        _render_chat_tab()
    with tab4:
        _render_about_tab()
262
+
263
+
264
# Run the app when the file is executed as a script. The module attribute is
# __name__ (double underscores); the single-underscore spelling is an
# undefined name and raises NameError.
if __name__ == "__main__":
    main()