Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,19 +19,19 @@ from io import BytesIO
|
|
| 19 |
import spacy
|
| 20 |
|
| 21 |
# -----------------------------
|
| 22 |
-
#
|
| 23 |
# -----------------------------
|
| 24 |
-
st.set_page_config(page_title="βοΈ ClauseWise
|
| 25 |
|
| 26 |
st.title("βοΈ ClauseWise: Multilingual Legal AI Assistant")
|
| 27 |
st.markdown("""
|
| 28 |
-
|
| 29 |
-
|
| 30 |
---
|
| 31 |
""")
|
| 32 |
|
| 33 |
# -----------------------------
|
| 34 |
-
# LANGUAGE
|
| 35 |
# -----------------------------
|
| 36 |
LANG_MAP = {
|
| 37 |
"English": "en", "French": "fr", "Spanish": "es", "German": "de",
|
|
@@ -41,10 +41,10 @@ LANG_MAP = {
|
|
| 41 |
LANG_NAMES = list(LANG_MAP.keys())
|
| 42 |
|
| 43 |
# -----------------------------
|
| 44 |
-
# LOAD MODELS
|
| 45 |
# -----------------------------
|
| 46 |
@st.cache_resource
|
| 47 |
-
def
|
| 48 |
simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
|
| 49 |
tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
|
| 50 |
simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
|
|
@@ -53,7 +53,7 @@ def load_all_models():
|
|
| 53 |
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
|
| 54 |
gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
|
| 55 |
|
| 56 |
-
# β
|
| 57 |
try:
|
| 58 |
nlp = spacy.load("en_core_web_sm")
|
| 59 |
except OSError:
|
|
@@ -66,14 +66,16 @@ def load_all_models():
|
|
| 66 |
|
| 67 |
return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
|
| 68 |
|
| 69 |
-
tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer =
|
| 70 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 71 |
gen_model.to(DEVICE)
|
| 72 |
|
| 73 |
# -----------------------------
|
| 74 |
-
#
|
| 75 |
# -----------------------------
|
| 76 |
def extract_text(file):
|
|
|
|
|
|
|
| 77 |
name = file.name.lower()
|
| 78 |
with tempfile.NamedTemporaryFile(delete=False) as tmp:
|
| 79 |
tmp.write(file.read())
|
|
@@ -83,16 +85,16 @@ def extract_text(file):
|
|
| 83 |
if name.endswith(".pdf"):
|
| 84 |
reader = PdfReader(tmp_path)
|
| 85 |
for page in reader.pages:
|
| 86 |
-
|
| 87 |
-
if
|
| 88 |
-
text +=
|
| 89 |
elif name.endswith(".docx"):
|
| 90 |
doc = Document(tmp_path)
|
| 91 |
text = "\n".join([p.text for p in doc.paragraphs])
|
| 92 |
else:
|
| 93 |
text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
|
| 94 |
except Exception as e:
|
| 95 |
-
st.error(f"
|
| 96 |
finally:
|
| 97 |
os.remove(tmp_path)
|
| 98 |
return text.strip()
|
|
@@ -108,15 +110,15 @@ def translate_text(text, target_lang):
|
|
| 108 |
return f"(Translation unavailable for {target_lang})"
|
| 109 |
|
| 110 |
def text_to_speech(text, lang):
|
| 111 |
-
lang_code = LANG_MAP[lang]
|
| 112 |
try:
|
|
|
|
| 113 |
tts = gTTS(text=text, lang=lang_code)
|
| 114 |
audio_fp = BytesIO()
|
| 115 |
tts.write_to_fp(audio_fp)
|
| 116 |
audio_fp.seek(0)
|
| 117 |
return audio_fp
|
| 118 |
except Exception:
|
| 119 |
-
st.warning("
|
| 120 |
return None
|
| 121 |
|
| 122 |
def clause_simplification(text, mode):
|
|
@@ -139,63 +141,53 @@ def fairness_score_visual(text, lang):
|
|
| 139 |
"Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
|
| 140 |
"Score": [100 - score, score // 2, score]
|
| 141 |
})
|
| 142 |
-
fig = px.bar(
|
| 143 |
-
fairness_df, x="Score", y="Aspect", orientation="h",
|
| 144 |
-
color="Aspect", text="Score", title="Fairness Score Representation"
|
| 145 |
-
)
|
| 146 |
fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
|
| 147 |
st.plotly_chart(fig, use_container_width=True)
|
| 148 |
-
|
| 149 |
-
translated_info = translate_text(f"Fairness Score: {score}% (Educational Estimate Only)", lang)
|
| 150 |
-
st.info(translated_info)
|
| 151 |
|
| 152 |
def chat_response(prompt, lang):
|
| 153 |
inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
|
| 154 |
-
outputs = gen_model.generate(**inputs, max_new_tokens=
|
| 155 |
-
|
| 156 |
-
return translate_text(
|
| 157 |
|
| 158 |
# -----------------------------
|
| 159 |
-
#
|
| 160 |
# -----------------------------
|
| 161 |
-
tab1, tab2, tab3, tab4 = st.tabs(["π Analyzer", "π Translate & Audio", "π¬ Chatbot", "
|
| 162 |
|
| 163 |
-
#
|
| 164 |
-
# TAB 1: Analyzer
|
| 165 |
-
# -----------------------------
|
| 166 |
with tab1:
|
| 167 |
-
st.subheader("π Upload or Paste
|
| 168 |
-
lang = st.selectbox("Select
|
| 169 |
-
file = st.file_uploader("Upload Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
|
| 170 |
text_input = st.text_area("Or Paste Text Here:", height=200)
|
| 171 |
|
| 172 |
if file or text_input:
|
| 173 |
text = extract_text(file) if file else text_input
|
| 174 |
-
st.markdown("---")
|
| 175 |
-
col1, col2 = st.columns(2)
|
| 176 |
-
with col1:
|
| 177 |
-
mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
|
| 178 |
-
if st.button("π§Ύ Simplify Clauses"):
|
| 179 |
-
with st.spinner("Simplifying..."):
|
| 180 |
-
simplified = clause_simplification(text, mode)
|
| 181 |
-
translated_output = translate_text(simplified, lang)
|
| 182 |
-
st.success(translated_output)
|
| 183 |
-
audio_data = text_to_speech(translated_output, lang)
|
| 184 |
-
if audio_data:
|
| 185 |
-
st.audio(audio_data, format="audio/mp3")
|
| 186 |
-
|
| 187 |
-
with col2:
|
| 188 |
-
if st.button("βοΈ Fairness Analysis"):
|
| 189 |
-
fairness_score_visual(text, lang)
|
| 190 |
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
with tab2:
|
| 195 |
-
st.subheader("π Translate &
|
| 196 |
-
text_input = st.text_area("Enter text
|
| 197 |
-
lang = st.selectbox("
|
| 198 |
-
|
|
|
|
| 199 |
translated = translate_text(text_input, lang)
|
| 200 |
st.success(translated)
|
| 201 |
if st.button("π§ Generate Audio"):
|
|
@@ -203,39 +195,31 @@ with tab2:
|
|
| 203 |
if audio_data:
|
| 204 |
st.audio(audio_data, format="audio/mp3")
|
| 205 |
|
| 206 |
-
#
|
| 207 |
-
# TAB 3: Chatbot
|
| 208 |
-
# -----------------------------
|
| 209 |
with tab3:
|
| 210 |
-
st.subheader("π¬ ClauseWise Multilingual
|
| 211 |
-
lang = st.selectbox("
|
| 212 |
-
st.
|
| 213 |
-
|
| 214 |
-
if st.button("Ask ClauseWise"):
|
| 215 |
with st.spinner("Thinking..."):
|
| 216 |
-
response = chat_response(f"
|
| 217 |
st.success(response)
|
| 218 |
audio_data = text_to_speech(response, lang)
|
| 219 |
if audio_data:
|
| 220 |
st.audio(audio_data, format="audio/mp3")
|
| 221 |
|
| 222 |
-
#
|
| 223 |
-
# TAB 4: About
|
| 224 |
-
# -----------------------------
|
| 225 |
with tab4:
|
| 226 |
st.markdown("""
|
| 227 |
-
###
|
| 228 |
-
ClauseWise is
|
| 229 |
-
- Simplify complex
|
| 230 |
-
- Translate and listen in
|
| 231 |
-
-
|
| 232 |
-
-
|
| 233 |
-
|
| 234 |
-
**Supported
|
| 235 |
English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
|
| 236 |
|
| 237 |
-
**Disclaimer:**
|
| 238 |
-
ClauseWise provides educational insights only and does not offer legal advice.
|
| 239 |
""")
|
| 240 |
-
|
| 241 |
-
st.markdown("<p style='text-align:center; color:gray;'>Β© 2025 ClauseWise | Multilingual Legal AI Assistant</p>", unsafe_allow_html=True)
|
|
|
|
| 19 |
import spacy
|
| 20 |
|
| 21 |
# -----------------------------
|
| 22 |
+
# STREAMLIT CONFIG
|
| 23 |
# -----------------------------
|
| 24 |
+
st.set_page_config(page_title="βοΈ ClauseWise", page_icon="βοΈ", layout="wide")
|
| 25 |
|
| 26 |
st.title("βοΈ ClauseWise: Multilingual Legal AI Assistant")
|
| 27 |
st.markdown("""
|
| 28 |
+
Simplify, translate, and analyze legal documents in **10+ languages**.
|
| 29 |
+
ClauseWise helps you understand clauses, fairness, and contract structure — plus chat with an AI legal assistant.
|
| 30 |
---
|
| 31 |
""")
|
| 32 |
|
| 33 |
# -----------------------------
|
| 34 |
+
# LANGUAGE SUPPORT
|
| 35 |
# -----------------------------
|
| 36 |
LANG_MAP = {
|
| 37 |
"English": "en", "French": "fr", "Spanish": "es", "German": "de",
|
|
|
|
| 41 |
LANG_NAMES = list(LANG_MAP.keys())
|
| 42 |
|
| 43 |
# -----------------------------
|
| 44 |
+
# LOAD ALL MODELS
|
| 45 |
# -----------------------------
|
| 46 |
@st.cache_resource
|
| 47 |
+
def load_models():
|
| 48 |
simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
|
| 49 |
tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
|
| 50 |
simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
|
|
|
|
| 53 |
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
|
| 54 |
gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
|
| 55 |
|
| 56 |
+
# ✅ Safe SpaCy load
|
| 57 |
try:
|
| 58 |
nlp = spacy.load("en_core_web_sm")
|
| 59 |
except OSError:
|
|
|
|
| 66 |
|
| 67 |
return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
|
| 68 |
|
| 69 |
+
tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
|
| 70 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 71 |
gen_model.to(DEVICE)
|
| 72 |
|
| 73 |
# -----------------------------
|
| 74 |
+
# UTILITIES
|
| 75 |
# -----------------------------
|
| 76 |
def extract_text(file):
|
| 77 |
+
if not file:
|
| 78 |
+
return ""
|
| 79 |
name = file.name.lower()
|
| 80 |
with tempfile.NamedTemporaryFile(delete=False) as tmp:
|
| 81 |
tmp.write(file.read())
|
|
|
|
| 85 |
if name.endswith(".pdf"):
|
| 86 |
reader = PdfReader(tmp_path)
|
| 87 |
for page in reader.pages:
|
| 88 |
+
page_text = page.extract_text()
|
| 89 |
+
if page_text:
|
| 90 |
+
text += page_text + "\n"
|
| 91 |
elif name.endswith(".docx"):
|
| 92 |
doc = Document(tmp_path)
|
| 93 |
text = "\n".join([p.text for p in doc.paragraphs])
|
| 94 |
else:
|
| 95 |
text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
|
| 96 |
except Exception as e:
|
| 97 |
+
st.error(f"Error reading file: {e}")
|
| 98 |
finally:
|
| 99 |
os.remove(tmp_path)
|
| 100 |
return text.strip()
|
|
|
|
| 110 |
return f"(Translation unavailable for {target_lang})"
|
| 111 |
|
| 112 |
def text_to_speech(text, lang):
    """Synthesize *text* as speech in the language named *lang*.

    Args:
        text: Text to speak.
        lang: Language name; looked up in ``LANG_MAP`` to get the gTTS
            language code.

    Returns:
        A ``BytesIO`` rewound to offset 0 containing the audio written by
        gTTS, or ``None`` when there is nothing to speak or synthesis fails.
    """
    # Nothing to synthesize: bail out quietly instead of letting the failure
    # surface below as a misleading "unavailable for this language" warning.
    if not text or not text.strip():
        return None

    try:
        lang_code = LANG_MAP[lang]
        tts = gTTS(text=text, lang=lang_code)
        audio_fp = BytesIO()
        tts.write_to_fp(audio_fp)
        # Rewind so the caller reads the audio from the beginning.
        audio_fp.seek(0)
        return audio_fp
    except Exception:
        # Deliberate best-effort boundary: an unknown language name or any
        # gTTS failure degrades to a UI warning rather than crashing the app.
        st.warning("Audio unavailable for this language.")
        return None
|
| 123 |
|
| 124 |
def clause_simplification(text, mode):
|
|
|
|
| 141 |
"Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
|
| 142 |
"Score": [100 - score, score // 2, score]
|
| 143 |
})
|
| 144 |
+
fig = px.bar(fairness_df, x="Score", y="Aspect", orientation="h", text="Score", color="Aspect")
|
|
|
|
|
|
|
|
|
|
| 145 |
fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
|
| 146 |
st.plotly_chart(fig, use_container_width=True)
|
| 147 |
+
st.info(translate_text(f"Fairness Score: {score}% (Approximate)", lang))
|
|
|
|
|
|
|
| 148 |
|
| 149 |
def chat_response(prompt, lang):
    """Generate a chatbot reply to *prompt* and translate it into *lang*.

    Args:
        prompt: Full prompt text passed to the causal language model.
        lang: Target language name forwarded to ``translate_text``.

    Returns:
        The result of ``translate_text`` applied to the generated reply.
    """
    inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
    outputs = gen_model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # A causal LM returns the prompt tokens followed by the continuation.
    # Drop the prompt so the instruction text is not echoed back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    response = gen_tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )
    return translate_text(response.strip(), lang)
|
| 154 |
|
| 155 |
# -----------------------------
|
| 156 |
+
# APP INTERFACE
|
| 157 |
# -----------------------------
|
| 158 |
+
tab1, tab2, tab3, tab4 = st.tabs(["π Analyzer", "π Translate & Audio", "π¬ Chatbot", "βΉοΈ About"])
|
| 159 |
|
| 160 |
+
# TAB 1: ANALYZER
|
|
|
|
|
|
|
| 161 |
with tab1:
|
| 162 |
+
st.subheader("π Upload or Paste Document")
|
| 163 |
+
lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
|
| 164 |
+
file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
|
| 165 |
text_input = st.text_area("Or Paste Text Here:", height=200)
|
| 166 |
|
| 167 |
if file or text_input:
|
| 168 |
text = extract_text(file) if file else text_input
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
+
mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
|
| 171 |
+
|
| 172 |
+
if st.button("π§Ύ Simplify Clauses"):
|
| 173 |
+
with st.spinner("Simplifying..."):
|
| 174 |
+
simplified = clause_simplification(text, mode)
|
| 175 |
+
translated = translate_text(simplified, lang)
|
| 176 |
+
st.success(translated)
|
| 177 |
+
audio_data = text_to_speech(translated, lang)
|
| 178 |
+
if audio_data:
|
| 179 |
+
st.audio(audio_data, format="audio/mp3")
|
| 180 |
+
|
| 181 |
+
if st.button("βοΈ Fairness Analysis"):
|
| 182 |
+
fairness_score_visual(text, lang)
|
| 183 |
+
|
| 184 |
+
# TAB 2: TRANSLATION + AUDIO
|
| 185 |
with tab2:
|
| 186 |
+
st.subheader("π Translate & Listen")
|
| 187 |
+
text_input = st.text_area("Enter text:", height=200)
|
| 188 |
+
lang = st.selectbox("Translate to:", LANG_NAMES, index=4)
|
| 189 |
+
|
| 190 |
+
if st.button("Translate"):
|
| 191 |
translated = translate_text(text_input, lang)
|
| 192 |
st.success(translated)
|
| 193 |
if st.button("π§ Generate Audio"):
|
|
|
|
| 195 |
if audio_data:
|
| 196 |
st.audio(audio_data, format="audio/mp3")
|
| 197 |
|
| 198 |
+
# TAB 3: CHATBOT
|
|
|
|
|
|
|
| 199 |
with tab3:
|
| 200 |
+
st.subheader("π¬ Chat with ClauseWise (Multilingual)")
|
| 201 |
+
lang = st.selectbox("Chat Language:", LANG_NAMES, index=4)
|
| 202 |
+
query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150)
|
| 203 |
+
if st.button("Ask"):
|
|
|
|
| 204 |
with st.spinner("Thinking..."):
|
| 205 |
+
response = chat_response(f"You are a legal assistant. Answer helpfully: {query}", lang)
|
| 206 |
st.success(response)
|
| 207 |
audio_data = text_to_speech(response, lang)
|
| 208 |
if audio_data:
|
| 209 |
st.audio(audio_data, format="audio/mp3")
|
| 210 |
|
| 211 |
+
# TAB 4: ABOUT
|
|
|
|
|
|
|
| 212 |
with tab4:
|
| 213 |
st.markdown("""
|
| 214 |
+
### ⚖️ About ClauseWise
|
| 215 |
+
ClauseWise is a multilingual AI-powered legal assistant that helps users:
|
| 216 |
+
- Simplify complex clauses
|
| 217 |
+
- Translate and listen in 10+ languages
|
| 218 |
+
- Assess fairness visually
|
| 219 |
+
- Chat interactively
|
| 220 |
+
|
| 221 |
+
**Languages Supported:**
|
| 222 |
English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
|
| 223 |
|
| 224 |
+
**Disclaimer:** Educational purposes only, not legal advice.
|
|
|
|
| 225 |
""")
|
|
|
|
|
|