Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,19 +19,12 @@ from io import BytesIO
|
|
| 19 |
import spacy
|
| 20 |
|
| 21 |
# -----------------------------
|
| 22 |
-
# STREAMLIT CONFIG
|
| 23 |
# -----------------------------
|
| 24 |
st.set_page_config(page_title="βοΈ ClauseWise", page_icon="βοΈ", layout="wide")
|
| 25 |
|
| 26 |
-
st.title("βοΈ ClauseWise: Multilingual Legal AI Assistant")
|
| 27 |
-
st.markdown("""
|
| 28 |
-
Simplify, translate, and analyze legal documents in **10+ languages**.
|
| 29 |
-
ClauseWise helps you understand clauses, fairness, and contract structure β plus chat with an AI legal assistant.
|
| 30 |
-
---
|
| 31 |
-
""")
|
| 32 |
-
|
| 33 |
# -----------------------------
|
| 34 |
-
# LANGUAGE
|
| 35 |
# -----------------------------
|
| 36 |
LANG_MAP = {
|
| 37 |
"English": "en", "French": "fr", "Spanish": "es", "German": "de",
|
|
@@ -41,7 +34,7 @@ LANG_MAP = {
|
|
| 41 |
LANG_NAMES = list(LANG_MAP.keys())
|
| 42 |
|
| 43 |
# -----------------------------
|
| 44 |
-
#
|
| 45 |
# -----------------------------
|
| 46 |
@st.cache_resource
|
| 47 |
def load_models():
|
|
@@ -53,7 +46,7 @@ def load_models():
|
|
| 53 |
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
|
| 54 |
gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
|
| 55 |
|
| 56 |
-
# β
|
| 57 |
try:
|
| 58 |
nlp = spacy.load("en_core_web_sm")
|
| 59 |
except OSError:
|
|
@@ -63,7 +56,6 @@ def load_models():
|
|
| 63 |
|
| 64 |
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 65 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 66 |
-
|
| 67 |
return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
|
| 68 |
|
| 69 |
tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
|
|
@@ -85,9 +77,9 @@ def extract_text(file):
|
|
| 85 |
if name.endswith(".pdf"):
|
| 86 |
reader = PdfReader(tmp_path)
|
| 87 |
for page in reader.pages:
|
| 88 |
-
|
| 89 |
-
if
|
| 90 |
-
text +=
|
| 91 |
elif name.endswith(".docx"):
|
| 92 |
doc = Document(tmp_path)
|
| 93 |
text = "\n".join([p.text for p in doc.paragraphs])
|
|
@@ -134,7 +126,7 @@ def clause_simplification(text, mode):
|
|
| 134 |
def fairness_score_visual(text, lang):
|
| 135 |
pos = len(re.findall(r"(mutual|both parties|shared)", text, re.I))
|
| 136 |
neg = len(re.findall(r"(sole|unilateral|exclusive right)", text, re.I))
|
| 137 |
-
score = max(0, min(100, 70 + pos - 2*neg))
|
| 138 |
|
| 139 |
st.subheader("βοΈ Fairness Balance Meter")
|
| 140 |
fairness_df = pd.DataFrame({
|
|
@@ -153,73 +145,86 @@ def chat_response(prompt, lang):
|
|
| 153 |
return translate_text(response, lang)
|
| 154 |
|
| 155 |
# -----------------------------
|
| 156 |
-
# APP
|
| 157 |
# -----------------------------
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
with
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
|
| 165 |
-
text_input = st.text_area("Or Paste Text Here:", height=200)
|
| 166 |
-
|
| 167 |
-
if file or text_input:
|
| 168 |
-
text = extract_text(file) if file else text_input
|
| 169 |
-
|
| 170 |
-
mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
|
| 171 |
-
|
| 172 |
-
if st.button("π§Ύ Simplify Clauses"):
|
| 173 |
-
with st.spinner("Simplifying..."):
|
| 174 |
-
simplified = clause_simplification(text, mode)
|
| 175 |
-
translated = translate_text(simplified, lang)
|
| 176 |
-
st.success(translated)
|
| 177 |
-
audio_data = text_to_speech(translated, lang)
|
| 178 |
-
if audio_data:
|
| 179 |
-
st.audio(audio_data, format="audio/mp3")
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
if audio_data:
|
| 209 |
st.audio(audio_data, format="audio/mp3")
|
| 210 |
|
| 211 |
-
# TAB
|
| 212 |
-
with
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
import spacy
|
| 20 |
|
| 21 |
# -----------------------------
|
| 22 |
+
# STREAMLIT PAGE CONFIG
|
| 23 |
# -----------------------------
|
| 24 |
st.set_page_config(page_title="βοΈ ClauseWise", page_icon="βοΈ", layout="wide")
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# -----------------------------
|
| 27 |
+
# LANGUAGE MAP
|
| 28 |
# -----------------------------
|
| 29 |
LANG_MAP = {
|
| 30 |
"English": "en", "French": "fr", "Spanish": "es", "German": "de",
|
|
|
|
| 34 |
LANG_NAMES = list(LANG_MAP.keys())
|
| 35 |
|
| 36 |
# -----------------------------
|
| 37 |
+
# MODEL LOADING (with caching)
|
| 38 |
# -----------------------------
|
| 39 |
@st.cache_resource
|
| 40 |
def load_models():
|
|
|
|
| 46 |
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
|
| 47 |
gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
|
| 48 |
|
| 49 |
+
# ✅ Auto-download SpaCy if missing
|
| 50 |
try:
|
| 51 |
nlp = spacy.load("en_core_web_sm")
|
| 52 |
except OSError:
|
|
|
|
| 56 |
|
| 57 |
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 58 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
|
|
|
| 59 |
return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
|
| 60 |
|
| 61 |
tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
|
|
|
|
| 77 |
if name.endswith(".pdf"):
|
| 78 |
reader = PdfReader(tmp_path)
|
| 79 |
for page in reader.pages:
|
| 80 |
+
t = page.extract_text()
|
| 81 |
+
if t:
|
| 82 |
+
text += t + "\n"
|
| 83 |
elif name.endswith(".docx"):
|
| 84 |
doc = Document(tmp_path)
|
| 85 |
text = "\n".join([p.text for p in doc.paragraphs])
|
|
|
|
| 126 |
def fairness_score_visual(text, lang):
|
| 127 |
pos = len(re.findall(r"(mutual|both parties|shared)", text, re.I))
|
| 128 |
neg = len(re.findall(r"(sole|unilateral|exclusive right)", text, re.I))
|
| 129 |
+
score = max(0, min(100, 70 + pos - 2 * neg))
|
| 130 |
|
| 131 |
st.subheader("βοΈ Fairness Balance Meter")
|
| 132 |
fairness_df = pd.DataFrame({
|
|
|
|
| 145 |
return translate_text(response, lang)
|
| 146 |
|
| 147 |
# -----------------------------
|
| 148 |
+
# MAIN STREAMLIT APP FUNCTION
|
| 149 |
# -----------------------------
|
| 150 |
+
def _play_audio(text, lang):
    """Synthesize *text* in *lang* and show an audio player if audio was produced.

    Nothing is rendered when ``text_to_speech`` returns a falsy value.
    """
    audio_data = text_to_speech(text, lang)
    if audio_data:
        st.audio(audio_data, format="audio/mp3")


def _render_analyzer_tab():
    """Render the analyzer tab: document input, clause simplification, fairness view."""
    st.subheader("π Upload or Paste Legal Document")
    lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
    file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
    text_input = st.text_area("Or Paste Text Here:", height=200)

    # Nothing to analyze until the user uploads a file or pastes text.
    if not (file or text_input):
        return

    # An uploaded file takes precedence over pasted text.
    text = extract_text(file) if file else text_input

    mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])

    if st.button("π§Ύ Simplify Clauses"):
        with st.spinner("Simplifying..."):
            simplified = clause_simplification(text, mode)
            translated = translate_text(simplified, lang)
            st.success(translated)
            _play_audio(translated, lang)

    if st.button("βοΈ Fairness Analysis"):
        fairness_score_visual(text, lang)


def _render_translate_tab():
    """Render the translate tab: translate free text and optionally listen to it.

    Each button is handled on its own: Streamlit reruns the script on every
    click, so a translation shown by "Translate" is not available when
    "Generate Audio" is clicked. The audio branch therefore translates the
    input itself, so the spoken text matches the selected language (the same
    translate-then-speak order used by the analyzer and chatbot tabs).
    """
    st.subheader("π Translate & Listen")
    text_input = st.text_area("Enter text:", height=200)
    # index=4 — presumably Hindi, going by the language order listed in the
    # About tab; confirm against LANG_MAP.
    lang = st.selectbox("Translate to:", LANG_NAMES, index=4)

    if st.button("Translate"):
        if text_input.strip():
            st.success(translate_text(text_input, lang))
        else:
            st.warning("Please enter some text first.")

    if st.button("π§ Generate Audio"):
        if text_input.strip():
            _play_audio(translate_text(text_input, lang), lang)
        else:
            st.warning("Please enter some text first.")


def _render_chat_tab():
    """Render the chatbot tab: ask a question, show the answer, and read it aloud."""
    st.subheader("π¬ Chat with ClauseWise (Multilingual)")
    # index=4 — presumably Hindi; confirm against LANG_MAP.
    lang = st.selectbox("Chat Language:", LANG_NAMES, index=4)
    query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150)

    if not st.button("Ask"):
        return

    # Do not send an empty prompt to the model.
    if not query.strip():
        st.warning("Please enter a question first.")
        return

    with st.spinner("Thinking..."):
        response = chat_response(f"You are a legal assistant. Answer helpfully: {query}", lang)
        st.success(response)
        _play_audio(response, lang)


def _render_about_tab():
    """Render the static About tab."""
    st.markdown("""
    ### βοΈ About ClauseWise
    ClauseWise is a multilingual AI-powered legal assistant that helps users:
    - Simplify complex clauses
    - Translate and listen in 10+ languages
    - Assess fairness visually
    - Chat interactively

    **Languages Supported:**
    English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali

    **Disclaimer:** Educational purposes only, not legal advice.
    """)


def main():
    """Build the ClauseWise page: title, intro text, and the four feature tabs.

    The tabs are created in a single ``st.tabs`` call and each one is filled
    by its own ``_render_*`` helper, in the order Analyzer, Translate & Audio,
    Chatbot, About.
    """
    # NOTE(review): the non-ASCII prefixes in the labels below (and in the
    # helpers) look like mis-decoded emoji in this copy of the file; they are
    # reproduced unchanged — confirm against the original source.
    st.title("βοΈ ClauseWise: Multilingual Legal AI Assistant")
    st.markdown("""
    **Simplify**, **translate**, and **analyze** legal documents with AI β in your language.
    ---
    """)

    tab1, tab2, tab3, tab4 = st.tabs(["π Analyzer", "π Translate & Audio", "π¬ Chatbot", "βΉοΈ About"])

    # TAB 1: ANALYZER
    with tab1:
        _render_analyzer_tab()

    # TAB 2: TRANSLATION + AUDIO
    with tab2:
        _render_translate_tab()

    # TAB 3: CHATBOT
    with tab3:
        _render_chat_tab()

    # TAB 4: ABOUT
    with tab4:
        _render_about_tab()


# -----------------------------
# RUN STREAMLIT APP SAFELY
# -----------------------------
if __name__ == "__main__":
    main()
|