Spaces:

deepthi6
/

clausewise_full_project

Runtime error

App Files Community

deepthi6 committed on Nov 5, 2025

Commit

6edbcf8

verified ·

1 Parent(s): 36b1906

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -81

app.py CHANGED Viewed

@@ -19,19 +19,19 @@ from io import BytesIO
 import spacy
 # -----------------------------
-# PAGE CONFIG
 # -----------------------------
-st.set_page_config(page_title="⚖️ ClauseWise: Multilingual Legal AI Assistant", page_icon="⚖️", layout="wide")
 st.title("⚖️ ClauseWise: Multilingual Legal AI Assistant")
 st.markdown("""
-ClauseWise helps you **simplify, translate, and understand legal documents** in your preferred language.
-Upload contracts, extract clauses, check fairness, and chat with your AI legal assistant — all multilingual and with audio output.
 ---
 """)
 # -----------------------------
-# LANGUAGE MAP
 # -----------------------------
 LANG_MAP = {
     "English": "en", "French": "fr", "Spanish": "es", "German": "de",
@@ -41,10 +41,10 @@ LANG_MAP = {
 LANG_NAMES = list(LANG_MAP.keys())
 # -----------------------------
-# LOAD MODELS
 # -----------------------------
 @st.cache_resource
-def load_all_models():
     simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
     tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
     simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
@@ -53,7 +53,7 @@ def load_all_models():
     gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
     gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
-    # ✅ Auto-download SpaCy model if missing
     try:
         nlp = spacy.load("en_core_web_sm")
     except OSError:
@@ -66,14 +66,16 @@ def load_all_models():
     return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
-tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_all_models()
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 gen_model.to(DEVICE)
 # -----------------------------
-# UTILS
 # -----------------------------
 def extract_text(file):
     name = file.name.lower()
     with tempfile.NamedTemporaryFile(delete=False) as tmp:
         tmp.write(file.read())
@@ -83,16 +85,16 @@ def extract_text(file):
         if name.endswith(".pdf"):
             reader = PdfReader(tmp_path)
             for page in reader.pages:
-                t = page.extract_text()
-                if t:
-                    text += t + "\n"
         elif name.endswith(".docx"):
             doc = Document(tmp_path)
             text = "\n".join([p.text for p in doc.paragraphs])
         else:
             text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
     except Exception as e:
-        st.error(f"Failed to read file: {e}")
     finally:
         os.remove(tmp_path)
     return text.strip()
@@ -108,15 +110,15 @@ def translate_text(text, target_lang):
         return f"(Translation unavailable for {target_lang})"
 def text_to_speech(text, lang):
-    lang_code = LANG_MAP[lang]
     try:
         tts = gTTS(text=text, lang=lang_code)
         audio_fp = BytesIO()
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
         return audio_fp
     except Exception:
-        st.warning("Speech generation failed for this language.")
         return None
 def clause_simplification(text, mode):
@@ -139,63 +141,53 @@ def fairness_score_visual(text, lang):
         "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
         "Score": [100 - score, score // 2, score]
     })
-    fig = px.bar(
-        fairness_df, x="Score", y="Aspect", orientation="h",
-        color="Aspect", text="Score", title="Fairness Score Representation"
-    )
     fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
     st.plotly_chart(fig, use_container_width=True)
-    translated_info = translate_text(f"Fairness Score: {score}% (Educational Estimate Only)", lang)
-    st.info(translated_info)
 def chat_response(prompt, lang):
     inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
-    outputs = gen_model.generate(**inputs, max_new_tokens=350, do_sample=True, temperature=0.7, top_p=0.9)
-    resp = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return translate_text(resp, lang)
 # -----------------------------
-# MAIN TABS
 # -----------------------------
-tab1, tab2, tab3, tab4 = st.tabs(["📄 Analyzer", "🌐 Translate & Audio", "💬 Chatbot", "⚙️ About"])
-# -----------------------------
-# TAB 1: Analyzer
-# -----------------------------
 with tab1:
-    st.subheader("📁 Upload or Paste Legal Document")
-    lang = st.selectbox("Select Working Language:", LANG_NAMES, index=0)
-    file = st.file_uploader("Upload Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
     text_input = st.text_area("Or Paste Text Here:", height=200)
     if file or text_input:
         text = extract_text(file) if file else text_input
-        st.markdown("---")
-        col1, col2 = st.columns(2)
-        with col1:
-            mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
-            if st.button("🧾 Simplify Clauses"):
-                with st.spinner("Simplifying..."):
-                    simplified = clause_simplification(text, mode)
-                    translated_output = translate_text(simplified, lang)
-                    st.success(translated_output)
-                    audio_data = text_to_speech(translated_output, lang)
-                    if audio_data:
-                        st.audio(audio_data, format="audio/mp3")
-        with col2:
-            if st.button("⚖️ Fairness Analysis"):
-                fairness_score_visual(text, lang)
-# -----------------------------
-# TAB 2: Translate & Audio
-# -----------------------------
 with tab2:
-    st.subheader("🌐 Translate & Hear Content")
-    text_input = st.text_area("Enter text to translate or listen:", height=200)
-    lang = st.selectbox("Choose Translation Language:", LANG_NAMES, index=4)
-    if st.button("Translate Text"):
         translated = translate_text(text_input, lang)
         st.success(translated)
     if st.button("🎧 Generate Audio"):
@@ -203,39 +195,31 @@ with tab2:
         if audio_data:
             st.audio(audio_data, format="audio/mp3")
-# -----------------------------
-# TAB 3: Chatbot
-# -----------------------------
 with tab3:
-    st.subheader("💬 ClauseWise Multilingual Chatbot")
-    lang = st.selectbox("Chatbot Language:", LANG_NAMES, index=4)
-    st.markdown("Ask questions about contract clauses, fairness, or legal basics. *(Educational only — not legal advice.)*")
-    query = st.text_area("Your question:", height=150)
-    if st.button("Ask ClauseWise"):
         with st.spinner("Thinking..."):
-            response = chat_response(f"Answer this like a legal assistant: {query}", lang)
             st.success(response)
             audio_data = text_to_speech(response, lang)
             if audio_data:
                 st.audio(audio_data, format="audio/mp3")
-# -----------------------------
-# TAB 4: About
-# -----------------------------
 with tab4:
     st.markdown("""
-    ### 🌍 About ClauseWise
-    ClauseWise is an **AI-powered multilingual legal document assistant** that helps users:
-    - Simplify complex legal clauses
-    - Translate and listen in **10+ languages**
-    - Analyze fairness visually
-    - Ask questions interactively in any supported language
-    **Supported Languages:**
     English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
-    **Disclaimer:**
-    ClauseWise provides educational insights only and does not offer legal advice.
     """)
-st.markdown("<p style='text-align:center; color:gray;'>© 2025 ClauseWise | Multilingual Legal AI Assistant</p>", unsafe_allow_html=True)

 import spacy
 # -----------------------------
+# STREAMLIT CONFIG
 # -----------------------------
+st.set_page_config(page_title="⚖️ ClauseWise", page_icon="⚖️", layout="wide")
 st.title("⚖️ ClauseWise: Multilingual Legal AI Assistant")
 st.markdown("""
+Simplify, translate, and analyze legal documents in **10+ languages**.
+ClauseWise helps you understand clauses, fairness, and contract structure — plus chat with an AI legal assistant.
 ---
 """)
 # -----------------------------
+# LANGUAGE SUPPORT
 # -----------------------------
 LANG_MAP = {
     "English": "en", "French": "fr", "Spanish": "es", "German": "de",
 LANG_NAMES = list(LANG_MAP.keys())
 # -----------------------------
+# LOAD ALL MODELS
 # -----------------------------
 @st.cache_resource
+def load_models():
     simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
     tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
     simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
     gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id)
     gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id)
+    # ✅ Safe SpaCy load
     try:
         nlp = spacy.load("en_core_web_sm")
     except OSError:
     return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
+tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 gen_model.to(DEVICE)
 # -----------------------------
+# UTILITIES
 # -----------------------------
 def extract_text(file):
+    if not file:
+        return ""
     name = file.name.lower()
     with tempfile.NamedTemporaryFile(delete=False) as tmp:
         tmp.write(file.read())
         if name.endswith(".pdf"):
             reader = PdfReader(tmp_path)
             for page in reader.pages:
+                page_text = page.extract_text()
+                if page_text:
+                    text += page_text + "\n"
         elif name.endswith(".docx"):
             doc = Document(tmp_path)
             text = "\n".join([p.text for p in doc.paragraphs])
         else:
             text = open(tmp_path, "r", encoding="utf-8", errors="ignore").read()
     except Exception as e:
+        st.error(f"Error reading file: {e}")
     finally:
         os.remove(tmp_path)
     return text.strip()
         return f"(Translation unavailable for {target_lang})"
 def text_to_speech(text, lang):
     try:
+        lang_code = LANG_MAP[lang]
         tts = gTTS(text=text, lang=lang_code)
         audio_fp = BytesIO()
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
         return audio_fp
     except Exception:
+        st.warning("Audio unavailable for this language.")
         return None
 def clause_simplification(text, mode):
         "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
         "Score": [100 - score, score // 2, score]
     })
+    fig = px.bar(fairness_df, x="Score", y="Aspect", orientation="h", text="Score", color="Aspect")
     fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="")
     st.plotly_chart(fig, use_container_width=True)
+    st.info(translate_text(f"Fairness Score: {score}% (Approximate)", lang))
 def chat_response(prompt, lang):
     inputs = gen_tokenizer(prompt, return_tensors="pt").to(DEVICE)
+    outputs = gen_model.generate(**inputs, max_new_tokens=300, temperature=0.7, top_p=0.9, do_sample=True)
+    response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return translate_text(response, lang)
 # -----------------------------
+# APP INTERFACE
 # -----------------------------
+tab1, tab2, tab3, tab4 = st.tabs(["📄 Analyzer", "🌐 Translate & Audio", "💬 Chatbot", "ℹ️ About"])
+# TAB 1: ANALYZER
 with tab1:
+    st.subheader("📁 Upload or Paste Document")
+    lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
+    file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
     text_input = st.text_area("Or Paste Text Here:", height=200)
     if file or text_input:
         text = extract_text(file) if file else text_input
+        mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
+        if st.button("🧾 Simplify Clauses"):
+            with st.spinner("Simplifying..."):
+                simplified = clause_simplification(text, mode)
+                translated = translate_text(simplified, lang)
+                st.success(translated)
+                audio_data = text_to_speech(translated, lang)
+                if audio_data:
+                    st.audio(audio_data, format="audio/mp3")
+        if st.button("⚖️ Fairness Analysis"):
+            fairness_score_visual(text, lang)
+# TAB 2: TRANSLATION + AUDIO
 with tab2:
+    st.subheader("🌐 Translate & Listen")
+    text_input = st.text_area("Enter text:", height=200)
+    lang = st.selectbox("Translate to:", LANG_NAMES, index=4)
+    if st.button("Translate"):
         translated = translate_text(text_input, lang)
         st.success(translated)
     if st.button("🎧 Generate Audio"):
         if audio_data:
             st.audio(audio_data, format="audio/mp3")
+# TAB 3: CHATBOT
 with tab3:
+    st.subheader("💬 Chat with ClauseWise (Multilingual)")
+    lang = st.selectbox("Chat Language:", LANG_NAMES, index=4)
+    query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150)
+    if st.button("Ask"):
         with st.spinner("Thinking..."):
+            response = chat_response(f"You are a legal assistant. Answer helpfully: {query}", lang)
             st.success(response)
             audio_data = text_to_speech(response, lang)
             if audio_data:
                 st.audio(audio_data, format="audio/mp3")
+# TAB 4: ABOUT
 with tab4:
     st.markdown("""
+    ### ⚖️ About ClauseWise
+    ClauseWise is a multilingual AI-powered legal assistant that helps users:
+    - Simplify complex clauses
+    - Translate and listen in 10+ languages
+    - Assess fairness visually
+    - Chat interactively
+    **Languages Supported:**
     English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
+    **Disclaimer:** Educational purposes only, not legal advice.
     """)