deepthi6 commited on
Commit
2248103
·
verified ·
1 Parent(s): df5688a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -108
app.py CHANGED
@@ -17,22 +17,14 @@ from docx import Document
17
  from gtts import gTTS
18
  from io import BytesIO
19
  import spacy
20
- import subprocess
21
 
22
  # -----------------------------
23
- # Hugging Face fix: ensure Streamlit runs properly
24
- # -----------------------------
25
- if __name__ == "__main__" and os.environ.get("SYSTEM") == "spaces":
26
- subprocess.Popen(["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"])
27
- exit()
28
-
29
- # -----------------------------
30
- # Page config
31
  # -----------------------------
32
  st.set_page_config(page_title="βš–οΈ ClauseWise", page_icon="βš–οΈ", layout="wide")
33
 
34
  # -----------------------------
35
- # Language Map
36
  # -----------------------------
37
  LANG_MAP = {
38
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
@@ -42,38 +34,49 @@ LANG_MAP = {
42
  LANG_NAMES = list(LANG_MAP.keys())
43
 
44
  # -----------------------------
45
- # Model Loading (cached)
46
  # -----------------------------
47
  @st.cache_resource
48
  def load_models():
49
- simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
50
- tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
51
- simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
52
-
53
- gen_model_id = "microsoft/phi-2"
54
- gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id, trust_remote_code=True)
55
- gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id, trust_remote_code=True)
56
-
57
- # βœ… Load SpaCy
58
  try:
59
- nlp = spacy.load("en_core_web_sm")
60
- except OSError:
61
- from spacy.cli import download
62
- download("en_core_web_sm")
63
- nlp = spacy.load("en_core_web_sm")
64
-
65
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
66
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
69
 
 
 
 
 
70
 
71
- tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = load_models()
72
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
73
- gen_model.to(DEVICE)
 
74
 
75
  # -----------------------------
76
- # Utility Functions
77
  # -----------------------------
78
  def extract_text(file):
79
  if not file:
@@ -111,36 +114,54 @@ def translate_text(text, target_lang):
111
  if lang_code == "en":
112
  return text
113
  try:
 
114
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang_code}")
115
- return translator(text[:1000])[0]["translation_text"]
116
- except Exception:
 
 
117
  return text
118
 
119
 
120
  def text_to_speech(text, lang):
 
 
121
  try:
122
  lang_code = LANG_MAP.get(lang, "en")
123
- tts = gTTS(text=text[:1000], lang=lang_code)
124
  audio_fp = BytesIO()
125
  tts.write_to_fp(audio_fp)
126
  audio_fp.seek(0)
127
  return audio_fp
128
- except Exception:
 
129
  return None
130
 
131
 
132
  def clause_simplification(text, mode):
133
- prefix = {
 
 
134
  "Simplified": "simplify: ",
135
  "Explain like I'm 5": "explain like I'm 5: ",
136
  "Professional": "rephrase professionally: "
137
- }.get(mode, "simplify: ")
138
- inputs = tokenizer_simplify(prefix + text[:500], return_tensors="pt", truncation=True, max_length=512)
139
- outputs = simplify_model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
140
- return tokenizer_simplify.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
141
 
142
 
143
  def fairness_score_visual(text, lang):
 
 
 
144
  pos = len(re.findall(r"\b(mutual|both parties|shared|equal|fair|balanced)\b", text, re.I))
145
  neg = len(re.findall(r"\b(sole|unilateral|exclusive right|one-sided|only)\b", text, re.I))
146
  score = max(0, min(100, 50 + (pos * 5) - (neg * 5)))
@@ -150,116 +171,126 @@ def fairness_score_visual(text, lang):
150
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
151
  "Score": [max(0, 100 - score), score, min(100, score)]
152
  })
153
- fig = px.bar(
154
- fairness_df, x="Score", y="Aspect", orientation="h", text="Score", color="Aspect",
155
- color_discrete_sequence=["#ff6b6b", "#4ecdc4", "#95e1d3"]
156
- )
157
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="", height=300)
158
  st.plotly_chart(fig, use_container_width=True)
159
- st.info(translate_text(f"Fairness Score: {score}% (Approximate)", lang))
 
 
160
 
161
 
162
- def chat_response(prompt, lang, history):
163
- """Persistent memory chat"""
164
- # Combine chat history context
165
- context = "\n".join([f"User: {u}\nAI: {a}" for u, a in history[-3:]]) # Keep last 3
166
- full_prompt = f"You are a helpful multilingual legal assistant. {context}\nUser: {prompt}\nAI:"
167
- inputs = gen_tokenizer(full_prompt, return_tensors="pt").to(DEVICE)
168
- outputs = gen_model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9, do_sample=True)
169
- response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
170
- if "AI:" in response:
171
- response = response.split("AI:")[-1].strip()
172
- return translate_text(response, lang)
 
 
 
 
 
 
173
 
174
 
175
  # -----------------------------
176
- # Main Streamlit App
177
  # -----------------------------
178
  def main():
179
  st.title("βš–οΈ ClauseWise: Multilingual Legal AI Assistant")
180
- st.markdown("Simplify, translate, and analyze legal documents with AI β€” in your language.")
181
- st.divider()
182
 
183
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“„ Analyzer", "🌐 Translate & Audio", "πŸ’¬ Chatbot", "ℹ️ About"])
184
 
 
185
  with tab1:
186
  st.subheader("πŸ“ Upload or Paste Legal Document")
187
- lang = st.selectbox("Select Language:", LANG_NAMES, index=0)
188
  file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
189
- text_input = st.text_area("Or Paste Text Here:", height=200)
190
 
191
  if file or text_input:
192
  text = extract_text(file) if file else text_input
193
- if not text:
194
- st.warning("No content found.")
195
- else:
196
  mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
197
  if st.button("🧾 Simplify Clauses"):
198
  with st.spinner("Simplifying..."):
199
  simplified = clause_simplification(text, mode)
200
  translated = translate_text(simplified, lang)
201
  st.success(translated)
202
- audio = text_to_speech(translated, lang)
203
- if audio:
204
- st.audio(audio, format="audio/mp3")
205
-
206
  if st.button("βš–οΈ Fairness Analysis"):
207
- fairness_score_visual(text, lang)
 
 
 
208
 
 
209
  with tab2:
210
  st.subheader("🌐 Translate & Listen")
211
- text_input = st.text_area("Enter text:", height=200)
212
- lang = st.selectbox("Translate to:", LANG_NAMES, index=4)
213
  if st.button("Translate"):
214
- translated = translate_text(text_input, lang)
215
- st.success(translated)
 
 
 
 
216
  if st.button("🎧 Generate Audio"):
217
- audio = text_to_speech(text_input, lang)
218
- if audio:
219
- st.audio(audio, format="audio/mp3")
 
 
 
 
220
 
 
221
  with tab3:
222
- st.subheader("πŸ’¬ Chat with ClauseWise (Memory Enabled)")
223
- lang = st.selectbox("Chat Language:", LANG_NAMES, index=0)
224
- query = st.text_area("Ask your question:", height=150)
225
-
226
- # Maintain persistent conversation
227
- if "chat_history" not in st.session_state:
228
- st.session_state.chat_history = []
229
-
230
  if st.button("Ask"):
231
  if query.strip():
232
  with st.spinner("Thinking..."):
233
- response = chat_response(query, lang, st.session_state.chat_history)
234
- st.session_state.chat_history.append((query, response))
235
  st.success(response)
236
- audio = text_to_speech(response, lang)
237
- if audio:
238
- st.audio(audio, format="audio/mp3")
239
-
240
- # Display conversation history
241
- if st.session_state.chat_history:
242
- st.markdown("### 🧠 Chat History")
243
- for q, a in st.session_state.chat_history[-5:]:
244
- st.markdown(f"**You:** {q}")
245
- st.markdown(f"**ClauseWise:** {a}")
246
-
247
- if st.button("Clear Chat"):
248
- st.session_state.chat_history = []
249
- st.info("Chat cleared.")
250
 
 
251
  with tab4:
252
  st.markdown("""
253
  ### βš–οΈ About ClauseWise
254
  ClauseWise is a multilingual AI-powered legal assistant that helps users:
255
- - Simplify legal language
256
- - Translate and listen in 10+ languages
257
- - Assess fairness visually
258
- - Chat interactively with memory
259
- ---
260
- **Disclaimer:** Educational use only β€” not legal advice.
261
- """)
262
 
 
 
263
 
 
 
 
 
 
 
264
  if __name__ == "__main__":
265
  main()
 
17
  from gtts import gTTS
18
  from io import BytesIO
19
  import spacy
 
20
 
21
  # -----------------------------
22
+ # STREAMLIT PAGE CONFIG
 
 
 
 
 
 
 
23
  # -----------------------------
24
  st.set_page_config(page_title="βš–οΈ ClauseWise", page_icon="βš–οΈ", layout="wide")
25
 
26
  # -----------------------------
27
+ # LANGUAGE MAP
28
  # -----------------------------
29
  LANG_MAP = {
30
  "English": "en", "French": "fr", "Spanish": "es", "German": "de",
 
34
  LANG_NAMES = list(LANG_MAP.keys())
35
 
36
  # -----------------------------
37
+ # MODEL LOADING (with caching)
38
  # -----------------------------
39
  @st.cache_resource
40
  def load_models():
41
+ """Load all required models with error handling"""
 
 
 
 
 
 
 
 
42
  try:
43
+ simplify_model_name = "mrm8488/t5-small-finetuned-text-simplification"
44
+ tokenizer_simplify = AutoTokenizer.from_pretrained(simplify_model_name)
45
+ simplify_model = AutoModelForSeq2SeqLM.from_pretrained(simplify_model_name)
46
+
47
+ gen_model_id = "microsoft/phi-2"
48
+ gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_id, trust_remote_code=True)
49
+ gen_model = AutoModelForCausalLM.from_pretrained(gen_model_id, trust_remote_code=True)
50
+
51
+ # ✅ Auto-download the spaCy English model if it is missing
52
+ try:
53
+ nlp = spacy.load("en_core_web_sm")
54
+ except OSError:
55
+ from spacy.cli import download
56
+ download("en_core_web_sm")
57
+ nlp = spacy.load("en_core_web_sm")
58
+
59
+ classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
60
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
61
+
62
+ return tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer
63
+ except Exception as e:
64
+ st.error(f"Error loading models: {e}")
65
+ return None, None, None, None, None, None, None
66
 
 
67
 
68
+ model_data = load_models()
69
+ if model_data[0] is None:
70
+ st.error("Failed to load models. Please check your internet connection and try again.")
71
+ st.stop()
72
 
73
+ tokenizer_simplify, simplify_model, gen_tokenizer, gen_model, nlp, classifier, summarizer = model_data
74
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
75
+ if gen_model is not None:
76
+ gen_model.to(DEVICE)
77
 
78
  # -----------------------------
79
+ # UTILITIES
80
  # -----------------------------
81
  def extract_text(file):
82
  if not file:
 
114
  if lang_code == "en":
115
  return text
116
  try:
117
+ text_to_translate = text[:500]
118
  translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang_code}")
119
+ result = translator(text_to_translate, max_length=512)
120
+ return result[0]["translation_text"]
121
+ except Exception as e:
122
+ st.warning(f"Translation unavailable for {target_lang}: {str(e)}")
123
  return text
124
 
125
 
126
  def text_to_speech(text, lang):
127
+ if not text:
128
+ return None
129
  try:
130
  lang_code = LANG_MAP.get(lang, "en")
131
+ tts = gTTS(text=text[:1000], lang=lang_code, slow=False)
132
  audio_fp = BytesIO()
133
  tts.write_to_fp(audio_fp)
134
  audio_fp.seek(0)
135
  return audio_fp
136
+ except Exception as e:
137
+ st.warning(f"Audio generation unavailable: {str(e)}")
138
  return None
139
 
140
 
141
  def clause_simplification(text, mode):
142
+ if not text or simplify_model is None:
143
+ return text
144
+ prefix_map = {
145
  "Simplified": "simplify: ",
146
  "Explain like I'm 5": "explain like I'm 5: ",
147
  "Professional": "rephrase professionally: "
148
+ }
149
+ prefix = prefix_map.get(mode, "simplify: ")
150
+ try:
151
+ text_to_process = text[:500]
152
+ inputs = tokenizer_simplify(prefix + text_to_process, return_tensors="pt",
153
+ truncation=True, max_length=512)
154
+ outputs = simplify_model.generate(**inputs, max_length=256, num_beams=4, early_stopping=True)
155
+ return tokenizer_simplify.decode(outputs[0], skip_special_tokens=True)
156
+ except Exception as e:
157
+ st.error(f"Simplification error: {e}")
158
+ return text
159
 
160
 
161
  def fairness_score_visual(text, lang):
162
+ if not text:
163
+ st.warning("No text to analyze.")
164
+ return
165
  pos = len(re.findall(r"\b(mutual|both parties|shared|equal|fair|balanced)\b", text, re.I))
166
  neg = len(re.findall(r"\b(sole|unilateral|exclusive right|one-sided|only)\b", text, re.I))
167
  score = max(0, min(100, 50 + (pos * 5) - (neg * 5)))
 
171
  "Aspect": ["Party A Favored", "Balanced", "Party B Favored"],
172
  "Score": [max(0, 100 - score), score, min(100, score)]
173
  })
174
+ fig = px.bar(fairness_df, x="Score", y="Aspect", orientation="h", text="Score",
175
+ color="Aspect", color_discrete_sequence=["#ff6b6b", "#4ecdc4", "#95e1d3"])
 
 
176
  fig.update_layout(showlegend=False, xaxis_title="Score", yaxis_title="", height=300)
177
  st.plotly_chart(fig, use_container_width=True)
178
+ fairness_text = f"Fairness Score: {score}% (Approximate - based on keyword analysis)"
179
+ translated_result = translate_text(fairness_text, lang)
180
+ st.info(translated_result)
181
 
182
 
183
+ def chat_response(prompt, lang):
184
+ if not prompt or gen_model is None:
185
+ return "Unable to generate response. Please try again."
186
+ try:
187
+ full_prompt = f"You are a helpful legal assistant. Answer the following question: {prompt}\n\nAnswer:"
188
+ inputs = gen_tokenizer(full_prompt, return_tensors="pt", truncation=True,
189
+ max_length=512).to(DEVICE)
190
+ outputs = gen_model.generate(**inputs, max_new_tokens=200, temperature=0.7,
191
+ top_p=0.9, do_sample=True,
192
+ pad_token_id=gen_tokenizer.eos_token_id)
193
+ response = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)
194
+ if "Answer:" in response:
195
+ response = response.split("Answer:")[-1].strip()
196
+ return translate_text(response, lang)
197
+ except Exception as e:
198
+ st.error(f"Chat error: {e}")
199
+ return "I'm having trouble generating a response. Please try rephrasing your question."
200
 
201
 
202
  # -----------------------------
203
+ # MAIN APP
204
  # -----------------------------
205
  def main():
206
  st.title("βš–οΈ ClauseWise: Multilingual Legal AI Assistant")
207
+ st.markdown("**Simplify**, **translate**, and **analyze** legal documents with AI β€” in your language.\n---")
 
208
 
209
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“„ Analyzer", "🌐 Translate & Audio", "πŸ’¬ Chatbot", "ℹ️ About"])
210
 
211
+ # TAB 1: ANALYZER
212
  with tab1:
213
  st.subheader("πŸ“ Upload or Paste Legal Document")
214
+ lang = st.selectbox("Select Language:", LANG_NAMES, index=0, key="analyzer_lang")
215
  file = st.file_uploader("Upload a Legal Document (PDF/DOCX/TXT)", type=["pdf", "docx", "txt"])
216
+ text_input = st.text_area("Or Paste Text Here:", height=200, key="analyzer_text")
217
 
218
  if file or text_input:
219
  text = extract_text(file) if file else text_input
220
+ if text.strip():
 
 
221
  mode = st.radio("Simplify Mode", ["Explain like I'm 5", "Simplified", "Professional"])
222
  if st.button("🧾 Simplify Clauses"):
223
  with st.spinner("Simplifying..."):
224
  simplified = clause_simplification(text, mode)
225
  translated = translate_text(simplified, lang)
226
  st.success(translated)
227
+ audio_data = text_to_speech(translated, lang)
228
+ if audio_data:
229
+ st.audio(audio_data, format="audio/mp3")
 
230
  if st.button("βš–οΈ Fairness Analysis"):
231
+ with st.spinner("Analyzing fairness..."):
232
+ fairness_score_visual(text, lang)
233
+ else:
234
+ st.warning("Please provide some text to analyze.")
235
 
236
+ # TAB 2: TRANSLATION + AUDIO
237
  with tab2:
238
  st.subheader("🌐 Translate & Listen")
239
+ text_input = st.text_area("Enter text:", height=200, key="translate_text")
240
+ lang = st.selectbox("Translate to:", LANG_NAMES, index=4, key="translate_lang")
241
  if st.button("Translate"):
242
+ if text_input.strip():
243
+ with st.spinner("Translating..."):
244
+ translated = translate_text(text_input, lang)
245
+ st.success(translated)
246
+ else:
247
+ st.warning("Please enter some text to translate.")
248
  if st.button("🎧 Generate Audio"):
249
+ if text_input.strip():
250
+ with st.spinner("Generating audio..."):
251
+ audio_data = text_to_speech(text_input, lang)
252
+ if audio_data:
253
+ st.audio(audio_data, format="audio/mp3")
254
+ else:
255
+ st.warning("Please enter some text for audio generation.")
256
 
257
+ # TAB 3: CHATBOT
258
  with tab3:
259
+ st.subheader("πŸ’¬ Chat with ClauseWise (Multilingual)")
260
+ lang = st.selectbox("Chat Language:", LANG_NAMES, index=0, key="chat_lang")
261
+ query = st.text_area("Ask about clauses, fairness, or legal meaning:", height=150, key="chat_query")
 
 
 
 
 
262
  if st.button("Ask"):
263
  if query.strip():
264
  with st.spinner("Thinking..."):
265
+ response = chat_response(query, lang)
 
266
  st.success(response)
267
+ audio_data = text_to_speech(response, lang)
268
+ if audio_data:
269
+ st.audio(audio_data, format="audio/mp3")
270
+ else:
271
+ st.warning("Please enter a question.")
 
 
 
 
 
 
 
 
 
272
 
273
+ # TAB 4: ABOUT
274
  with tab4:
275
  st.markdown("""
276
  ### βš–οΈ About ClauseWise
277
  ClauseWise is a multilingual AI-powered legal assistant that helps users:
278
+ - **Simplify complex clauses** into easy-to-understand language
279
+ - **Translate and listen** in 10+ languages
280
+ - **Assess fairness** visually
281
+ - **Chat interactively** about legal concepts
282
+
283
+ **Languages Supported:**
284
+ English, French, Spanish, German, Hindi, Tamil, Telugu, Kannada, Marathi, Gujarati, Bengali
285
 
286
+ **Technologies Used:**
287
+ Hugging Face Transformers (T5, Phi-2, BART), SpaCy, gTTS, Plotly
288
 
289
+ ⚠️ *Disclaimer:* Educational use only β€” not legal advice.
290
+ """)
291
+
292
+ # -----------------------------
293
+ # ✅ Standard script entry point (on Hugging Face Spaces, Streamlit runs app.py directly —
294
+ # no subprocess relaunch is needed; see the removed launcher block in the old version)
295
  if __name__ == "__main__":
296
  main()