Spaces:

bhoomi19
/

clausewise

Runtime error

App Files Files Community

bhoomi19 committed on Nov 5, 2025

Commit

e631f5a

verified ·

1 Parent(s): 89c4cef

Update app.py

Browse files

Files changed (1) hide show

app.py +291 -432

app.py CHANGED Viewed

@@ -1,502 +1,361 @@
 import streamlit as st
-import tempfile
 import os
 import re
-import io
 import json
-from typing import List, Dict, Tuple, Any, Optional
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
 from pypdf import PdfReader
 import docx
-import spacy
-import math
-import sys
-import subprocess
-# -------------------------
-# SPACES-SPECIFIC CONFIG
-# -------------------------
-# Hugging Face Spaces provide these tokens automatically
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# Set page config as the VERY FIRST Streamlit command
 st.set_page_config(
-    page_title="ClauseWise – Granite 3.2 (2B) Legal Assistant",
     page_icon="⚖️",
-    layout="wide",
-    initial_sidebar_state="expanded"
 )
-# -------------------------
-# MODEL SETUP - Optimized for Spaces
-# -------------------------
-MODEL_ID = "ibm-granite/granite-3.2-2b-instruct"
-# Spaces hardware detection
-if torch.cuda.is_available():
-    DEVICE = "cuda"
-    DTYPE = torch.float16  # Use float16 for better memory usage
-elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
-    DEVICE = "mps"
-    DTYPE = torch.float16
-else:
-    DEVICE = "cpu"
-    DTYPE = torch.float32
-# Cache model properly for Spaces
-@st.cache_resource(show_spinner=True)
-def load_llm_model():
-    """Load the LLM model optimized for Spaces"""
     try:
-        st.info("🚀 Loading AI model... This may take a few minutes on first run.")
-        # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            MODEL_ID,
-            token=HF_TOKEN,
-            trust_remote_code=True
-        )
-        # Load model with optimized settings for Spaces
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            token=HF_TOKEN,
-            torch_dtype=DTYPE,
-            trust_remote_code=True,
-            device_map="auto" if DEVICE != "cpu" else None,
-            low_cpu_mem_usage=True
         )
-        # If no device map, move manually
-        if DEVICE != "cpu" and model.device.type != DEVICE:
-            model = model.to(DEVICE)
-        st.success("✅ Model loaded successfully!")
-        return tokenizer, model
     except Exception as e:
-        st.error(f"❌ Error loading model: {str(e)}")
-        # Return a fallback that won't break the app
-        return None, None
-# -------------------------
-# SPAcy SETUP
-# -------------------------
-try:
-    nlp = spacy.load("en_core_web_sm")
-except OSError:
-    with st.spinner("Downloading spaCy model..."):
-        subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
-        nlp = spacy.load("en_core_web_sm")
-# -------------------------
-# SESSION STATE INITIALIZATION
-# -------------------------
-def init_session_state():
-    """Initialize all session state variables"""
-    defaults = {
-        'model_loaded': False,
-        'text_data': "",
-        'simplified_clause': "",
-        'classification_results': "",
-        'ner_results': {},
-        'extracted_clauses': [],
-        'negotiation_alternatives': {},
-        'risk_prediction': {},
-        'fairness_score': 50,
-        'fairness_rationale': "",
-        'battle_results': "",
-        'sensitive_data_results': "",
-        'litigation_risk_results': "",
-    }
-    for key, value in defaults.items():
-        if key not in st.session_state:
-            st.session_state[key] = value
-init_session_state()
-# -------------------------
-# STREAMLIT UI - Spaces Optimized
-# -------------------------
-# Sidebar with Spaces info
-with st.sidebar:
-    st.title("⚖️ ClauseWise")
-    st.markdown("Legal AI Assistant powered by Granite 3.2 2B")
-    # Spaces info
-    st.markdown("---")
-    st.markdown("**Hardware Info:**")
-    st.write(f"Device: {DEVICE}")
-    if torch.cuda.is_available():
-        st.write(f"GPU: {torch.cuda.get_device_name()}")
-    # File upload
-    st.markdown("---")
-    st.subheader("📁 Document Input")
-    uploaded_file = st.file_uploader(
-        "Upload PDF/DOCX/TXT",
-        type=["pdf", "docx", "txt"],
-        help="Supported formats: PDF, Word, Text"
-    )
-    # Text input
-    pasted_text = st.text_area(
-        "Or paste text directly",
-        height=150,
-        placeholder="Paste your legal text here...",
-        help="For best results, provide clear legal clauses or contract text"
-    )
-    # Load model button
-    st.markdown("---")
-    if st.button("🔄 Initialize AI Model", type="primary"):
-        with st.spinner("Loading AI model..."):
-            tokenizer, model = load_llm_model()
-            if tokenizer and model:
-                st.session_state.model_loaded = True
-                st.success("AI model ready!")
-            else:
-                st.error("Failed to load model")
-# Main area
-st.title("⚖️ ClauseWise – Legal AI Assistant")
-st.markdown("Analyze legal documents with AI-powered insights using IBM's Granite 3.2 2B model")
-# Process document input
-if uploaded_file or pasted_text:
-    with st.spinner("Processing document..."):
-        if uploaded_file:
-            text_data = load_document(uploaded_file)
-        else:
-            text_data = pasted_text
-        st.session_state.text_data = text_data
-        # Show document preview
-        with st.expander("📄 Document Preview", expanded=False):
-            preview_text = text_data[:1500] + ("..." if len(text_data) > 1500 else "")
-            st.text_area("Preview", preview_text, height=200, label_visibility="collapsed")
-            st.caption(f"Document length: {len(text_data)} characters")
-# Warning if no model loaded
-if not st.session_state.model_loaded:
-    st.warning("⚠️ Please initialize the AI model first using the button in the sidebar")
-# -------------------------
-# HELPER FUNCTIONS - Optimized for Spaces
-# -------------------------
-def load_document(file) -> str:
-    """Load text from various document formats"""
-    if not file:
-        return ""
-    name = (file.name or "").lower()
     try:
-        if name.endswith(".pdf"):
-            return load_text_from_pdf(file)
-        elif name.endswith(".docx"):
-            return load_text_from_docx(file)
-        elif name.endswith(".txt"):
-            return load_text_from_txt(file)
-        else:
-            # Try all formats
-            for loader in [load_text_from_pdf, load_text_from_docx, load_text_from_txt]:
-                try:
-                    return loader(file)
-                except:
-                    continue
-            return ""
     except Exception as e:
-        st.error(f"Error reading document: {str(e)}")
-        return ""
-def load_text_from_pdf(file_obj) -> str:
-    """Extract text from PDF"""
     try:
-        reader = PdfReader(file_obj)
         text = ""
         for page in reader.pages:
-            page_text = page.extract_text() or ""
-            text += page_text + "\n"
         return text.strip()
     except Exception as e:
-        st.error(f"PDF reading error: {str(e)}")
-        return ""
-def load_text_from_docx(file_obj) -> str:
-    """Extract text from Word document"""
     try:
-        doc = docx.Document(file_obj)
-        return "\n".join([para.text for para in doc.paragraphs]).strip()
     except Exception as e:
-        st.error(f"DOCX reading error: {str(e)}")
-        return ""
-def load_text_from_txt(file_obj) -> str:
-    """Extract text from text file"""
     try:
-        content = file_obj.read()
-        if isinstance(content, bytes):
-            content = content.decode('utf-8', errors='ignore')
-        return str(content).strip()
     except Exception as e:
-        st.error(f"TXT reading error: {str(e)}")
-        return ""
-def build_chat_prompt(system_prompt: str, user_prompt: str) -> str:
-    """Build chat prompt for the model"""
-    tokenizer, model = load_llm_model()
-    if tokenizer is None:
-        return f"{system_prompt}\n\n{user_prompt}"
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": user_prompt}
-    ]
-    try:
-        return tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-    except Exception:
-        # Fallback template
-        return f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:"
-def llm_generate(system_prompt: str, user_prompt: str, max_new_tokens=512, temperature=0.3, top_p=0.9) -> str:
-    """Generate text using the LLM with Spaces optimization"""
-    tokenizer, model = load_llm_model()
-    if tokenizer is None or model is None:
-        return "❌ AI model not loaded. Please initialize the model first."
-    try:
-        prompt = build_chat_prompt(system_prompt, user_prompt)
-        # Tokenize with truncation for Spaces memory limits
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=2048
-        ).to(DEVICE)
-        # Generate with optimized settings
-        with torch.no_generation():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.1
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract just the assistant's response
-        if "Assistant:" in response:
-            return response.split("Assistant:")[-1].strip()
-        elif prompt in response:
-            return response[len(prompt):].strip()
-        else:
-            return response.strip()
-    except Exception as e:
-        return f"❌ Generation error: {str(e)}"
-# -------------------------
-# ANALYSIS FUNCTIONS - Simplified for Spaces
-# -------------------------
-def simplify_clause(clause: str) -> str:
-    """Simplify legal clause to plain English"""
-    system = """You are a legal assistant that rewrites complex legal clauses into simple, plain English.
-    Keep the meaning exactly the same but make it easy for non-lawyers to understand.
-    Focus on clarity and simplicity."""
-    user = f"Rewrite this legal clause in plain English:\n\n{clause}"
-    return llm_generate(system, user, max_new_tokens=400)
-def ner_entities(text: str) -> Dict[str, List[str]]:
-    """Extract named entities using spaCy"""
-    if not text:
-        return {}
-    doc = nlp(text[:10000])  # Limit for performance
-    entities = {}
-    for ent in doc.ents:
-        entities.setdefault(ent.label_, []).append(ent.text)
-    # Remove duplicates
-    return {k: list(set(v)) for k, v in entities.items()}
-CLAUSE_SPLIT_REGEX = re.compile(r"(?:(?:^\s*\d+(?:\.\d+)*[.)]\s+)|(?:(?<=[.;])\s+(?=[A-Z]))", re.MULTILINE)
-def extract_clauses(text: str) -> List[str]:
-    """Extract individual clauses from legal text"""
-    if not text:
-        return []
-    # Simple clause splitting
-    clauses = re.split(CLAUSE_SPLIT_REGEX, text)
-    clauses = [c.strip() for c in clauses if len(c.strip()) > 50]  # Minimum length
-    # Remove duplicates based on simplified text
-    seen = set()
-    unique_clauses = []
-    for clause in clauses:
-        # Create a simple fingerprint
-        simple = re.sub(r'\s+', ' ', clause.lower())[:100]
-        if simple not in seen:
-            seen.add(simple)
-            unique_clauses.append(clause)
-    return unique_clauses[:20]  # Limit for performance
-# -------------------------
-# MAIN TABS INTERFACE
-# -------------------------
-if st.session_state.text_data:
-    tab1, tab2, tab3, tab4 = st.tabs([
-        "🔍 Clause Analysis",
-        "📊 Document Insights",
-        "⚖️ Legal Review",
-        "🛡️ Risk Assessment"
-    ])
-    with tab1:
-        st.subheader("Clause Analysis")
-        col1, col2 = st.columns(2)
         with col1:
-            if st.button("🧹 Simplify Clauses", use_container_width=True):
-                if st.session_state.model_loaded:
-                    with st.spinner("Simplifying clauses..."):
-                        simplified = simplify_clause(st.session_state.text_data[:2000])
-                        st.session_state.simplified_clause = simplified
-                else:
-                    st.warning("Please initialize AI model first")
-            if st.session_state.simplified_clause:
-                st.subheader("Simplified Version")
-                st.write(st.session_state.simplified_clause)
         with col2:
-            if st.button("🔍 Extract Entities", use_container_width=True):
-                with st.spinner("Extracting named entities..."):
-                    entities = ner_entities(st.session_state.text_data)
-                    st.session_state.ner_results = entities
-            if st.session_state.ner_results:
-                st.subheader("Named Entities")
-                for label, values in st.session_state.ner_results.items():
-                    with st.expander(f"{label} ({len(values)})"):
-                        st.write(", ".join(values[:10]))  # Limit display
-    with tab2:
-        st.subheader("Document Insights")
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("📑 Extract Clauses", use_container_width=True):
-                with st.spinner("Extracting clauses..."):
-                    clauses = extract_clauses(st.session_state.text_data)
-                    st.session_state.extracted_clauses = clauses
-            if st.session_state.extracted_clauses:
-                st.subheader(f"Extracted Clauses ({len(st.session_state.extracted_clauses)})")
-                for i, clause in enumerate(st.session_state.extracted_clauses[:5], 1):
-                    with st.expander(f"Clause {i}"):
-                        st.write(clause[:500] + "..." if len(clause) > 500 else clause)
-        with col2:
-            if st.button("📋 Classify Document", use_container_width=True):
-                if st.session_state.model_loaded:
-                    with st.spinner("Classifying document..."):
-                        doc_type = classify_document(st.session_state.text_data)
-                        st.session_state.classification_results = doc_type
-                else:
-                    st.warning("Please initialize AI model first")
-            if st.session_state.classification_results:
-                st.subheader("Document Type")
-                st.info(st.session_state.classification_results)
-    with tab3:
-        st.subheader("Legal Review Tools")
-        st.info("More advanced legal review features will be available here")
-    with tab4:
-        st.subheader("Risk Assessment")
-        st.info("Risk analysis features will be available here")
 else:
-    # Welcome screen when no document is loaded
     st.markdown("""
-    ## 👋 Welcome to ClauseWise
-    To get started:
-    1. **Upload a document** (PDF, Word, or Text) in the sidebar, OR
     2. **Paste your legal text** in the text area
-    3. **Initialize the AI model** using the button in the sidebar
-    4. **Choose an analysis tool** from the tabs above
-    ### 📋 Supported Analyses:
-    - **Clause Simplification**: Rewrite legal jargon in plain English
-    - **Entity Extraction**: Identify people, organizations, dates
-    - **Clause Extraction**: Break down documents into individual clauses
-    - **Document Classification**: Identify the type of legal document
-    ### ⚠️ Important Notes for Spaces:
-    - Model loading may take 2-5 minutes on first use
-    - Some features require GPU acceleration
-    - Large documents may be processed in chunks
     """)
-# -------------------------
-# MISSING FUNCTION IMPLEMENTATIONS
-# -------------------------
-def classify_document(text: str) -> str:
-    """Classify document type"""
-    system = """You are a legal document classifier. Analyze the text and classify it into one of these types:
-    - Non-Disclosure Agreement (NDA)
-    - Employment Contract
-    - Service Agreement
-    - Lease Agreement
-    - Sales Agreement
-    - Terms of Service
-    - Other Legal Document
-    Respond with ONLY the document type name."""
-    user = f"Classify this legal document:\n\n{text[:3000]}"
-    response = llm_generate(system, user, max_new_tokens=100)
-    return response.strip()
-# Add other functions as needed with simplified implementations for Spaces
-# -------------------------
-# FOOTER
-# -------------------------
 st.markdown("---")
-st.markdown(
-    "**ClauseWise** | Powered by IBM Granite 3.2 2B | "
-    "Deployed on Hugging Face Spaces 🤗"
-)

 import streamlit as st
 import os
 import re
 import json
+from typing import List, Dict
 import torch
+from transformers import pipeline
 from pypdf import PdfReader
 import docx
+import io
+# Set page config FIRST - this is critical for Streamlit
 st.set_page_config(
+    page_title="ClauseWise Legal Assistant",
     page_icon="⚖️",
+    layout="wide"
 )
+# Use a small, reliable model
+MODEL_ID = "microsoft/DialoGPT-small"  # 334M parameters - fits in Spaces memory
@st.cache_resource(show_spinner=False)
def load_model():
    """Create the text-generation pipeline for ``MODEL_ID``.

    The result is wrapped in ``st.cache_resource``. Half precision and
    ``device_map="auto"`` are requested only when CUDA is reported as
    available; otherwise float32 is used and no device map is passed.

    Returns:
        The pipeline object, or ``None`` when construction fails (the
        failure is reported through ``st.error``).
    """
    try:
        on_gpu = torch.cuda.is_available()
        pipeline_options = {
            "model": MODEL_ID,
            "torch_dtype": torch.float16 if on_gpu else torch.float32,
            "device_map": "auto" if on_gpu else None,
            "max_length": 512,
        }
        return pipeline("text-generation", **pipeline_options)
    except Exception as e:
        st.error(f"Model loading failed: {e}")
        return None
def simple_llm_generate(prompt: str, max_length=200) -> str:
    """Generate a continuation of ``prompt`` with the cached pipeline.

    Args:
        prompt: Text handed to the model.
        max_length: Upper bound on the number of *newly generated* tokens.
            It is forwarded as ``max_new_tokens`` so that a long prompt
            cannot consume the whole budget and leave no room for output.

    Returns:
        The generated text without the prompt, a fixed notice when the model
        could not be loaded, or a ``"Generation error: ..."`` message when
        generation raises.
    """
    generator = load_model()
    if generator is None:
        return "Model not available. Using demo mode."

    # Pad with the loaded tokenizer's end-of-sequence id when it exposes one;
    # 50256 is kept only as the fallback the code used before.
    tokenizer = getattr(generator, "tokenizer", None)
    pad_token_id = getattr(tokenizer, "eos_token_id", None)
    if pad_token_id is None:
        pad_token_id = 50256

    try:
        result = generator(
            prompt,
            max_new_tokens=max_length,
            num_return_sequences=1,
            temperature=0.7,
            do_sample=True,
            pad_token_id=pad_token_id,
            return_full_text=False,
        )
        generated = result[0]['generated_text']
        # Defensive: strip the prompt if the pipeline echoed it anyway.
        if generated.startswith(prompt):
            return generated[len(prompt):].strip()
        return generated.strip()
    except Exception as e:
        return f"Generation error: {str(e)}"
+# Document loading functions
def load_text_from_pdf(file_obj):
    """Return the text of every page of a PDF, one page per line group.

    The upload is read fully into memory and rewound so another reader can
    consume it afterwards. Pages with no extractable text are skipped.

    Returns:
        The stripped text, or ``"Error reading PDF: ..."`` if anything raises.
    """
    try:
        raw_bytes = file_obj.read()
        file_obj.seek(0)  # leave the stream rewound for any later reader
        pdf = PdfReader(io.BytesIO(raw_bytes))
        page_texts = (page.extract_text() for page in pdf.pages)
        return "".join(chunk + "\n" for chunk in page_texts if chunk).strip()
    except Exception as e:
        return f"Error reading PDF: {str(e)}"
def load_text_from_docx(file_obj):
    """Return the non-blank paragraphs of a Word document joined by newlines.

    The upload is read fully into memory and rewound before parsing.

    Returns:
        The joined paragraph text, or ``"Error reading DOCX: ..."`` if
        anything raises.
    """
    try:
        raw_bytes = file_obj.read()
        file_obj.seek(0)
        document = docx.Document(io.BytesIO(raw_bytes))
        kept = []
        for paragraph in document.paragraphs:
            # Whitespace-only paragraphs carry no content worth keeping.
            if paragraph.text.strip():
                kept.append(paragraph.text)
        return "\n".join(kept)
    except Exception as e:
        return f"Error reading DOCX: {str(e)}"
def load_text_from_txt(file_obj):
    """Return the contents of a plain-text upload as ``str``.

    Bytes are decoded as UTF-8 with undecodable bytes dropped. A leading
    byte-order mark is removed so it does not end up as an invisible first
    character of the document text.

    Returns:
        The decoded text, or ``"Error reading TXT: ..."`` if anything raises.
    """
    try:
        file_content = file_obj.read()
        file_obj.seek(0)  # leave the stream rewound for any later reader
        if isinstance(file_content, bytes):
            # "utf-8-sig" behaves like UTF-8 but swallows a leading BOM.
            return file_content.decode('utf-8-sig', errors='ignore')
        text = str(file_content)
        return text[1:] if text.startswith('\ufeff') else text
    except Exception as e:
        return f"Error reading TXT: {str(e)}"
def load_document(file):
    """Extract text from an uploaded file, choosing a loader by extension.

    Args:
        file: Uploaded file object exposing ``name`` and ``read``/``seek``;
            a falsy value yields an empty string.

    Returns:
        The extracted text. On failure the returned string starts with
        ``"Error"``, which is the prefix callers test for.
    """
    if not file:
        return ""
    # The name may be missing or None on some file-like objects.
    filename = (getattr(file, "name", "") or "").lower()
    if filename.endswith('.pdf'):
        return load_text_from_pdf(file)
    elif filename.endswith('.docx'):
        return load_text_from_docx(file)
    elif filename.endswith('.txt'):
        return load_text_from_txt(file)

    # Unknown extension: try each loader and keep the first usable result.
    for loader in (load_text_from_pdf, load_text_from_docx, load_text_from_txt):
        try:
            result = loader(file)
        except Exception:
            continue
        if result and not result.startswith("Error"):
            return result
    # Use the "Error" prefix so this is never mistaken for document text.
    return "Error: Could not read document"
+# FIXED regex patterns - simple and working
def extract_clauses_simple(text: str) -> List[str]:
    """Split ``text`` into candidate clauses using three regex strategies.

    The text is split (1) after sentence punctuation, (2) at line breaks
    followed by a number or bullet, and (3) at line breaks followed by a
    capitalised heading ending in a colon. Candidates from all three are
    concatenated in that order, stripped, filtered to lengths strictly
    between 30 and 1000 characters, and de-duplicated ignoring case and
    whitespace runs.

    Returns:
        At most 20 clauses, in first-seen order.
    """
    if not text:
        return []

    candidates = (
        re.split(r'[.;!?]\s+', text)
        + re.split(r'\n\s*(?:\d+\.|\*|\-)\s+', text)
        + re.split(r'\n\s*[A-Z][A-Za-z\s]+\:', text)
    )

    seen = set()  # normalized fingerprints; O(1) duplicate check
    cleaned_clauses = []
    for candidate in candidates:
        clause = candidate.strip()
        if not 30 < len(clause) < 1000:
            continue
        fingerprint = re.sub(r'\s+', ' ', clause.lower())
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        cleaned_clauses.append(clause)
        if len(cleaned_clauses) == 20:  # limit to 20 clauses
            break
    return cleaned_clauses
def rule_based_analysis(text):
    """Compute keyword-based statistics for ``text`` without using a model.

    Keywords are matched at the start of a word, so inflected forms such as
    "agreement" or "terminated" still count while unrelated words that merely
    contain a keyword ("determine", "please", "current", "standard") do not.

    Args:
        text: Document text to analyse.

    Returns:
        A ``(results, clauses)`` pair. ``results`` is a dict with the keys
        ``character_count``, ``word_count``, ``clauses_found``,
        ``risk_terms``, ``doc_type`` and ``doc_type_confidence``;
        ``clauses`` is the list returned by ``extract_clauses_simple``.
    """
    results = {}
    # Basic statistics
    results['character_count'] = len(text)
    results['word_count'] = len(text.split())
    # Clause analysis
    clauses = extract_clauses_simple(text)
    results['clauses_found'] = len(clauses)

    text_lower = text.lower()

    # Risk word detection
    risk_words = {
        'high_risk': ['liable', 'indemnify', 'damages', 'breach', 'termination', 'penalty'],
        'medium_risk': ['confidential', 'proprietary', 'warranty', 'obligation'],
        'low_risk': ['agree', 'party', 'contract', 'term']
    }
    found_risks = {}
    for risk_level, words in risk_words.items():
        found = [
            word for word in words
            if re.search(r'\b' + re.escape(word), text_lower)
        ]
        if found:
            found_risks[risk_level] = found
    results['risk_terms'] = found_risks

    # Simple document type detection: count keyword hits per document type.
    # "nda" is an acronym, so it must match as a whole word.
    doc_type_patterns = {
        "Non-Disclosure Agreement": r'\b(?:confidential|non.?disclosure|nda\b)',
        "Employment Contract": r'\b(?:employ|salary|duties|terminat)',
        "Lease Agreement": r'\b(?:lease|tenant|rent|property)',
        "Service Agreement": r'\b(?:service|provider|client|deliverable)',
        "Sales Agreement": r'\b(?:sale|purchase|price|payment)'
    }
    doc_type_scores = {
        doc_type: len(re.findall(pattern, text_lower))
        for doc_type, pattern in doc_type_patterns.items()
    }
    best_type = max(doc_type_scores.items(), key=lambda x: x[1])
    results['doc_type'] = best_type[0] if best_type[1] > 0 else "General Contract"
    # Simple confidence score: 20 points per hit, capped at 100.
    results['doc_type_confidence'] = min(100, best_type[1] * 20)
    return results, clauses
# Seed session state once per session; each entry starts as an empty value
# of its type ("" / {} / []).
for _state_key, _make_default in (
    ('text_data', str),
    ('analysis_results', dict),
    ('clauses', list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# UI Layout
st.title("⚖️ ClauseWise Legal Assistant")
st.markdown("**Lightweight legal document analysis**")

# Sidebar: collect a document (upload or pasted text) and run the analysis.
with st.sidebar:
    st.header("📁 Document Input")
    uploaded_file = st.file_uploader(
        "Upload Document",
        type=["pdf", "docx", "txt"],
        help="Supported formats: PDF, Word, Text"
    )
    pasted_text = st.text_area("Or paste text below:", height=150, placeholder="Paste your legal text here...")
    process_btn = st.button("📊 Analyze Document", type="primary", use_container_width=True)
    if process_btn:
        # None means "no input was supplied on this click"; in that case the
        # text kept in session state from an earlier run must not be reused.
        new_text = None
        if uploaded_file:
            with st.spinner("Reading document..."):
                new_text = load_document(uploaded_file)
        elif pasted_text.strip():
            new_text = pasted_text.strip()
        else:
            st.error("Please upload a file or paste some text")

        if new_text is not None:
            st.session_state.text_data = new_text
            if new_text and not new_text.startswith("Error"):
                st.success(f"✅ Loaded {len(new_text)} characters")
                with st.spinner("Analyzing content..."):
                    st.session_state.analysis_results, st.session_state.clauses = rule_based_analysis(new_text)
            else:
                st.error("Failed to load document text")
# Main content area: results for a loaded document, otherwise the welcome text.
if st.session_state.text_data and not st.session_state.text_data.startswith("Error"):
    # Document preview (first 1500 characters only)
    with st.expander("📄 Document Preview", expanded=False):
        preview_text = st.session_state.text_data
        is_truncated = len(preview_text) > 1500
        # The label is hidden, but a non-empty one is still supplied because
        # Streamlit discourages empty widget labels.
        st.text_area(
            "Document preview",
            (preview_text[:1500] + "...") if is_truncated else preview_text,
            height=200,
            label_visibility="collapsed",
        )
        if is_truncated:
            st.caption(f"Preview truncated. Full document: {len(preview_text)} characters")
    # Analysis results
    if st.session_state.analysis_results:
        results = st.session_state.analysis_results
        st.subheader("📊 Analysis Results")
        # Key metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Document Type", results['doc_type'])
        with col2:
            st.metric("Confidence", f"{results['doc_type_confidence']}%")
        with col3:
            st.metric("Clauses Found", results['clauses_found'])
        with col4:
            st.metric("Word Count", results['word_count'])
        # Risk analysis: one line per risk level listing the matched terms
        if results['risk_terms']:
            st.subheader("⚠️ Risk Analysis")
            for risk_level, terms in results['risk_terms'].items():
                risk_display = risk_level.replace('_', ' ').title()
                st.write(f"**{risk_display}**: {', '.join(terms)}")
        # Clauses display (at most the first 10)
        if st.session_state.clauses:
            st.subheader(f"📑 Extracted Clauses ({len(st.session_state.clauses)})")
            for i, clause in enumerate(st.session_state.clauses[:10], 1):
                with st.expander(f"Clause {i} ({len(clause)} chars)"):
                    st.write(clause)
            if len(st.session_state.clauses) > 10:
                st.info(f"Showing first 10 of {len(st.session_state.clauses)} clauses")
        # AI Analysis Section (optional); only the first 1000 characters are sent
        st.subheader("🤖 AI Analysis (Optional)")
        if st.button("Generate AI Summary", key="ai_summary"):
            if len(st.session_state.text_data) > 100:
                with st.spinner("AI is analyzing..."):
                    prompt = f"Provide a concise summary of this legal document:\n\n{st.session_state.text_data[:1000]}"
                    ai_summary = simple_llm_generate(prompt, max_length=300)
                    st.write(ai_summary)
            else:
                st.warning("Document too short for AI analysis")
else:
    # Welcome screen
    st.markdown("""
    ## 👋 Welcome to ClauseWise!
    A lightweight legal document analyzer optimized for Hugging Face Spaces.
    ### 🚀 How to use:
    1. **Upload a document** (PDF, DOCX, TXT) in the sidebar **OR**
    2. **Paste your legal text** in the text area
    3. Click **"Analyze Document"** to process
    4. Review the automated analysis results
    ### 📋 What it analyzes:
    - **Document type** (NDA, Employment, Lease, etc.)
    - **Risk terms** and potential issues
    - **Clause extraction** and organization
    - **Basic statistics** and metrics
    ### 🧪 Try this sample text:
    ```
    This Non-Disclosure Agreement (the "Agreement") is entered into between
    Company ABC ("Disclosing Party") and John Smith ("Receiving Party").
    The Receiving Party agrees to maintain the confidentiality of all
    proprietary information disclosed under this Agreement for a period
    of three years following termination. Any breach of this Agreement
    may result in legal action and liability for damages.
    ```
    ### ⚠️ Important Notes:
    - Uses rule-based analysis for reliability
    - Optional AI features use small, fast models
    - Works best with clear legal text
    - Free and open source
    """)
# Footer: divider plus a one-line caption.
st.markdown("---")
st.caption("🔒 ClauseWise Demo | Optimized for Hugging Face Spaces | No data stored")

# Cosmetic CSS: extra top padding for the main container, full-width buttons.
_CUSTOM_CSS = """
<style>
    .main .block-container {
        padding-top: 2rem;
    }
    .stButton button {
        width: 100%;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)