File size: 4,687 Bytes
1f23c0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import html
import json
import os
import re

import PyPDF2
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from google import genai

# Read the Gemini credential from the environment once load_dotenv() has run.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

# An unset or empty key leaves nothing for the app to do: report it and stop.
if api_key is None or api_key == "":
    st.error("❌ Gemini API key not found in .env.")
    st.stop()

# Utility: Extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts (here, the uploaded file).

    Returns:
        The extracted page texts, each followed by a newline, joined together
        and stripped of leading/trailing whitespace. Pages whose extracted
        text is empty or ``None`` contribute nothing.
    """
    pdf = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in pdf.pages)
    combined = "".join(chunk + "\n" for chunk in page_texts if chunk)
    return combined.strip()

# Utility: Extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX file, one paragraph per line.

    Args:
        file: Whatever ``docx.Document`` accepts (here, the uploaded file).

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``, joined
        with newlines and stripped of leading/trailing whitespace.
    """
    paragraphs = Document(file).paragraphs
    joined = "\n".join(paragraph.text for paragraph in paragraphs)
    return joined.strip()

# Utility: Parse Gemini JSON response
def safe_parse_json(response_text):
    """Parse a model reply into a dict, tolerating fences and stray prose.

    The reply is stripped of Markdown code-fence markers and parsed as JSON.
    If that fails, the span from the first ``{`` to the last ``}`` is tried,
    which recovers replies that wrap the JSON object in explanatory text.

    Args:
        response_text: Raw reply text; may be ``None`` or a non-string.

    Returns:
        The parsed JSON object when the reply contains one. Otherwise a
        fallback dict whose ``"summary"`` is the raw reply and whose
        ``"highlights"`` and ``"glossary"`` are ``None``; an error banner is
        shown in that case. Valid JSON that is not an object (a list, a bare
        string, ...) also takes the fallback path, because callers use
        ``.get`` on the result.
    """
    raw = response_text if isinstance(response_text, str) else ""
    # Drop ``` / ```json markers that start a line and ``` markers that end one.
    clean_text = re.sub(
        r"^```(?:json)?|```$", "", raw.strip(), flags=re.MULTILINE
    ).strip()

    candidates = [clean_text]
    first_brace = clean_text.find("{")
    last_brace = clean_text.rfind("}")
    if 0 <= first_brace < last_brace:
        # Second chance: the outermost {...} span, for replies with extra prose.
        candidates.append(clean_text[first_brace:last_brace + 1])

    parsed = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; move on to the next candidate.
            continue
        break

    if isinstance(parsed, dict):
        return parsed

    st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.")
    return {
        "summary": response_text,
        "highlights": None,
        "glossary": None,
    }

# Call Gemini API
def call_gemini_api(document_text):
    """Ask Gemini to analyze a legal document and return the parsed reply.

    Args:
        document_text: The document's extracted text, embedded in the prompt.

    Returns:
        Whatever ``safe_parse_json`` produces from the reply text; the prompt
        asks for JSON with the keys 'summary', 'highlights' and 'glossary'.
    """
    header = f"Analyze the following legal document:\n\n{document_text}\n\n"
    instructions = (
        "Instructions:\n"
        "- Summarize the key points of the document.\n"
        "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').\n"
        "- Provide simplified explanations of complex legal terms (as a dictionary).\n"
        "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'."
    )
    prompt = header + instructions

    # Uses the module-level api_key loaded at startup.
    gemini = genai.Client(api_key=api_key)
    reply = gemini.models.generate_content(model="gemini-2.0-flash", contents=prompt)
    return safe_parse_json(reply.text)

# Render Highlights Beautifully
def render_highlights(highlights):
    """Render the 'highlights' part of the analysis.

    Args:
        highlights: Either a list of dicts with 'clause' and 'description'
            entries, a plain string, or anything else (e.g. ``None``).

    Behavior:
        * list of dicts -- each item with a non-empty clause and description
          becomes a numbered card; the number is the item's 1-based position
          in the list. If no item qualifies (including an empty list), an
          info message is shown instead of leaving the section blank.
        * str -- rendered as Markdown.
        * anything else -- an info message is shown.
    """
    if isinstance(highlights, list) and all(isinstance(item, dict) for item in highlights):
        rendered = 0
        for idx, item in enumerate(highlights, 1):
            # The values come from model output based on an uploaded document,
            # so they are untrusted: coerce to text (null -> "") and escape
            # them before embedding in HTML rendered with unsafe_allow_html.
            clause = html.escape(str(item.get("clause") or "").strip())
            desc = html.escape(str(item.get("description") or "").strip())
            if clause and desc:
                st.markdown(f"""
                <div style="background-color:#f5f5f5;padding:10px;border-radius:8px;margin-bottom:10px">
                    <strong>{idx}. {clause}</strong><br>
                    <span style="font-size: 0.95rem;">{desc}</span>
                </div>
                """, unsafe_allow_html=True)
                rendered += 1
        if not rendered:
            st.info("No highlights available.")
    elif isinstance(highlights, str):
        st.markdown(highlights)
    else:
        st.info("No highlights available.")

# Render Glossary Beautifully
def render_glossary(glossary):
    """Render the 'glossary' part of the analysis.

    Args:
        glossary: Either a dict mapping a term to its explanation, a plain
            string, or anything else (e.g. ``None``).

    Behavior:
        * non-empty dict -- one "term: explanation" line per entry.
        * str -- rendered as Markdown.
        * anything else, including an empty dict -- an info message is shown.
    """
    if isinstance(glossary, dict) and glossary:
        for term, explanation in glossary.items():
            # Terms and explanations are model output based on an uploaded
            # document, so escape them before embedding in HTML that is
            # rendered with unsafe_allow_html.
            safe_term = html.escape(str(term))
            safe_explanation = html.escape(str(explanation))
            st.markdown(f"""
            <div style="margin-bottom: 8px;">
                <strong>{safe_term}:</strong> {safe_explanation}
            </div>
            """, unsafe_allow_html=True)
    elif isinstance(glossary, str):
        st.markdown(glossary)
    else:
        st.info("No glossary available.")

# Main App
def main():
    """Run the Streamlit page: upload, preview, summarize, display.

    Flow:
        1. Accept a PDF or DOCX upload and extract its text.
        2. Show the extracted text in a preview box.
        3. When the button is pressed, send the text to Gemini and render the
           summary, highlights and glossary from the parsed reply.

    Extraction and API failures are reported with ``st.error`` instead of an
    unhandled traceback.
    """
    st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
    # The emoji below replace mis-encoded text (e.g. "πŸ“„") in the labels.
    st.title("📄 Legal Document Summarizer")
    st.caption("Upload a legal document (PDF or DOCX) to get a summary, key highlights, and glossary of legal terms.")

    uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx"])
    if not uploaded_file:
        return

    # Dispatch on the reported MIME type, falling back to the file extension
    # in case the MIME type is missing or non-standard.
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    filename = (uploaded_file.name or "").lower()
    if uploaded_file.type == "application/pdf" or filename.endswith(".pdf"):
        extractor = extract_text_from_pdf
    elif uploaded_file.type == docx_mime or filename.endswith(".docx"):
        extractor = extract_text_from_docx
    else:
        st.error("Unsupported file format.")
        return

    try:
        document_text = extractor(uploaded_file)
    except Exception as exc:  # UI boundary: report unreadable/corrupt files
        st.error(f"Could not read the document: {exc}")
        return

    if not document_text.strip():
        st.error("No text extracted from the document.")
        return

    st.subheader("📄 Document Preview")
    st.text_area("Extracted Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    with st.spinner("Calling Gemini..."):
        try:
            result = call_gemini_api(document_text)
        except Exception as exc:  # UI boundary: report API/network failures
            st.error(f"Gemini request failed: {exc}")
            return

    st.subheader("📝 Summary")
    st.write(result.get("summary", "No summary found."))

    st.subheader("📌 Highlights")
    render_highlights(result.get("highlights"))

    st.subheader("📘 Glossary")
    render_glossary(result.get("glossary"))

if __name__ == "__main__":
    main()