Spaces:

arshad1234321
/

finTech

Runtime error

File size: 4,687 Bytes

92f1ec3
 
 
 
 
6f4d51b
 
 
92f1ec3
6f4d51b
 
 
92f1ec3
6f4d51b
 
 
 
 
92f1ec3
6f4d51b
92f1ec3
6f4d51b
 
 
 
 
92f1ec3
6f4d51b
92f1ec3
6f4d51b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92f1ec3
6f4d51b
92f1ec3
6f4d51b
92f1ec3
6f4d51b
 
 
 
92f1ec3
6f4d51b
92f1ec3
 
6f4d51b
92f1ec3
 
6f4d51b
92f1ec3
6f4d51b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92f1ec3
6f4d51b
 
 
92f1ec3
6f4d51b
92f1ec3
6f4d51b
92f1ec3
 
 
 
 
6f4d51b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92f1ec3

import html
import json
import os
import re

import PyPDF2
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from google import genai

# Load API Key from .env
# Pull settings from a local .env file, then read the Gemini key from the environment.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

# Without a key no Gemini request can be made: report it and stop the script run.
if not api_key:
    st.error("❌ Gemini API key not found in .env.")
    st.stop()

# Utility: Extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of all pages of a PDF as one whitespace-trimmed string.

    Each page's text is followed by a newline; pages for which
    ``extract_text()`` yields nothing are skipped.
    """
    pdf = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in pdf.pages)
    return "".join(chunk + "\n" for chunk in page_texts if chunk).strip()

# Utility: Extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph texts of a DOCX file joined by newlines and trimmed."""
    paragraphs = Document(file).paragraphs
    joined = "\n".join(paragraph.text for paragraph in paragraphs)
    return joined.strip()

# Utility: Parse Gemini JSON response
def safe_parse_json(response_text):
    """Parse the model reply into a dict, tolerating fences and surrounding prose.

    The reply is tried in three forms, in order: the whole (stripped) text,
    the contents of the first Markdown code fence, and the span from the
    first ``{`` to the last ``}``. The first form that is valid JSON decides
    the outcome.

    Args:
        response_text: Raw reply text. A non-string value (e.g. ``None``) is
            treated as an unparseable reply.

    Returns:
        The parsed JSON object when it is a dict. Otherwise an error is shown
        in the UI and a fallback dict is returned whose ``"summary"`` is the
        raw ``response_text`` and whose ``"highlights"`` and ``"glossary"``
        are ``None``.
    """
    text = response_text.strip() if isinstance(response_text, str) else ""

    # The whole text goes first so that a valid JSON reply which merely
    # *contains* a fence inside one of its string values is not mis-parsed.
    candidates = [text]
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        candidates.append(fenced.group(1).strip())
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; try the next form.
            continue
        if isinstance(parsed, dict):
            return parsed
        # Valid JSON of the wrong shape (list, string, ...): callers use
        # ``.get``, so fall through to the raw-text fallback instead.
        break

    st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.")
    return {
        "summary": response_text,
        "highlights": None,
        "glossary": None
    }

# Call Gemini API
def call_gemini_api(document_text):
    """Send the document to Gemini and return the parsed analysis.

    The prompt asks for a JSON object with the keys 'summary', 'highlights'
    and 'glossary'. The reply text is handed to ``safe_parse_json`` and that
    function's result is returned unchanged.
    """
    instructions = (
        "Instructions:\n"
        "- Summarize the key points of the document.\n"
        "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').\n"
        "- Provide simplified explanations of complex legal terms (as a dictionary).\n"
        "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'."
    )
    prompt = f"Analyze the following legal document:\n\n{document_text}\n\n" + instructions

    gemini = genai.Client(api_key=api_key)
    reply = gemini.models.generate_content(model="gemini-2.0-flash", contents=prompt)
    return safe_parse_json(reply.text)

# Render Highlights Beautifully
def render_highlights(highlights):
    """Render the 'highlights' part of the analysis.

    Args:
        highlights: Expected to be a list of dicts carrying 'clause' and
            'description' entries; each item with both present is shown as a
            numbered card. A plain string is rendered as Markdown. Any other
            value, or a list with nothing to show, produces an informational
            placeholder.

    The values come from model output derived from an uploaded document, so
    they are converted to text and HTML-escaped before being placed into
    markup rendered with ``unsafe_allow_html=True``.
    """
    if isinstance(highlights, str):
        st.markdown(highlights)
        return

    is_list_of_dicts = isinstance(highlights, list) and all(
        isinstance(item, dict) for item in highlights
    )
    if not is_list_of_dicts:
        st.info("No highlights available.")
        return

    shown = 0
    for item in highlights:
        raw_clause = item.get("clause")
        raw_desc = item.get("description")
        # JSON null or non-string values must not crash on .strip().
        clause = "" if raw_clause is None else str(raw_clause).strip()
        desc = "" if raw_desc is None else str(raw_desc).strip()
        if not (clause and desc):
            continue
        # Number only the cards actually displayed so there are no gaps.
        shown += 1
        st.markdown(f"""
                <div style="background-color:#f5f5f5;padding:10px;border-radius:8px;margin-bottom:10px">
                    <strong>{shown}. {html.escape(clause)}</strong><br>
                    <span style="font-size: 0.95rem;">{html.escape(desc)}</span>
                </div>
                """, unsafe_allow_html=True)

    if shown == 0:
        # Empty list, or no item had both fields: say so instead of a blank section.
        st.info("No highlights available.")

# Render Glossary Beautifully
def render_glossary(glossary):
    """Render the 'glossary' part of the analysis.

    Args:
        glossary: Expected to be a dict mapping a term to its explanation;
            each entry is shown on its own line. A plain string is rendered
            as Markdown. Any other value, or an empty dict, produces an
            informational placeholder.

    Terms and explanations come from model output, so they are converted to
    text and HTML-escaped before being placed into markup rendered with
    ``unsafe_allow_html=True``.
    """
    if isinstance(glossary, str):
        st.markdown(glossary)
        return

    if not isinstance(glossary, dict) or not glossary:
        st.info("No glossary available.")
        return

    for term, explanation in glossary.items():
        safe_term = html.escape(str(term))
        # A JSON null explanation is shown as empty rather than the text "None".
        safe_explanation = html.escape("" if explanation is None else str(explanation))
        st.markdown(f"""
            <div style="margin-bottom: 8px;">
                <strong>{safe_term}:</strong> {safe_explanation}
            </div>
            """, unsafe_allow_html=True)

# Main App
def main():
    """Build the page: upload a document, preview its text, and show the analysis.

    Flow:
        1. Configure the page and show the uploader (PDF or DOCX).
        2. Extract text according to the upload's MIME type.
        3. Show the extracted text and, when the button is pressed, call
           Gemini and render summary, highlights and glossary.

    Failures while reading the document or calling Gemini are reported with
    ``st.error`` instead of surfacing as an unhandled traceback.
    """
    st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
    st.title("📄 Legal Document Summarizer")
    st.caption("Upload a legal document (PDF or DOCX) to get a summary, key highlights, and glossary of legal terms.")

    uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx"])
    if not uploaded_file:
        return

    if uploaded_file.type == "application/pdf":
        extract = extract_text_from_pdf
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        extract = extract_text_from_docx
    else:
        st.error("Unsupported file format.")
        return

    try:
        document_text = extract(uploaded_file)
    except Exception as exc:  # UI boundary: corrupt or encrypted files must not crash the page
        st.error(f"Could not read the document: {exc}")
        return

    if not document_text.strip():
        st.error("No text extracted from the document.")
        return

    st.subheader("📄 Document Preview")
    st.text_area("Extracted Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    # Keep only the network call inside the spinner so it disappears as soon
    # as the reply arrives.
    with st.spinner("Calling Gemini..."):
        try:
            result = call_gemini_api(document_text)
        except Exception as exc:  # UI boundary: network, quota or API errors
            st.error(f"Gemini request failed: {exc}")
            return

    if not isinstance(result, dict):
        # The lookups below need a mapping; show any other value as the summary.
        result = {"summary": result}

    st.subheader("📝 Summary")
    st.write(result.get("summary", "No summary found."))

    st.subheader("📌 Highlights")
    render_highlights(result.get("highlights"))

    st.subheader("📘 Glossary")
    render_glossary(result.get("glossary"))

# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()