Spaces:
Runtime error
Runtime error
File size: 4,687 Bytes
92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 6f4d51b 92f1ec3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import html
import json
import os
import re

import PyPDF2
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from google import genai
# Pull GEMINI_API_KEY from the environment once any .env file has been loaded.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    # Nothing below can work without a key: report it and halt this script run.
    st.error("β Gemini API key not found in .env.")
    st.stop()
# Utility: Extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; ``main`` passes the
            Streamlit upload object.

    Returns:
        The non-empty page texts joined with newlines, with surrounding
        whitespace stripped. Empty string when no page yields text.
    """
    reader = PyPDF2.PdfReader(file)
    # extract_text() may give back an empty/None value for a page; such pages
    # are skipped instead of contributing blank lines.
    page_texts = (page.extract_text() for page in reader.pages)
    return "\n".join(text for text in page_texts if text).strip()
# Utility: Extract text from DOCX
def extract_text_from_docx(file):
    """Return the text of all paragraphs of a DOCX file, one per line.

    The result has leading and trailing whitespace stripped.
    """
    paragraphs = Document(file).paragraphs
    joined = "\n".join(paragraph.text for paragraph in paragraphs)
    return joined.strip()
# Utility: Parse Gemini JSON response
def safe_parse_json(response_text):
    """Parse the model's reply into a dict, tolerating fences and stray prose.

    Markdown code fences (```` ``` ```` / ```` ```json ````) are stripped first.
    If the remaining text is not valid JSON on its own, the span from the
    first ``{`` to the last ``}`` is tried as well, so a reply such as
    "Here is the result: {...}" still parses.

    Args:
        response_text: Raw reply text; may be ``None`` or a non-string, in
            which case the fallback below is returned.

    Returns:
        The decoded JSON object. When no JSON *object* can be decoded, an
        error is shown in the UI and a fallback dict is returned whose
        'summary' is the raw reply and whose 'highlights' and 'glossary'
        are ``None``.
    """
    raw = response_text if isinstance(response_text, str) else ""
    cleaned = re.sub(r"^```(?:json)?|```$", "", raw.strip(), flags=re.MULTILINE)

    candidates = [cleaned]
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if 0 <= start < end:
        candidates.append(cleaned[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; move on to the next candidate.
            continue
        # Callers use .get() on the result, so a bare list/string/number is
        # treated the same as unparseable output.
        if isinstance(parsed, dict):
            return parsed

    st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.")
    return {
        "summary": response_text,
        "highlights": None,
        "glossary": None,
    }
# Call Gemini API
def call_gemini_api(document_text):
    """Send the document to Gemini for analysis and return the parsed reply.

    Args:
        document_text: Plain text of the legal document.

    Returns:
        The result of ``safe_parse_json`` applied to the reply text; the
        prompt asks for the keys 'summary', 'highlights' and 'glossary'.
    """
    gemini = genai.Client(api_key=api_key)

    instructions = "\n".join(
        [
            "Instructions:",
            "- Summarize the key points of the document.",
            "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').",
            "- Provide simplified explanations of complex legal terms (as a dictionary).",
            "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'.",
        ]
    )
    prompt = f"Analyze the following legal document:\n\n{document_text}\n\n{instructions}"

    reply = gemini.models.generate_content(model="gemini-2.0-flash", contents=prompt)
    return safe_parse_json(reply.text)
# Render Highlights Beautifully
def render_highlights(highlights):
    """Render the model's highlights as numbered cards.

    Args:
        highlights: Expected to be a list of dicts with 'clause' and
            'description' entries. A plain string is rendered as Markdown.
            Anything else, including an empty list or a list with no usable
            entry, shows a "No highlights available." notice.
    """
    if isinstance(highlights, str):
        st.markdown(highlights)
        return

    is_dict_list = isinstance(highlights, list) and all(
        isinstance(item, dict) for item in highlights
    )
    rendered = 0
    if is_dict_list:
        for idx, item in enumerate(highlights, 1):
            # Values come from model output: they may be null or non-strings.
            clause_raw = item.get("clause")
            desc_raw = item.get("description")
            clause = "" if clause_raw is None else str(clause_raw).strip()
            desc = "" if desc_raw is None else str(desc_raw).strip()
            if not (clause and desc):
                continue
            # The text is injected into raw HTML (unsafe_allow_html=True), so
            # it is escaped to keep document/model content from adding markup.
            # Lines are kept unindented so Markdown does not treat the HTML
            # as an indented code block.
            card = (
                "\n"
                '<div style="background-color:#f5f5f5;padding:10px;border-radius:8px;margin-bottom:10px">\n'
                f"<strong>{idx}. {html.escape(clause)}</strong><br>\n"
                f'<span style="font-size: 0.95rem;">{html.escape(desc)}</span>\n'
                "</div>\n"
            )
            st.markdown(card, unsafe_allow_html=True)
            rendered += 1

    if not rendered:
        st.info("No highlights available.")
# Render Glossary Beautifully
def render_glossary(glossary):
    """Render the glossary as one "term: explanation" line per entry.

    Args:
        glossary: Expected to be a dict mapping a term to its explanation.
            A plain string is rendered as Markdown. Anything else, including
            an empty dict, shows a "No glossary available." notice.
    """
    if isinstance(glossary, str):
        st.markdown(glossary)
    elif isinstance(glossary, dict) and glossary:
        for term, explanation in glossary.items():
            # Both values originate from model output and are placed in raw
            # HTML (unsafe_allow_html=True), so they are escaped first.
            safe_term = html.escape(str(term))
            safe_explanation = html.escape(str(explanation))
            entry = (
                "\n"
                '<div style="margin-bottom: 8px;">\n'
                f"<strong>{safe_term}:</strong> {safe_explanation}\n"
                "</div>\n"
            )
            st.markdown(entry, unsafe_allow_html=True)
    else:
        st.info("No glossary available.")
# Main App
def main():
    """Run the Streamlit page: upload, preview, summarize, display.

    The uploaded PDF/DOCX is converted to text and previewed. When the
    "Summarize Document" button is pressed, the text is sent to Gemini via
    ``call_gemini_api`` and the summary, highlights and glossary are shown.
    Problems reading the file or calling Gemini are reported with
    ``st.error`` instead of surfacing as a traceback.
    """
    # NOTE(review): the leading "π" in the headings below looks like a
    # mis-encoded emoji; the intended glyph cannot be recovered from here.
    st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
    st.title("π Legal Document Summarizer")
    st.caption(
        "Upload a legal document (PDF or DOCX) to get a summary, "
        "key highlights, and glossary of legal terms."
    )

    uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx"])
    if not uploaded_file:
        return

    extractors_by_mime = {
        "application/pdf": extract_text_from_pdf,
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_text_from_docx,
    }
    extractors_by_suffix = {
        ".pdf": extract_text_from_pdf,
        ".docx": extract_text_from_docx,
    }
    extract = extractors_by_mime.get(uploaded_file.type)
    if extract is None:
        # The MIME type is reported by the browser and may be missing or
        # generic, so fall back to the file extension.
        suffix = os.path.splitext(uploaded_file.name or "")[1].lower()
        extract = extractors_by_suffix.get(suffix)
    if extract is None:
        st.error("Unsupported file format.")
        return

    try:
        document_text = extract(uploaded_file)
    except Exception as exc:  # UI boundary: corrupt/encrypted files must not crash the page
        st.error(f"Could not read the uploaded file: {exc}")
        return

    if not document_text.strip():
        st.error("No text extracted from the document.")
        return

    st.subheader("π Document Preview")
    st.text_area("Extracted Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    with st.spinner("Calling Gemini..."):
        try:
            result = call_gemini_api(document_text)
        except Exception as exc:  # UI boundary: network/API failures
            st.error(f"Gemini request failed: {exc}")
            return

    if not isinstance(result, dict):
        # A reply that is not a JSON object has no keys to look up; show it
        # as the summary instead of failing on .get().
        result = {"summary": result}

    st.subheader("π Summary")
    st.write(result.get("summary") or "No summary found.")
    st.subheader("π Highlights")
    render_highlights(result.get("highlights"))
    st.subheader("π Glossary")
    render_glossary(result.get("glossary"))
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|