"""Streamlit app that summarizes an uploaded legal document with Gemini.

The user uploads a PDF or DOCX file, the text is extracted and previewed,
and on request the text is sent to Gemini. The JSON answer is rendered as a
summary, a list of highlighted clauses, and a glossary of legal terms.
"""

import html
import json
import os
import re

import PyPDF2
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from google import genai

# Load API Key from .env
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    st.error("❌ Gemini API key not found in .env.")
    st.stop()

# MIME types reported for the two supported upload formats.
_PDF_MIME = "application/pdf"
_DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

# Matches a Markdown code fence (optionally tagged ``json``) at the start of a
# line, or a closing fence at the end of a line.
_CODE_FENCE_RE = re.compile(r"^```(?:json)?|```$", flags=re.MULTILINE)


# Utility: Extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The extracted page texts, each followed by a newline, concatenated
        and stripped. Pages for which no text is extracted are skipped.
    """
    reader = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(f"{content}\n" for content in page_texts if content).strip()


# Utility: Extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document joined by newlines.

    Args:
        file: A path or binary file-like object accepted by
            ``docx.Document``.

    Returns:
        The text of ``doc.paragraphs`` joined with ``"\\n"`` and stripped.
    """
    doc = Document(file)
    return "\n".join(para.text for para in doc.paragraphs).strip()


# Utility: Parse Gemini JSON response
def safe_parse_json(response_text):
    """Parse the model response as a JSON object, tolerating code fences.

    Args:
        response_text: Raw text returned by the model. May be wrapped in a
            Markdown code fence; may be ``None`` or otherwise not a string.

    Returns:
        The decoded JSON object when the response is valid JSON whose top
        level is an object. Otherwise an error is shown in the UI and a
        fallback dict is returned with the raw response under ``"summary"``
        and ``None`` for ``"highlights"`` and ``"glossary"``.
    """
    parsed = None
    if isinstance(response_text, str):
        clean_text = _CODE_FENCE_RE.sub("", response_text.strip())
        try:
            parsed = json.loads(clean_text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            parsed = None

    # Callers use ``.get`` on the result, so only a JSON object is acceptable.
    if isinstance(parsed, dict):
        return parsed

    st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.")
    return {
        "summary": response_text,
        "highlights": None,
        "glossary": None,
    }


# Call Gemini API
def call_gemini_api(document_text):
    """Ask Gemini to analyze a legal document and return the parsed result.

    Args:
        document_text: Full text of the document to analyze.

    Returns:
        The dict produced by ``safe_parse_json`` for the model's response.
        Errors raised by the Gemini client are propagated to the caller.
    """
    client = genai.Client(api_key=api_key)
    prompt = (
        f"Analyze the following legal document:\n\n{document_text}\n\n"
        "Instructions:\n"
        "- Summarize the key points of the document.\n"
        "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').\n"
        "- Provide simplified explanations of complex legal terms (as a dictionary).\n"
        "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'."
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
    )
    return safe_parse_json(response.text)


def _escaped_text(value):
    """Return *value* as stripped, HTML-escaped text (``None`` becomes "")."""
    if value is None:
        return ""
    return html.escape(str(value).strip())


# Render Highlights Beautifully
def render_highlights(highlights):
    """Render the highlighted clauses returned by the model.

    Args:
        highlights: Either a list of dicts with ``"clause"`` and
            ``"description"`` entries, a plain string, or anything else
            (treated as "nothing to show").

    Entries missing a clause or a description are skipped. Values are
    HTML-escaped because they are model output derived from an uploaded
    document and are rendered with ``unsafe_allow_html=True``.
    """
    if isinstance(highlights, str):
        st.markdown(highlights)
        return

    is_dict_list = isinstance(highlights, list) and all(
        isinstance(item, dict) for item in highlights
    )
    rendered_any = False
    if is_dict_list:
        for idx, item in enumerate(highlights, 1):
            clause = _escaped_text(item.get("clause"))
            desc = _escaped_text(item.get("description"))
            if clause and desc:
                st.markdown(
                    f"\n{idx}. {clause}\n{desc}\n",
                    unsafe_allow_html=True,
                )
                rendered_any = True

    if not rendered_any:
        st.info("No highlights available.")


# Render Glossary Beautifully
def render_glossary(glossary):
    """Render the glossary of legal terms returned by the model.

    Args:
        glossary: Either a dict mapping term to explanation, a plain string,
            or anything else (treated as "nothing to show").

    Terms and explanations are HTML-escaped because they are model output
    rendered with ``unsafe_allow_html=True``.
    """
    if isinstance(glossary, dict) and glossary:
        for term, explanation in glossary.items():
            st.markdown(
                f"\n{_escaped_text(term)}: {_escaped_text(explanation)}\n",
                unsafe_allow_html=True,
            )
    elif isinstance(glossary, str):
        st.markdown(glossary)
    else:
        st.info("No glossary available.")


def _select_extractor(uploaded_file):
    """Pick the text extractor for an upload, or ``None`` if unsupported.

    The MIME type is checked first; the filename extension is used as a
    fallback for uploads whose MIME type is empty or generic.
    """
    mime = getattr(uploaded_file, "type", "") or ""
    name = (getattr(uploaded_file, "name", "") or "").lower()

    if mime == _PDF_MIME:
        return extract_text_from_pdf
    if mime == _DOCX_MIME:
        return extract_text_from_docx
    if name.endswith(".pdf"):
        return extract_text_from_pdf
    if name.endswith(".docx"):
        return extract_text_from_docx
    return None


# Main App
def main():
    """Run the Streamlit page: upload, preview, summarize, and render."""
    st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
    st.title("📄 Legal Document Summarizer")
    st.caption(
        "Upload a legal document (PDF or DOCX) to get a summary, key highlights, and glossary of legal terms."
    )

    uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx"])
    if not uploaded_file:
        return

    extractor = _select_extractor(uploaded_file)
    if extractor is None:
        st.error("Unsupported file format.")
        return

    # UI boundary: a corrupt or unreadable upload should produce a message,
    # not a traceback.
    try:
        document_text = extractor(uploaded_file)
    except Exception as exc:
        st.error(f"❌ Could not read the document: {exc}")
        return

    if not document_text.strip():
        st.error("No text extracted from the document.")
        return

    st.subheader("📄 Document Preview")
    st.text_area("Extracted Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    with st.spinner("Calling Gemini..."):
        # UI boundary: report request failures instead of crashing the page.
        try:
            result = call_gemini_api(document_text)
        except Exception as exc:
            st.error(f"❌ Gemini request failed: {exc}")
            return

    st.subheader("📝 Summary")
    st.write(result.get("summary") or "No summary found.")

    st.subheader("📌 Highlights")
    render_highlights(result.get("highlights"))

    st.subheader("📘 Glossary")
    render_glossary(result.get("glossary"))


if __name__ == "__main__":
    main()