import streamlit as st import PyPDF2 from docx import Document import json from google import genai from dotenv import load_dotenv import os import re # Load API Key from .env load_dotenv() api_key = os.getenv("GEMINI_API_KEY") if not api_key: st.error("❌ Gemini API key not found in .env.") st.stop() # Utility: Extract text from PDF def extract_text_from_pdf(file): reader = PyPDF2.PdfReader(file) text = "" for page in reader.pages: content = page.extract_text() if content: text += content + "\n" return text.strip() # Utility: Extract text from DOCX def extract_text_from_docx(file): doc = Document(file) return "\n".join([para.text for para in doc.paragraphs]).strip() # Utility: Parse Gemini JSON response def safe_parse_json(response_text): try: clean_text = re.sub(r"^```(?:json)?|```$", "", response_text.strip(), flags=re.MULTILINE) return json.loads(clean_text) except Exception as e: st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.") return { "summary": response_text, "highlights": None, "glossary": None } # Call Gemini API def call_gemini_api(document_text): client = genai.Client(api_key=api_key) prompt = ( f"Analyze the following legal document:\n\n{document_text}\n\n" "Instructions:\n" "- Summarize the key points of the document.\n" "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').\n" "- Provide simplified explanations of complex legal terms (as a dictionary).\n" "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'." ) response = client.models.generate_content( model="gemini-2.0-flash", contents=prompt ) return safe_parse_json(response.text) # Render Highlights Beautifully def render_highlights(highlights): if isinstance(highlights, list) and all(isinstance(item, dict) for item in highlights): for idx, item in enumerate(highlights, 1): clause = item.get("clause", "").strip() desc = item.get("description", "").strip() if clause and desc: st.markdown(f"""