arshad1234321 committed on
Commit
81a7b62
·
verified ·
1 Parent(s): 6d3e714

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ from docx import Document
4
+ import json
5
+ from google import genai
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import re
9
+ import pandas as pd
10
+
11
# Read GEMINI_API_KEY from a local .env file when one exists; otherwise the
# process environment (e.g. a Hugging Face Spaces secret) has to supply it.
load_dotenv()
api_key = os.environ.get("GEMINI_API_KEY")

# Every Gemini call below needs the key, so report the problem in the UI and
# halt this script run instead of failing later.
if api_key is None or api_key == "":
    st.error("❌ Gemini API key not found. Please set GEMINI_API_KEY.")
    st.stop()
18
+
19
+ # Utility: Extract text from PDF
20
def extract_text_from_pdf(file):
    """Return the text extracted from all pages of a PDF.

    Args:
        file: A PDF source accepted by ``PyPDF2.PdfReader`` (``main`` passes
            the uploaded file object).

    Returns:
        The page texts joined with newlines and stripped of leading and
        trailing whitespace.  Pages whose extraction yields nothing are
        skipped, so the result is ``""`` when no page has text.
    """
    page_texts = (page.extract_text() for page in PyPDF2.PdfReader(file).pages)
    return "\n".join(chunk for chunk in page_texts if chunk).strip()
28
+
29
+ # Utility: Extract text from DOCX
30
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document.

    Args:
        file: A DOCX source accepted by ``docx.Document`` (``main`` passes
            the uploaded file object).

    Returns:
        The text of every entry in ``Document(file).paragraphs``, one per
        line, stripped of leading and trailing whitespace.
    """
    paragraphs = Document(file).paragraphs
    lines = [paragraph.text for paragraph in paragraphs]
    return "\n".join(lines).strip()
33
+
34
+ # Parse Gemini JSON response
35
def safe_parse_json(response_text):
    """Parse a model reply into a dict, tolerating fences and stray prose.

    Two candidates are tried in order:

    1. the reply with Markdown code-fence markers removed;
    2. the span from the first ``{`` to the last ``}``, which recovers the
       object when the model wraps it in explanatory text.

    Only a JSON *object* is accepted, because callers read the result with
    ``.get``; a valid JSON list or scalar is treated as unparseable.

    Args:
        response_text: Raw text returned by the model.  ``None`` or another
            non-string value is treated as an empty reply.

    Returns:
        The parsed dict on success.  Otherwise an error is shown in the UI
        and a fallback dict is returned with the raw reply under
        ``"summary"`` and ``None`` for ``"highlights"`` and ``"glossary"``.
    """
    stripped = response_text.strip() if isinstance(response_text, str) else ""

    candidates = [
        re.sub(r"^```(?:json)?|```$", "", stripped, flags=re.MULTILINE),
    ]
    start, end = stripped.find("{"), stripped.rfind("}")
    if 0 <= start < end:
        candidates.append(stripped[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input.  Either way, try the next form.
            continue
        if isinstance(parsed, dict):
            return parsed

    st.error("⚠️ Could not parse Gemini response as JSON. Showing raw response.")
    return {
        "summary": response_text,
        "highlights": None,
        "glossary": None,
    }
46
+
47
+ # Call Gemini API
48
def call_gemini_api(document_text):
    """Send the document to Gemini and return the parsed analysis.

    Args:
        document_text: Plain text of the legal document to analyse.

    Returns:
        The value produced by ``safe_parse_json`` for the model's reply.
        The prompt asks for a JSON object with the keys ``summary``,
        ``highlights`` and ``glossary``.
    """
    client = genai.Client(api_key=api_key)

    prompt = (
        f"Analyze the following legal document:\n\n{document_text}\n\n"
        "Instructions:\n"
        "- Summarize the key points of the document.\n"
        "- Highlight obligations, rights, and critical clauses (as a list of objects with 'clause' and 'description').\n"
        "- Provide simplified explanations of complex legal terms (as a dictionary).\n"
        "Return the result as JSON with keys: 'summary', 'highlights', 'glossary'."
    )

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        # Request a JSON body explicitly rather than relying on the prompt
        # alone; without this the reply is often wrapped in Markdown fences
        # or accompanied by prose that the parser has to strip.
        config={"response_mime_type": "application/json"},
    )

    return safe_parse_json(response.text)
66
+
67
+ # Render Highlights as Table
68
def render_highlights(highlights):
    """Render the ``highlights`` part of the analysis.

    Args:
        highlights: Expected to be a list of dicts (the prompt asks for
            ``clause`` / ``description`` objects), but the model's output is
            not guaranteed, so other shapes are handled too:

            * non-empty list of dicts -> table;
            * any other non-empty list -> Markdown bullet list;
            * non-blank string -> rendered as Markdown;
            * anything else (``None``, empty list, blank string) -> an
              informational "No highlights available." notice.
    """
    if isinstance(highlights, list) and highlights:
        if all(isinstance(item, dict) for item in highlights):
            st.table(pd.DataFrame(highlights))
        else:
            # Plain or mixed entries cannot form a sensible table; show them
            # as bullets instead of claiming there are no highlights.
            st.markdown("\n".join(f"- {item}" for item in highlights))
    elif isinstance(highlights, str) and highlights.strip():
        st.markdown(highlights)
    else:
        st.info("No highlights available.")
76
+
77
+ # Render Glossary as Table
78
def render_glossary(glossary):
    """Render the ``glossary`` part of the analysis.

    Args:
        glossary: Expected to be a dict mapping each term to its
            explanation, but the model's output is not guaranteed, so other
            shapes are handled too:

            * non-empty dict -> two-column Term / Explanation table;
            * non-empty list of dicts -> table built from those dicts;
            * non-blank string -> rendered as Markdown;
            * anything else (``None``, empty container, blank string) -> an
              informational "No glossary available." notice.
    """
    if isinstance(glossary, dict) and glossary:
        rows = [
            {"Term": term, "Explanation": explanation}
            for term, explanation in glossary.items()
        ]
        st.table(pd.DataFrame(rows))
    elif (
        isinstance(glossary, list)
        and glossary
        and all(isinstance(entry, dict) for entry in glossary)
    ):
        # Some replies return a list of entry objects instead of a mapping.
        st.table(pd.DataFrame(glossary))
    elif isinstance(glossary, str) and glossary.strip():
        st.markdown(glossary)
    else:
        st.info("No glossary available.")
87
+
88
+ # Main App
89
def main():
    """Run the Streamlit page: upload, preview, analyse and render.

    Flow: the user uploads a PDF or DOCX, the extracted text is shown for
    review, and pressing the button sends it to Gemini and renders the
    summary, highlights and glossary.  Failures while reading the document
    or calling the API are reported in the page instead of raising.

    Emoji in headings are written as ``\\U`` escapes so the source stays
    plain ASCII and cannot be garbled by a wrong file encoding.
    """
    st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
    st.title("\U0001F4C4 Legal Document Summarizer")  # page-facing-up emoji
    st.caption("Upload a legal document (PDF or DOCX) to get a summary, key highlights, and glossary of legal terms.")

    uploaded_file = st.file_uploader("Upload your document", type=["pdf", "docx"])
    if not uploaded_file:
        return

    # The MIME type comes from the browser and is not always accurate, so
    # fall back to the file extension (already restricted by the uploader).
    file_name = (uploaded_file.name or "").lower()
    mime_type = uploaded_file.type
    if mime_type == "application/pdf" or file_name.endswith(".pdf"):
        extractor = extract_text_from_pdf
    elif (
        mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        or file_name.endswith(".docx")
    ):
        extractor = extract_text_from_docx
    else:
        st.error("Unsupported file format.")
        return

    try:
        document_text = extractor(uploaded_file)
    except Exception as exc:  # UI boundary: corrupt or encrypted files
        st.error(f"Could not read the document: {exc}")
        return

    if not document_text.strip():
        st.error("No text extracted from the document.")
        return

    st.subheader("\U0001F4C4 Document Preview")  # page-facing-up emoji
    st.text_area("Extracted Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    try:
        with st.spinner("Calling Gemini..."):
            result = call_gemini_api(document_text)
    except Exception as exc:  # UI boundary: network, quota or auth errors
        st.error(f"Gemini request failed: {exc}")
        return

    # The sections below read the result with .get, so wrap anything that
    # is not a dict rather than crash on it.
    if not isinstance(result, dict):
        result = {"summary": result}

    st.subheader("\U0001F4DD Summary")  # memo emoji
    st.write(result.get("summary", "No summary found."))

    st.subheader("\U0001F4CC Highlights")  # pushpin emoji
    render_highlights(result.get("highlights"))

    st.subheader("\U0001F4D8 Glossary")  # blue-book emoji
    render_glossary(result.get("glossary"))


if __name__ == "__main__":
    main()