arshad1234321 committed on
Commit
92f1ec3
·
verified ·
1 Parent(s): e0ce697

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ from docx import Document
4
+ import json
5
+ from google import genai
6
+
7
+ # -------------------------------
8
+ # Utility Functions
9
+ # -------------------------------
10
+
11
def extract_text_from_pdf(file):
    """
    Return the text of every page of a PDF as one string.

    Each page whose ``extract_text()`` result is non-empty contributes that
    text followed by a newline; pages that yield nothing are skipped.
    """
    reader = PyPDF2.PdfReader(file)
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(chunk + "\n" for chunk in page_texts if chunk)
22
+
23
def extract_text_from_docx(file):
    """
    Return the paragraph text of a DOCX document as one string.

    Every entry in the document's ``paragraphs`` contributes its text
    followed by a newline, so empty paragraphs become blank lines.
    """
    docx_file = Document(file)
    return "".join(paragraph.text + "\n" for paragraph in docx_file.paragraphs)
32
+
33
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if not (stripped.startswith("```") and stripped.endswith("```")):
        return stripped
    inner = stripped[3:-3]
    # The opening fence is commonly tagged with a language, e.g. ```json.
    if inner[:4].lower() == "json":
        inner = inner[4:]
    return inner.strip()


def call_gemini_api(document_content):
    """
    Ask the Gemini model (gemini-2.0-flash) to analyze a legal document.

    The prompt instructs the model to summarize the document, highlight
    obligations/rights/critical clauses, and explain complex legal terms,
    and to answer with a JSON object that has three keys:
      - summary: A concise summary of the document.
      - highlights: Key obligations, rights, and clauses.
      - glossary: Simplified explanations of complex legal terms.

    Args:
        document_content: The extracted text of the document to analyze.

    Returns:
        A dict. When the reply parses as a JSON object, that object is
        returned as-is. Otherwise an error is shown in the Streamlit UI and a
        fallback dict is returned whose "summary" is the raw reply text and
        whose "highlights" and "glossary" are "N/A".

    Raises:
        KeyError: If "GEMINI_API_KEY" is missing from the Streamlit secrets.
        Any exception raised by the Gemini client call is propagated.
    """
    # The API key is read from Streamlit secrets.
    client = genai.Client(api_key=st.secrets["GEMINI_API_KEY"])

    # Construct the prompt with clear instructions.
    prompt = (
        "Analyze the following legal document:\n\n"
        f"{document_content}\n\n"
        "Instructions:\n"
        "1. Summarize the key points of the document.\n"
        "2. Highlight obligations, rights, and critical clauses.\n"
        "3. Provide simplified explanations of complex legal terms.\n"
        "Output the results as a valid JSON object with the following keys: "
        "'summary', 'highlights', 'glossary'."
    )

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
        # Request JSON output explicitly instead of relying on the prompt alone.
        config={"response_mime_type": "application/json"},
    )

    # response.text can be None when the reply carries no text parts.
    raw_text = response.text or ""

    try:
        # Models often wrap JSON in a Markdown fence; remove it before parsing.
        result = json.loads(_strip_code_fence(raw_text))
        if not isinstance(result, dict):
            # Callers use result.get(...), so anything but an object is unusable.
            raise ValueError("Gemini reply is valid JSON but not an object")
    except ValueError:  # json.JSONDecodeError is a subclass of ValueError
        st.error("Failed to parse Gemini API response as JSON. Returning raw text instead.")
        result = {"summary": raw_text, "highlights": "N/A", "glossary": "N/A"}

    return result
74
+
75
+ # -------------------------------
76
+ # Main Application
77
+ # -------------------------------
78
+
79
def _extract_uploaded_text(uploaded_file):
    """Return the text of *uploaded_file*, or None when its type is unsupported."""
    pdf_mime = "application/pdf"
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    if uploaded_file.type == pdf_mime:
        return extract_text_from_pdf(uploaded_file)
    if uploaded_file.type == docx_mime:
        return extract_text_from_docx(uploaded_file)

    # The MIME type is reported by the browser and may be empty or generic,
    # so fall back to the file extension before giving up.
    filename = uploaded_file.name.lower()
    if filename.endswith(".pdf"):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith(".docx"):
        return extract_text_from_docx(uploaded_file)
    return None


def main():
    """
    Render the Streamlit page of the legal document summarizer.

    Flow: show the uploader, display the uploaded file's details, extract its
    text, show the text, and - when the "Summarize Document" button is
    pressed - send the text to ``call_gemini_api`` and render the summary,
    highlights, and glossary. Each failure (unsupported type, unreadable
    file, no extractable text, unusable API result) produces exactly one
    ``st.error`` message and stops further rendering.
    """
    st.title("Legal Document Summarizer")
    st.write("Upload a legal document (PDF or DOCX) to receive a concise summary, key highlights, and a glossary of complex legal terms.")

    uploaded_file = st.file_uploader("Upload Legal Document", type=["pdf", "docx"])
    if uploaded_file is None:
        return

    # Display file details
    file_details = {
        "Filename": uploaded_file.name,
        "File Type": uploaded_file.type,
        "File Size (bytes)": uploaded_file.size,
    }
    st.write("**Uploaded File Details**", file_details)

    # Extract text from the document based on the file type
    try:
        document_text = _extract_uploaded_text(uploaded_file)
    except Exception as exc:
        # UI boundary: a corrupt or malformed upload should yield a readable
        # message rather than an unhandled exception from the parser.
        st.error(f"Could not read the uploaded document: {exc}")
        return

    if document_text is None:
        st.error("Unsupported file type.")
        return

    if not document_text.strip():
        st.error("No text could be extracted from the document.")
        return

    st.subheader("Extracted Document Text")
    st.text_area("Document Text", document_text, height=300)

    if not st.button("Summarize Document"):
        return

    with st.spinner("Analyzing document via Gemini API..."):
        result = call_gemini_api(document_text)

    # The result is read with .get(), so it must be a non-empty dict.
    if not (isinstance(result, dict) and result):
        st.error("Failed to retrieve a valid response from the Gemini API.")
        return

    st.subheader("Document Summary")
    st.write(result.get("summary", "No summary provided by the API."))

    st.subheader("Highlights (Obligations, Rights, Critical Clauses)")
    st.write(result.get("highlights", "No highlights provided by the API."))

    st.subheader("Glossary (Simplified Legal Terms)")
    st.write(result.get("glossary", "No glossary provided by the API."))
127
+
128
# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()