mangalathkedar committed on
Commit
67de82a
·
verified ·
1 Parent(s): 2d441c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -91
app.py CHANGED
@@ -3,9 +3,9 @@ import streamlit as st
3
  from openai import OpenAI
4
  import os
5
  from dotenv import load_dotenv
 
6
  from PIL import Image
7
  import io
8
- import fitz # PyMuPDF
9
  import tempfile
10
 
11
  # Load environment variables
@@ -14,43 +14,24 @@ load_dotenv()
14
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
15
 
16
  def convert_pdf_to_images(pdf_file):
17
- """Convert PDF to list of images using PyMuPDF"""
18
- images = []
19
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
20
  tmp_file.write(pdf_file.getvalue())
21
  pdf_path = tmp_file.name
22
-
23
- pdf_document = fitz.open(pdf_path)
24
- for page_number in range(pdf_document.page_count):
25
- page = pdf_document[page_number]
26
- pix = page.get_pixmap()
27
- img_data = pix.tobytes("png")
28
- image = Image.open(io.BytesIO(img_data))
29
- images.append(image)
30
-
31
- pdf_document.close()
32
  os.unlink(pdf_path)
33
  return images
34
 
35
  def format_response(text):
36
  """Format the analysis response with clean styling"""
37
- formatted_text = """
38
- <div style="
39
- background-color: white;
40
- padding: 20px;
41
- border-radius: 5px;
42
- font-family: Arial, sans-serif;
43
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
44
- ">
45
- """
46
 
47
  # Split into pages
48
  pages = text.split("Page")
49
 
50
  for page_num, page_content in enumerate(pages[1:], 1): # Skip first empty split
51
- # Add page header
52
- formatted_text += f'<div style="margin-bottom: 30px;">'
53
- formatted_text += f'<h3 style="color: #2c3e50; margin-bottom: 15px;">Page {page_num}</h3>'
54
 
55
  # Process each line
56
  lines = page_content.split('\n')
@@ -62,18 +43,8 @@ def format_response(text):
62
 
63
  if ':' in line:
64
  label, value = line.split(':', 1)
65
- formatted_text += f'<div style="margin-bottom: 10px; display: flex;">'
66
- formatted_text += f'<span style="font-weight: bold; color: #2c3e50; min-width: 200px;">{label.strip()}</span>'
67
- formatted_text += f'<span style="color: #34495e; flex: 1;">{value.strip()}</span>'
68
- formatted_text += '</div>'
69
-
70
- formatted_text += '</div>'
71
 
72
- # Add separator between pages except for the last page
73
- if page_num < len(pages) - 1:
74
- formatted_text += '<hr style="border: 1px solid #eee; margin: 20px 0;">'
75
-
76
- formatted_text += '</div>'
77
  return formatted_text
78
 
79
  def analyze_image(image):
@@ -121,62 +92,34 @@ def analyze_image(image):
121
 
122
  def main():
123
  st.set_page_config(page_title="Document Analysis App", layout="wide")
124
-
125
- # Custom CSS to set light background and improve button styling
126
- st.markdown("""
127
- <style>
128
- .stApp {
129
- background-color: white;
130
- }
131
- .stButton>button {
132
- width: 100%;
133
- background-color: #2c3e50;
134
- color: white;
135
- border: none;
136
- padding: 10px 20px;
137
- border-radius: 5px;
138
- margin-top: 20px;
139
- margin-bottom: 20px;
140
- }
141
- .stButton>button:hover {
142
- background-color: #34495e;
143
- }
144
- .uploadedFile {
145
- margin-bottom: 20px;
146
- }
147
- </style>
148
- """, unsafe_allow_html=True)
149
-
150
- col1, col2, col3 = st.columns([1,2,1])
151
- with col2:
152
- st.title("Document Analysis App")
153
-
154
- uploaded_file = st.file_uploader("Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg'])
155
-
156
- if uploaded_file is not None:
157
- if uploaded_file.type == "application/pdf":
158
- # Handle PDF
159
- with st.spinner("Processing PDF..."):
160
- images = convert_pdf_to_images(uploaded_file)
161
-
162
- if st.button("Extract Information"):
163
- with st.spinner("Analyzing document..."):
164
- all_results = []
165
- for i, image in enumerate(images, 1):
166
- result = analyze_image(image)
167
- all_results.append(f"Page {i} Information:\n{result}")
168
-
169
- combined_results = "\n\n".join(all_results)
170
- st.markdown(format_response(combined_results), unsafe_allow_html=True)
171
-
172
- else:
173
- # Handle single image
174
- image = Image.open(uploaded_file)
175
-
176
  if st.button("Extract Information"):
177
  with st.spinner("Analyzing document..."):
178
- result = analyze_image(image)
179
- st.markdown(format_response(result), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  if __name__ == "__main__":
182
- main()
 
3
  from openai import OpenAI
4
  import os
5
  from dotenv import load_dotenv
6
+ import pdf2image
7
  from PIL import Image
8
  import io
 
9
  import tempfile
10
 
11
  # Load environment variables
 
14
  client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
15
 
16
  def convert_pdf_to_images(pdf_file):
17
+ """Convert PDF to list of images"""
 
18
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
19
  tmp_file.write(pdf_file.getvalue())
20
  pdf_path = tmp_file.name
21
+
22
+ images = pdf2image.convert_from_path(pdf_path)
 
 
 
 
 
 
 
 
23
  os.unlink(pdf_path)
24
  return images
25
 
26
  def format_response(text):
27
  """Format the analysis response with clean styling"""
28
+ formatted_text = ""
 
 
 
 
 
 
 
 
29
 
30
  # Split into pages
31
  pages = text.split("Page")
32
 
33
  for page_num, page_content in enumerate(pages[1:], 1): # Skip first empty split
34
+ formatted_text += f'\n### Page {page_num}\n'
 
 
35
 
36
  # Process each line
37
  lines = page_content.split('\n')
 
43
 
44
  if ':' in line:
45
  label, value = line.split(':', 1)
46
+ formatted_text += f'- **{label.strip()}**: {value.strip()}\n'
 
 
 
 
 
47
 
 
 
 
 
 
48
  return formatted_text
49
 
50
  def analyze_image(image):
 
92
 
93
  def main():
94
  st.set_page_config(page_title="Document Analysis App", layout="wide")
95
+
96
+ st.title("Document Analysis App")
97
+ uploaded_file = st.file_uploader("Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg'])
98
+
99
+ if uploaded_file is not None:
100
+ if uploaded_file.type == "application/pdf":
101
+ # Handle PDF
102
+ with st.spinner("Processing PDF..."):
103
+ images = convert_pdf_to_images(uploaded_file)
104
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  if st.button("Extract Information"):
106
  with st.spinner("Analyzing document..."):
107
+ all_results = []
108
+ for i, image in enumerate(images, 1):
109
+ result = analyze_image(image)
110
+ all_results.append(f"Page {i} Information:\n{result}")
111
+
112
+ combined_results = "\n\n".join(all_results)
113
+ st.markdown(format_response(combined_results))
114
+
115
+ else:
116
+ # Handle single image
117
+ image = Image.open(uploaded_file)
118
+
119
+ if st.button("Extract Information"):
120
+ with st.spinner("Analyzing document..."):
121
+ result = analyze_image(image)
122
+ st.markdown(format_response(result))
123
 
124
  if __name__ == "__main__":
125
+ main()