mangalathkedar committed on
Commit
fa86a32
·
verified ·
1 Parent(s): c124b82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -116
app.py CHANGED
@@ -1,14 +1,17 @@
1
  import base64
2
  import streamlit as st
3
- import openai
4
  import os
 
 
5
  from PIL import Image
6
  import io
7
  import tempfile
8
- import pdf2image
9
 
10
- # Set OpenAI API key from Streamlit secrets
11
- openai.api_key = st.secrets["OPENAI_API_KEY"]
 
 
12
 
13
  def convert_pdf_to_images(pdf_file):
14
  """Convert PDF to list of images"""
@@ -16,35 +19,35 @@ def convert_pdf_to_images(pdf_file):
16
  tmp_file.write(pdf_file.getvalue())
17
  pdf_path = tmp_file.name
18
 
19
- try:
20
- images = pdf2image.convert_from_path(pdf_path)
21
- os.unlink(pdf_path)
22
- return images
23
- except Exception as e:
24
- st.error(f"Error converting PDF: {str(e)}")
25
- return []
26
 
27
  def format_response(text):
28
- """Format the analysis response with clean styling"""
29
  formatted_text = """
30
  <div style="
31
  background-color: white;
32
- padding: 20px;
33
- border-radius: 5px;
34
  font-family: Arial, sans-serif;
35
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
36
  ">
37
  """
38
 
39
- # Split into pages
40
  pages = text.split("Page")
41
 
42
  for page_num, page_content in enumerate(pages[1:], 1):
43
- # Add page header
44
- formatted_text += f'<div style="margin-bottom: 30px;">'
45
- formatted_text += f'<h3 style="color: #2c3e50; margin-bottom: 15px;">Page {page_num}</h3>'
 
 
 
 
 
 
46
 
47
- # Process each line
48
  lines = page_content.split('\n')
49
  for line in lines:
50
  if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
@@ -52,15 +55,26 @@ def format_response(text):
52
 
53
  if ':' in line:
54
  label, value = line.split(':', 1)
55
- formatted_text += f'<div style="margin-bottom: 10px; display: flex;">'
56
- formatted_text += f'<span style="font-weight: bold; color: #2c3e50; min-width: 200px;">{label.strip()}</span>'
57
- formatted_text += f'<span style="color: #34495e; flex: 1;">{value.strip()}</span>'
58
- formatted_text += '</div>'
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  formatted_text += '</div>'
61
 
62
  if page_num < len(pages) - 1:
63
- formatted_text += '<hr style="border: 1px solid #eee; margin: 20px 0;">'
64
 
65
  formatted_text += '</div>'
66
  return formatted_text
@@ -74,7 +88,7 @@ def analyze_image(image):
74
 
75
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
76
 
77
- response = openai.ChatCompletion.create(
78
  model="gpt-4-vision-preview",
79
  messages=[
80
  {
@@ -104,124 +118,93 @@ def analyze_image(image):
104
  max_tokens=1000
105
  )
106
 
107
- return response.choices[0].message['content']
108
  except Exception as e:
109
- st.error(f"API Error: {str(e)}")
110
  return f"An error occurred: {str(e)}"
 
111
  def main():
112
  st.set_page_config(page_title="Document Analysis App", layout="wide")
113
 
114
- # Updated styling with better contrast and modern look
115
  st.markdown("""
116
  <style>
117
  .stApp {
118
- background-color: #f8f9fa;
 
 
 
 
 
119
  }
120
  .stButton>button {
121
  width: 100%;
122
- background-color: #0066cc; /* Changed to a more vibrant blue */
123
  color: white;
124
  border: none;
125
- padding: 12px 24px;
126
- border-radius: 6px;
127
- margin-top: 20px;
128
- margin-bottom: 20px;
129
- font-weight: 500;
130
- transition: background-color 0.3s ease;
131
  }
132
  .stButton>button:hover {
133
- background-color: #0052a3;
134
  }
135
  .uploadedFile {
136
- margin-bottom: 20px;
137
- background-color: white;
138
- padding: 15px;
139
- border-radius: 6px;
140
- border: 1px solid #e0e0e0;
141
- }
142
- div[data-testid="stFileUploader"] {
143
- background-color: white;
144
- padding: 20px;
145
- border-radius: 10px;
146
- border: 2px dashed #cccccc;
147
  margin: 20px 0;
 
 
 
 
 
 
 
 
 
148
  }
149
- h1 {
150
- color: #1a1a1a;
151
- margin-bottom: 30px;
152
- text-align: center;
153
  }
154
  </style>
155
  """, unsafe_allow_html=True)
156
-
157
- # Update the format_response function styling
158
- def format_response(text):
159
- formatted_text = """
160
- <div style="
161
- background-color: white;
162
- padding: 30px;
163
- border-radius: 10px;
164
- font-family: 'Segoe UI', Arial, sans-serif;
165
- box-shadow: 0 4px 6px rgba(0,0,0,0.1);
166
- margin: 20px 0;
167
- border: 1px solid #e0e0e0;
168
- ">
169
- """
170
-
171
- pages = text.split("Page")
172
 
173
- for page_num, page_content in enumerate(pages[1:], 1):
174
- formatted_text += f'<div style="margin-bottom: 30px;">'
175
- formatted_text += f'<h3 style="color: #0066cc; margin-bottom: 20px; padding-bottom: 10px; border-bottom: 2px solid #f0f0f0;">Page {page_num}</h3>'
176
 
177
- lines = page_content.split('\n')
178
- for line in lines:
179
- if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
180
- line = line.replace('**', '').replace('- ', '')
181
-
182
- if ':' in line:
183
- label, value = line.split(':', 1)
184
- formatted_text += f'<div style="margin-bottom: 15px; display: flex; align-items: center;">'
185
- formatted_text += f'<span style="font-weight: 600; color: #2c3e50; min-width: 200px; padding-right: 20px;">{label.strip()}</span>'
186
- formatted_text += f'<span style="color: #333333; flex: 1;">{value.strip()}</span>'
187
- formatted_text += '</div>'
188
-
189
- formatted_text += '</div>'
190
 
191
- if page_num < len(pages) - 1:
192
- formatted_text += '<hr style="border: none; height: 1px; background-color: #e0e0e0; margin: 30px 0;">'
193
-
194
- formatted_text += '</div>'
195
- return formatted_text
196
-
197
- col1, col2, col3 = st.columns([1,2,1])
198
- with col2:
199
- st.title("Document Analysis App")
200
-
201
- uploaded_file = st.file_uploader("Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg'])
202
-
203
- if uploaded_file is not None:
204
- if uploaded_file.type == "application/pdf":
205
- with st.spinner("Processing PDF..."):
206
- images = convert_pdf_to_images(uploaded_file)
 
 
 
207
 
208
  if st.button("Extract Information"):
209
  with st.spinner("Analyzing document..."):
210
- all_results = []
211
- for i, image in enumerate(images, 1):
212
- result = analyze_image(image)
213
- all_results.append(f"Page {i} Information:\n{result}")
214
-
215
- combined_results = "\n\n".join(all_results)
216
- st.markdown(format_response(combined_results), unsafe_allow_html=True)
217
-
218
- else:
219
- image = Image.open(uploaded_file)
220
-
221
- if st.button("Extract Information"):
222
- with st.spinner("Analyzing document..."):
223
- result = analyze_image(image)
224
- st.markdown(format_response(result), unsafe_allow_html=True)
225
 
226
  if __name__ == "__main__":
227
  main()
 
1
  import base64
2
  import streamlit as st
3
+ from openai import OpenAI
4
  import os
5
+ from dotenv import load_dotenv
6
+ import pdf2image
7
  from PIL import Image
8
  import io
9
  import tempfile
 
10
 
11
+ # Load environment variables
12
+ load_dotenv()
13
+ # Initialize OpenAI client
14
+ client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
15
 
16
  def convert_pdf_to_images(pdf_file):
17
  """Convert PDF to list of images"""
 
19
  tmp_file.write(pdf_file.getvalue())
20
  pdf_path = tmp_file.name
21
 
22
+ images = pdf2image.convert_from_path(pdf_path)
23
+ os.unlink(pdf_path)
24
+ return images
 
 
 
 
25
 
26
  def format_response(text):
 
27
  formatted_text = """
28
  <div style="
29
  background-color: white;
30
+ padding: 30px;
31
+ border-radius: 10px;
32
  font-family: Arial, sans-serif;
33
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
34
+ margin: 20px 0;
35
  ">
36
  """
37
 
 
38
  pages = text.split("Page")
39
 
40
  for page_num, page_content in enumerate(pages[1:], 1):
41
+ formatted_text += f'''
42
+ <div style="margin-bottom: 40px;">
43
+ <h3 style="color: #2c3e50;
44
+ margin-bottom: 20px;
45
+ padding-bottom: 10px;
46
+ border-bottom: 2px solid #eee;">
47
+ Page {page_num}
48
+ </h3>
49
+ '''
50
 
 
51
  lines = page_content.split('\n')
52
  for line in lines:
53
  if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
 
55
 
56
  if ':' in line:
57
  label, value = line.split(':', 1)
58
+ formatted_text += f'''
59
+ <div style="margin-bottom: 15px;
60
+ display: flex;
61
+ align-items: baseline;
62
+ gap: 20px;">
63
+ <span style="font-weight: 600;
64
+ color: #2c3e50;
65
+ min-width: 200px;">
66
+ {label.strip()}
67
+ </span>
68
+ <span style="color: #34495e; flex: 1;">
69
+ {value.strip()}
70
+ </span>
71
+ </div>
72
+ '''
73
 
74
  formatted_text += '</div>'
75
 
76
  if page_num < len(pages) - 1:
77
+ formatted_text += '<hr style="border: none; border-top: 1px solid #eee; margin: 30px 0;">'
78
 
79
  formatted_text += '</div>'
80
  return formatted_text
 
88
 
89
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
90
 
91
+ response = client.chat.completions.create(
92
  model="gpt-4-vision-preview",
93
  messages=[
94
  {
 
118
  max_tokens=1000
119
  )
120
 
121
+ return response.choices[0].message.content
122
  except Exception as e:
 
123
  return f"An error occurred: {str(e)}"
124
+
125
  def main():
126
  st.set_page_config(page_title="Document Analysis App", layout="wide")
127
 
128
+ # Custom CSS
129
  st.markdown("""
130
  <style>
131
  .stApp {
132
+ background-color: #ffffff !important;
133
+ }
134
+ .main {
135
+ padding: 20px;
136
+ max-width: 1200px;
137
+ margin: 0 auto;
138
  }
139
  .stButton>button {
140
  width: 100%;
141
+ background-color: #2c3e50;
142
  color: white;
143
  border: none;
144
+ padding: 10px 20px;
145
+ border-radius: 5px;
146
+ margin: 20px 0;
 
 
 
147
  }
148
  .stButton>button:hover {
149
+ background-color: #34495e;
150
  }
151
  .uploadedFile {
 
 
 
 
 
 
 
 
 
 
 
152
  margin: 20px 0;
153
+ padding: 20px;
154
+ background-color: #f8f9fa;
155
+ border-radius: 5px;
156
+ }
157
+ [data-testid="stHeader"] {
158
+ background-color: transparent;
159
+ }
160
+ .stDeployButton {
161
+ display: none !important;
162
  }
163
+ .main .block-container {
164
+ padding-top: 2rem;
165
+ padding-bottom: 2rem;
 
166
  }
167
  </style>
168
  """, unsafe_allow_html=True)
169
+
170
+ # Main container
171
+ with st.container():
172
+ col1, col2, col3 = st.columns([1,3,1])
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
+ with col2:
175
+ st.title("Document Analysis App")
 
176
 
177
+ upload_container = st.container()
178
+ with upload_container:
179
+ uploaded_file = st.file_uploader(
180
+ "Upload document (PDF/Image)",
181
+ type=['pdf', 'png', 'jpg', 'jpeg']
182
+ )
 
 
 
 
 
 
 
183
 
184
+ if uploaded_file is not None:
185
+ if uploaded_file.type == "application/pdf":
186
+ # Handle PDF
187
+ with st.spinner("Processing PDF..."):
188
+ images = convert_pdf_to_images(uploaded_file)
189
+
190
+ if st.button("Extract Information"):
191
+ with st.spinner("Analyzing document..."):
192
+ all_results = []
193
+ for i, image in enumerate(images, 1):
194
+ result = analyze_image(image)
195
+ all_results.append(f"Page {i} Information:\n{result}")
196
+
197
+ combined_results = "\n\n".join(all_results)
198
+ st.markdown(format_response(combined_results), unsafe_allow_html=True)
199
+
200
+ else:
201
+ # Handle single image
202
+ image = Image.open(uploaded_file)
203
 
204
  if st.button("Extract Information"):
205
  with st.spinner("Analyzing document..."):
206
+ result = analyze_image(image)
207
+ st.markdown(format_response(f"Page 1\n{result}"), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  if __name__ == "__main__":
210
  main()