mangalathkedar committed on
Commit
e815ec8
·
verified ·
1 Parent(s): b35e37e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -87
app.py CHANGED
@@ -24,57 +24,45 @@ def convert_pdf_to_images(pdf_file):
24
  return images
25
 
26
  def format_response(text):
 
27
  formatted_text = """
28
  <div style="
29
  background-color: white;
30
- padding: 30px;
31
- border-radius: 10px;
32
  font-family: Arial, sans-serif;
33
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
34
- margin: 20px 0;
35
  ">
36
  """
37
 
 
38
  pages = text.split("Page")
39
 
40
- for page_num, page_content in enumerate(pages[1:], 1):
41
- formatted_text += f'''
42
- <div style="margin-bottom: 40px;">
43
- <h3 style="color: #2c3e50;
44
- margin-bottom: 20px;
45
- padding-bottom: 10px;
46
- border-bottom: 2px solid #eee;">
47
- Page {page_num}
48
- </h3>
49
- '''
50
 
 
51
  lines = page_content.split('\n')
52
  for line in lines:
 
53
  if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
 
54
  line = line.replace('**', '').replace('- ', '')
55
 
56
  if ':' in line:
57
  label, value = line.split(':', 1)
58
- formatted_text += f'''
59
- <div style="margin-bottom: 15px;
60
- display: flex;
61
- align-items: baseline;
62
- gap: 20px;">
63
- <span style="font-weight: 600;
64
- color: #2c3e50;
65
- min-width: 200px;">
66
- {label.strip()}
67
- </span>
68
- <span style="color: #34495e; flex: 1;">
69
- {value.strip()}
70
- </span>
71
- </div>
72
- '''
73
 
74
  formatted_text += '</div>'
75
 
 
76
  if page_num < len(pages) - 1:
77
- formatted_text += '<hr style="border: none; border-top: 1px solid #eee; margin: 30px 0;">'
78
 
79
  formatted_text += '</div>'
80
  return formatted_text
@@ -89,7 +77,7 @@ def analyze_image(image):
89
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
90
 
91
  response = client.chat.completions.create(
92
- model="gpt-4-vision-preview",
93
  messages=[
94
  {
95
  "role": "user",
@@ -125,16 +113,11 @@ def analyze_image(image):
125
  def main():
126
  st.set_page_config(page_title="Document Analysis App", layout="wide")
127
 
128
- # Custom CSS
129
  st.markdown("""
130
  <style>
131
  .stApp {
132
- background-color: #ffffff !important;
133
- }
134
- .main {
135
- padding: 20px;
136
- max-width: 1200px;
137
- margin: 0 auto;
138
  }
139
  .stButton>button {
140
  width: 100%;
@@ -143,68 +126,48 @@ def main():
143
  border: none;
144
  padding: 10px 20px;
145
  border-radius: 5px;
146
- margin: 20px 0;
 
147
  }
148
  .stButton>button:hover {
149
  background-color: #34495e;
150
  }
151
  .uploadedFile {
152
- margin: 20px 0;
153
- padding: 20px;
154
- background-color: #f8f9fa;
155
- border-radius: 5px;
156
- }
157
- [data-testid="stHeader"] {
158
- background-color: transparent;
159
- }
160
- .stDeployButton {
161
- display: none !important;
162
- }
163
- .main .block-container {
164
- padding-top: 2rem;
165
- padding-bottom: 2rem;
166
  }
167
  </style>
168
  """, unsafe_allow_html=True)
169
 
170
- # Main container
171
- with st.container():
172
- col1, col2, col3 = st.columns([1,3,1])
173
 
174
- with col2:
175
- st.title("Document Analysis App")
176
-
177
- upload_container = st.container()
178
- with upload_container:
179
- uploaded_file = st.file_uploader(
180
- "Upload document (PDF/Image)",
181
- type=['pdf', 'png', 'jpg', 'jpeg']
182
- )
183
-
184
- if uploaded_file is not None:
185
- if uploaded_file.type == "application/pdf":
186
- # Handle PDF
187
- with st.spinner("Processing PDF..."):
188
- images = convert_pdf_to_images(uploaded_file)
189
-
190
- if st.button("Extract Information"):
191
- with st.spinner("Analyzing document..."):
192
- all_results = []
193
- for i, image in enumerate(images, 1):
194
- result = analyze_image(image)
195
- all_results.append(f"Page {i} Information:\n{result}")
196
-
197
- combined_results = "\n\n".join(all_results)
198
- st.markdown(format_response(combined_results), unsafe_allow_html=True)
199
-
200
- else:
201
- # Handle single image
202
- image = Image.open(uploaded_file)
203
 
204
  if st.button("Extract Information"):
205
  with st.spinner("Analyzing document..."):
206
- result = analyze_image(image)
207
- st.markdown(format_response(f"Page 1\n{result}"), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  if __name__ == "__main__":
210
  main()
 
24
  return images
25
 
def format_response(text):
    """Render extracted "Label: value" text as a styled HTML card.

    The text is divided into pages at header lines of the form ``Page <n>``
    or ``Page <n> Information:``. Text that contains no such header is
    rendered as a single page, so a raw single-image result still produces
    output. Within each page, every non-blank line containing a colon becomes
    one label/value row; lines starting with ``*`` or ``Here`` are skipped.

    Args:
        text: Plain-text analysis result; ``None`` is treated as empty.

    Returns:
        An HTML string: a wrapper ``<div>`` holding one section per page,
        with an ``<hr>`` between consecutive pages. Labels and values are
        HTML-escaped.
    """
    # Imported locally so the block does not depend on the file's import list.
    import html
    import re

    # Only a whole line such as "Page 2" or "Page 2 Information:" starts a new
    # page. Splitting on the bare substring "Page" would also cut values like
    # "Home Page: ..." in half and leak "1 Information:" into the rows.
    page_header = re.compile(
        r'^[ \t]*Page[ \t]+\d+(?:[ \t]+Information)?[ \t]*:?[ \t]*$',
        re.MULTILINE,
    )

    text = text or ""
    sections = page_header.split(text)
    if len(sections) > 1:
        # Anything before the first header is preamble and is dropped.
        page_bodies = sections[1:]
    elif text.strip():
        # No header at all: treat the whole text as page 1.
        page_bodies = [text]
    else:
        page_bodies = []

    parts = ["""
    <div style="
        background-color: white;
        padding: 20px;
        border-radius: 5px;
        font-family: Arial, sans-serif;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    ">
    """]

    last_page = len(page_bodies)
    for page_num, page_content in enumerate(page_bodies, 1):
        parts.append('<div style="margin-bottom: 30px;">')
        parts.append(
            f'<h3 style="color: #2c3e50; margin-bottom: 15px;">Page {page_num}</h3>'
        )

        for raw_line in page_content.split('\n'):
            line = raw_line.strip()
            # Skip blank lines, asterisk-led lines and "Here ..." preambles.
            if not line or line.startswith(('*', 'Here')):
                continue

            # Drop markdown bold markers and a leading list bullet only, so
            # dashes inside a value (e.g. "2020 - 2021") are preserved.
            line = line.replace('**', '')
            if line.startswith('- '):
                line = line[2:]

            if ':' not in line:
                continue

            label, _, value = line.partition(':')
            # The result is rendered with unsafe_allow_html=True, so the
            # extracted text must be escaped before it is embedded.
            safe_label = html.escape(label.strip())
            safe_value = html.escape(value.strip())
            parts.append('<div style="margin-bottom: 10px; display: flex;">')
            parts.append(
                '<span style="font-weight: bold; color: #2c3e50; '
                f'min-width: 200px;">{safe_label}</span>'
            )
            parts.append(
                f'<span style="color: #34495e; flex: 1;">{safe_value}</span>'
            )
            parts.append('</div>')

        parts.append('</div>')

        # Separator between pages, but not after the last one.
        if page_num < last_page:
            parts.append('<hr style="border: 1px solid #eee; margin: 20px 0;">')

    parts.append('</div>')
    return ''.join(parts)
 
77
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
78
 
79
  response = client.chat.completions.create(
80
+ model="gpt-4o-mini",
81
  messages=[
82
  {
83
  "role": "user",
 
113
  def main():
114
  st.set_page_config(page_title="Document Analysis App", layout="wide")
115
 
116
+ # Custom CSS to set light background and improve button styling
117
  st.markdown("""
118
  <style>
119
  .stApp {
120
+ background-color: white;
 
 
 
 
 
121
  }
122
  .stButton>button {
123
  width: 100%;
 
126
  border: none;
127
  padding: 10px 20px;
128
  border-radius: 5px;
129
+ margin-top: 20px;
130
+ margin-bottom: 20px;
131
  }
132
  .stButton>button:hover {
133
  background-color: #34495e;
134
  }
135
  .uploadedFile {
136
+ margin-bottom: 20px;
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  </style>
139
  """, unsafe_allow_html=True)
140
 
141
+ col1, col2, col3 = st.columns([1,2,1])
142
+ with col2:
143
+ st.title("Document Analysis App")
144
 
145
+ uploaded_file = st.file_uploader("Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg'])
146
+
147
+ if uploaded_file is not None:
148
+ if uploaded_file.type == "application/pdf":
149
+ # Handle PDF
150
+ with st.spinner("Processing PDF..."):
151
+ images = convert_pdf_to_images(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  if st.button("Extract Information"):
154
  with st.spinner("Analyzing document..."):
155
+ all_results = []
156
+ for i, image in enumerate(images, 1):
157
+ result = analyze_image(image)
158
+ all_results.append(f"Page {i} Information:\n{result}")
159
+
160
+ combined_results = "\n\n".join(all_results)
161
+ st.markdown(format_response(combined_results), unsafe_allow_html=True)
162
+
163
+ else:
164
+ # Handle single image
165
+ image = Image.open(uploaded_file)
166
+
167
+ if st.button("Extract Information"):
168
+ with st.spinner("Analyzing document..."):
169
+ result = analyze_image(image)
170
+ st.markdown(format_response(result), unsafe_allow_html=True)
171
 
172
  if __name__ == "__main__":
173
  main()