anoopreddyyeddula committed on
Commit
3ecbba0
·
1 Parent(s): df33ba6

fix: resolve merge conflicts and update UI

Browse files
Files changed (1) hide show
  1. app.py +77 -70
app.py CHANGED
@@ -11,18 +11,22 @@ import logging
11
  import cv2
12
  from datetime import datetime
13
  import time
 
14
 
15
  # Set up logging for error handling
16
  logging.basicConfig(level=logging.DEBUG)
 
17
 
18
  # Initialize the OCR reader
19
  reader = easyocr.Reader(['en'])
20
 
21
- # Use text classification model (distilbert for sentiment analysis or text validation)
22
- text_classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
23
-
24
- # Use document classification model (ResNet50 as an example)
25
- doc_classifier = pipeline("image-classification", model="microsoft/resnet-50")
 
 
26
 
27
  def convert_pdf_to_images(pdf_file):
28
  """Convert PDF to list of images with detailed logging"""
@@ -96,93 +100,96 @@ def process_single_image(image):
96
  logging.error(f"Error processing the image: {str(e)}")
97
  return {'error': f"Error processing the image: {str(e)}"}
98
 
99
- # Gradio interface setup
100
- def gradio_interface(input_file):
101
- """Handle both PDF and image files uploaded by the user"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  try:
 
 
 
 
103
  if input_file.name.lower().endswith('.pdf'):
104
  images = convert_pdf_to_images(input_file)
105
  if images is None:
106
- return {'error': 'Invalid PDF or unable to extract images'}
107
  result = process_single_image(images[0])
108
- elif input_file.name.lower().endswith(('png', 'jpg', 'jpeg')):
109
  img = Image.open(input_file)
110
  result = process_single_image(img)
111
  else:
112
- return {'error': 'Unsupported file type. Please upload a valid image or PDF.'}
 
 
 
113
 
114
- return result
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  except Exception as e:
117
- logging.error(f"Error in file processing: {str(e)}")
118
- return {'error': f"Error processing the file: {str(e)}"}
119
 
120
- # Create the Gradio interface with improved UI and error handling
121
  iface = gr.Interface(
122
- fn=gradio_interface,
123
  inputs=[
124
  gr.File(
125
  label="Upload Insurance Document",
126
- file_types=["pdf", "png", "jpg", "jpeg", "tiff"],
127
- type="filepath"
128
  )
129
  ],
130
  outputs=[
131
- gr.Textbox(
132
- label="Extracted Text",
133
- lines=10,
134
- show_copy_button=True
135
- ),
136
- gr.Textbox(
137
- label="Validation Results",
138
- lines=3,
139
- show_copy_button=True
140
- ),
141
- gr.Textbox(
142
- label="Document Classification",
143
- lines=3,
144
- show_copy_button=True
145
- ),
146
- gr.File(
147
- label="Download Analysis Report (Excel)",
148
- type="filepath"
149
- )
150
  ],
151
  title="Insurance Claim Document Analyzer",
152
  description="Upload insurance documents (PDF/Images) for automated text extraction, validation, and classification.",
153
- article="""
154
- ### Supported Features:
155
- - Multi-page PDF processing
156
- - Image formats: PNG, JPG, TIFF
157
- - Text extraction & validation
158
- - Document classification
159
- - Detailed Excel report generation
160
-
161
- ### File Requirements:
162
- - Maximum file size: 10MB
163
- - Clear, readable content
164
- - English language documents
165
- """,
166
- examples=[],
167
- cache_examples=False,
168
- theme=gr.themes.Soft(
169
- primary_hue="blue",
170
- secondary_hue="gray",
171
- neutral_hue="gray"
172
- ),
173
- css=".gradio-container {max-width: 900px; margin: auto}",
174
- allow_flagging="never",
175
- analytics_enabled=False
176
  )
177
 
178
- # Launch with custom configurations
179
  if __name__ == "__main__":
180
- iface.launch(
181
- server_name="0.0.0.0",
182
- server_port=7860,
183
- share=False,
184
- debug=True,
185
- enable_queue=True,
186
- show_error=True,
187
- max_threads=4
188
- )
 
 
 
 
11
  import cv2
12
  from datetime import datetime
13
  import time
14
+ import os
15
 
16
# Configure root logging at DEBUG level and keep a module-scoped logger
# for the messages emitted from this file.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# EasyOCR reader restricted to English.
reader = easyocr.Reader(['en'])

# Build both Hugging Face pipelines up front. A failure is logged and then
# re-raised, so importing this module fails rather than continuing without
# the models.
try:
    text_classifier = pipeline(
        "text-classification",
        model="distilbert-base-uncased-finetuned-sst-2-english",
    )
    doc_classifier = pipeline(
        "image-classification",
        model="microsoft/resnet-50",
    )
except Exception as init_error:
    logger.error(f"Error initializing models: {str(init_error)}")
    raise
30
 
31
  def convert_pdf_to_images(pdf_file):
32
  """Convert PDF to list of images with detailed logging"""
 
100
  logging.error(f"Error processing the image: {str(e)}")
101
  return {'error': f"Error processing the image: {str(e)}"}
102
 
103
def generate_excel_report(result):
    """Write a one-row Excel report for a processed document.

    Args:
        result: Mapping produced by the image-processing step. The keys
            ``text``, ``validation``, ``validation_confidence``,
            ``doc_type`` and ``doc_confidence`` are read; a missing key
            falls back to an empty string (or ``0`` for the confidences).

    Returns:
        The path of the ``.xlsx`` file that was written (relative to the
        current working directory), or ``None`` when the report could not
        be generated. Failures are logged, never raised.
    """
    import uuid

    try:
        # Take the timestamp once so the row and the file name always agree.
        created_at = datetime.now()

        df = pd.DataFrame([{
            'Timestamp': created_at.strftime("%Y-%m-%d %H:%M:%S"),
            'Extracted Text': result.get('text', ''),
            'Validation': result.get('validation', ''),
            'Validation Confidence': result.get('validation_confidence', 0),
            'Document Type': result.get('doc_type', ''),
            'Document Confidence': result.get('doc_confidence', 0),
        }])

        # A second-resolution name alone is not unique: two reports created
        # within the same second would overwrite each other. The random
        # suffix keeps every report in its own file.
        unique_suffix = uuid.uuid4().hex[:8]
        output_path = (
            f"report_{created_at.strftime('%Y%m%d_%H%M%S')}_{unique_suffix}.xlsx"
        )
        df.to_excel(output_path, index=False)
        return output_path
    except Exception as e:
        # exc_info keeps the traceback in the log; the caller only sees None.
        logger.error(f"Error generating report: {str(e)}", exc_info=True)
        return None
121
+
122
def process_claim(input_file):
    """Run the full analysis for one uploaded document (Gradio callback).

    Args:
        input_file: The upload handed over by the ``gr.File`` input, or
            ``None`` when nothing was uploaded. Both a plain path string
            and an object exposing the path as ``.name`` are accepted.
            NOTE(review): which of the two Gradio passes depends on the
            installed version / the component's ``type`` - confirm.

    Returns:
        A 4-tuple ``(extracted_text, validation_text, classification_text,
        report_path)`` matching the four interface outputs. On any failure
        the first element carries the error message, the two status fields
        carry a short failure notice, and ``report_path`` is ``None``.
        Exceptions are logged and converted into that failure tuple.
    """
    try:
        if input_file is None:
            return "Please upload a file.", "No file provided.", "No file provided.", None

        # Resolve the on-disk path without assuming a particular upload type.
        file_path = input_file if isinstance(input_file, str) else input_file.name
        lowered = file_path.lower()

        if lowered.endswith('.pdf'):
            images = convert_pdf_to_images(input_file)
            # Covers both a failed conversion (None) and a PDF that yielded
            # no pages (empty list), which would otherwise hit images[0].
            if not images:
                return "Error processing PDF.", "Invalid PDF file.", "Processing failed.", None
            # Only the first page is analysed.
            result = process_single_image(images[0])
        elif lowered.endswith(('.png', '.jpg', '.jpeg', '.tiff', '.tif')):
            # Open by path and close the handle once processing is done.
            with Image.open(file_path) as img:
                result = process_single_image(img)
        else:
            return "Unsupported file type.", "Invalid file format.", "Processing failed.", None

        if 'error' in result:
            return result['error'], "Processing failed.", "Processing failed.", None

        # Generate Excel report (None when report generation failed).
        report_path = generate_excel_report(result)

        # Format output strings
        validation_text = f"Validation: {result['validation']}\nConfidence: {result['validation_confidence']:.2%}"
        classification_text = f"Type: {result['doc_type']}\nConfidence: {result['doc_confidence']:.2%}"

        return (
            result['text'],
            validation_text,
            classification_text,
            report_path or None,
        )

    except Exception as e:
        # exc_info keeps the traceback in the log; the UI only gets the message.
        logger.error(f"Error in process_claim: {str(e)}", exc_info=True)
        return str(e), "Processing failed.", "Processing failed.", None
160
 
161
# Create the Gradio interface: one file upload in, three text fields and a
# downloadable report out (the order matches the tuple returned by
# process_claim).
iface = gr.Interface(
    fn=process_claim,
    inputs=[
        gr.File(
            label="Upload Insurance Document",
            # Extensions are given with a leading dot, which is the form
            # Gradio documents for extension filters.
            file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff"],
        )
    ],
    outputs=[
        gr.Textbox(label="Extracted Text", lines=10),
        gr.Textbox(label="Validation Results", lines=3),
        gr.Textbox(label="Document Classification", lines=3),
        gr.File(label="Download Analysis Report"),
    ],
    title="Insurance Claim Document Analyzer",
    description="Upload insurance documents (PDF/Images) for automated text extraction, validation, and classification.",
    theme=gr.themes.Soft(primary_hue="blue"),
    css=".gradio-container {max-width: 900px; margin: auto}",
)
181
 
182
# Launch with error handling
if __name__ == "__main__":
    try:
        # Request queueing is enabled through queue(); the enable_queue
        # keyword of launch() is deprecated and rejected by newer Gradio
        # releases.
        iface.queue()
        iface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            debug=True,
            max_threads=4,
        )
    except Exception as e:
        # Log the failure, then re-raise so the process exits with an error.
        logger.error(f"Failed to launch interface: {str(e)}")
        raise