abinash73 committed on
Commit
b4e5c22
·
verified ·
1 Parent(s): 1938489

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +331 -161
app.py CHANGED
@@ -1,181 +1,351 @@
1
- <?php
2
- // ============================================
3
- // 1. Updated submit_resume.php with FCM Integration
4
- // ============================================
5
- header('Content-Type: application/json');
 
 
 
6
 
7
- // Database configuration
8
- $host = 'localhost';
9
- $dbname = 'asecurit_db';
10
- $username = 'asecurit_root';
11
- $password = 'Pass098&6';
12
 
13
- // Response array
14
- $response = ['success' => false, 'message' => ''];
15
-
16
- try {
17
- // Validate request method
18
- if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
19
- throw new Exception('Invalid request method');
20
- }
21
-
22
- // Get form data
23
- $name = trim($_POST['name'] ?? '');
24
- $email = trim($_POST['email'] ?? '');
25
- $phone = trim($_POST['phone'] ?? '');
26
- $position = trim($_POST['position'] ?? '');
27
- $address = trim($_POST['address'] ?? '');
28
- $city = trim($_POST['city'] ?? '');
29
- $state = trim($_POST['state'] ?? '');
30
- $pincode = trim($_POST['pincode'] ?? '');
31
- $experience = trim($_POST['experience'] ?? '');
32
- $available_from = trim($_POST['available_from'] ?? '');
33
- $cover = trim($_POST['cover'] ?? '');
 
 
 
 
 
34
 
35
- // Validate required fields
36
- if (empty($name) || empty($email) || empty($phone)) {
37
- throw new Exception('Please provide name, email, and phone number');
38
- }
39
 
40
- // Validate email
41
- if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
42
- throw new Exception('Invalid email address');
43
- }
 
44
 
45
- // Handle file upload
46
- $resumePath = null;
47
- if (isset($_FILES['resume']) && $_FILES['resume']['error'] === UPLOAD_ERR_OK) {
48
- $file = $_FILES['resume'];
49
- $fileName = $file['name'];
50
- $fileTmpName = $file['tmp_name'];
51
- $fileSize = $file['size'];
52
 
53
- // Get file extension
54
- $fileExt = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));
55
- $allowedExt = ['pdf', 'doc', 'docx'];
 
 
56
 
57
- // Validate file
58
- if (!in_array($fileExt, $allowedExt)) {
59
- throw new Exception('Invalid file type. Only PDF, DOC, and DOCX files are allowed');
60
- }
 
 
 
 
 
 
 
 
61
 
62
- if ($fileSize > 5242880) { // 5MB in bytes
63
- throw new Exception('File size exceeds 5MB limit');
64
- }
 
 
 
 
 
 
 
65
 
66
- // Set upload directory to /filemanager-admin/uploaded-files/resumes
67
- // Use document root to get the absolute path to the main folder
68
- $documentRoot = $_SERVER['DOCUMENT_ROOT'];
69
- $uploadDir = $documentRoot . '/filemanager-admin/uploaded-files/resumes/';
70
-
71
- // Create directory if it doesn't exist
72
- if (!file_exists($uploadDir)) {
73
- if (!mkdir($uploadDir, 0755, true)) {
74
- throw new Exception('Failed to create upload directory');
75
- }
76
- }
77
 
78
- // Verify directory is writable
79
- if (!is_writable($uploadDir)) {
80
- throw new Exception('Upload directory is not writable');
81
- }
82
-
83
- // Generate unique filename
84
- $newFileName = uniqid('resume_', true) . '_' . preg_replace('/[^a-zA-Z0-9._-]/', '', $fileName);
85
- $uploadPath = $uploadDir . $newFileName;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- // Move uploaded file
88
- if (!move_uploaded_file($fileTmpName, $uploadPath)) {
89
- throw new Exception('Failed to upload resume file');
90
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- // Store relative path in database for easier portability
93
- $resumePath = 'filemanager-admin/uploaded-files/resumes/' . $newFileName;
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- // Set file permissions
96
- chmod($uploadPath, 0644);
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
 
99
- // Connect to database
100
- $conn = new PDO("mysql:host=$host;dbname=$dbname;charset=utf8mb4", $username, $password);
101
- $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
102
-
103
- // Prepare SQL statement
104
- $sql = "INSERT INTO resume (
105
- Name, mobile, email_id, applyingfor, address1, city, state, pin,
106
- yearofexp, available_from, cover_letter, resume
107
- ) VALUES (
108
- :name, :mobile, :email, :position, :address, :city, :state, :pin,
109
- :experience, :available_from, :cover, :resume
110
- )";
111
-
112
- $stmt = $conn->prepare($sql);
113
-
114
- // Bind parameters
115
- $stmt->bindParam(':name', $name);
116
- $stmt->bindParam(':mobile', $phone);
117
- $stmt->bindParam(':email', $email);
118
- $stmt->bindParam(':position', $position);
119
- $stmt->bindParam(':address', $address);
120
- $stmt->bindParam(':city', $city);
121
- $stmt->bindParam(':state', $state);
122
- $stmt->bindParam(':pin', $pincode);
123
- $stmt->bindParam(':experience', $experience);
124
- $stmt->bindParam(':available_from', $available_from);
125
- $stmt->bindParam(':cover', $cover);
126
- $stmt->bindParam(':resume', $resumePath);
127
-
128
- // Execute statement
129
- if ($stmt->execute()) {
130
- // ✅ INSERT FILE RECORD INTO files TABLE
131
- if ($resumePath) {
132
- try {
133
- $fileInsertSql = "INSERT INTO files (parent_id, user_id, name, created_at)
134
- VALUES (:parent_id, :user_id, :name, NOW())";
135
- $fileStmt = $conn->prepare($fileInsertSql);
136
- $fileStmt->bindValue(':parent_id', 5, PDO::PARAM_INT);
137
- $fileStmt->bindValue(':user_id', 1, PDO::PARAM_INT);
138
- $fileStmt->bindValue(':name', $newFileName, PDO::PARAM_STR);
139
- $fileStmt->execute();
140
-
141
- error_log('File record inserted into files table: ' . $newFileName);
142
- } catch (PDOException $e) {
143
- // Log error but don't fail the entire operation
144
- error_log('Failed to insert file record: ' . $e->getMessage());
145
- }
146
  }
147
 
148
- $response['success'] = true;
149
- $response['message'] = 'Resume submitted successfully. Our HR team will review and contact you if shortlisted.';
150
-
151
- // ✅ SEND NOTIFICATION TO ALL DEVICES
152
- require_once 'fcm_helper.php';
153
- $fcmHelper = new FCMHelper($conn);
154
-
155
- // Send to all HR and Admin users
156
- $notificationResult = $fcmHelper->sendNewApplicationNotification(
157
- $name,
158
- $position,
159
- $phone,
160
- $email
161
- );
162
-
163
- // Log notification result
164
- error_log('FCM Notification sent: ' . ($notificationResult ? 'Success' : 'Failed'));
165
- } else {
166
- throw new Exception('Failed to save data to database');
167
- }
168
 
169
- } catch (PDOException $e) {
170
- $response['message'] = 'Database error: ' . $e->getMessage();
171
- error_log('Database error: ' . $e->getMessage());
172
- } catch (Exception $e) {
173
- $response['message'] = $e->getMessage();
174
- } finally {
175
- if (isset($conn)) {
176
- $conn = null;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  }
178
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- echo json_encode($response);
181
- ?>
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import re
4
+ from datetime import datetime
5
+ from paddleocr import PaddleOCR
6
+ from PIL import Image
7
+ import pdf2image
8
+ import numpy as np
9
 
10
# Initialize PaddleOCR once for the whole module.
# NOTE(review): `show_log` and the `ocr.ocr(image, cls=True)` call used in
# extract_text_from_image belong to the PaddleOCR 2.x API, where the angle
# classifier is enabled with `use_angle_cls`. The previous
# `use_textline_orientation` keyword is the 3.x spelling; confirm the pinned
# paddleocr version matches 2.x.
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
 
 
 
12
 
13
def extract_text_from_image(image):
    """Run OCR on an image and return the recognised text blocks.

    Args:
        image: A ``PIL.Image.Image`` (converted to a NumPy array before OCR)
            or any other input that ``ocr.ocr`` accepts.

    Returns:
        A list of dicts, one per recognised line, with the keys ``text``,
        ``y`` and ``x`` (centre of the bounding box) and ``confidence``.
        The list is empty when OCR finds no text.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    result = ocr.ocr(image, cls=True)

    # The first page entry is None/empty when nothing was recognised;
    # iterating it directly would raise TypeError.
    if not result or not result[0]:
        return []

    text_blocks = []
    for bbox, (text, confidence) in result[0]:
        # Centre of the box, taken from its first and third corner points.
        text_blocks.append({
            'text': text,
            'y': (bbox[0][1] + bbox[2][1]) / 2,
            'x': (bbox[0][0] + bbox[2][0]) / 2,
            'confidence': confidence,
        })

    return text_blocks
39
 
40
def pdf_to_images(pdf_file):
    """Convert the PDF at path *pdf_file* to images via ``pdf2image``.

    Returns whatever ``pdf2image.convert_from_path`` returns for that path.
    """
    return pdf2image.convert_from_path(pdf_file)
44
 
45
def extract_gstin(text):
    """Return the first GSTIN-shaped token in *text*, or ``None``.

    The pattern is: 2 digits, 5 uppercase letters, 4 digits, 1 uppercase
    letter, 1 uppercase letter or digit, a literal ``Z``, and 1 uppercase
    letter or digit. Matching is case-sensitive.
    """
    match = re.search(r'\d{2}[A-Z]{5}\d{4}[A-Z][A-Z\d]Z[A-Z\d]', text)
    return match.group(0) if match else None
50
 
51
def extract_pincode(text):
    """Return the first standalone 6-digit PIN code in *text*, or ``None``.

    Indian PIN codes never start with 0, so six-digit tokens with a leading
    zero (for example reference numbers) are skipped.
    """
    match = re.search(r'\b[1-9]\d{5}\b', text)
    return match.group(0) if match else None
 
 
56
 
57
def extract_mobile(text):
    """Return the first 10-digit mobile number in *text*, or ``None``.

    The number must start with 6-9. An optional ``+91``/``91`` country code
    or a leading trunk ``0`` is accepted and stripped, so ``+919876543210``
    and ``09876543210`` both yield ``9876543210``.
    """
    match = re.search(
        r'(?<!\w)(?:\+?91[\s-]?|0)?([6-9]\d{9})(?!\w)',
        text,
    )
    return match.group(1) if match else None
62
 
63
def extract_date(text):
    """Return the first date-like token in *text*, or ``None``.

    Formats are tried in priority order: ``DD-MM-YYYY``, ``YYYY-MM-DD``,
    then ``DD-MM-YY``, each with ``-`` or ``/`` as separator. The matched
    text is returned unchanged; it is not validated as a calendar date.
    """
    # Digit guards on both sides stop a shorter pattern from matching inside
    # a longer date (e.g. "24-01-15" inside "2024-01-15").
    date_patterns = (
        r'(?<!\d)\d{2}[-/]\d{2}[-/]\d{4}(?!\d)',
        r'(?<!\d)\d{4}[-/]\d{2}[-/]\d{2}(?!\d)',
        r'(?<!\d)\d{2}[-/]\d{2}[-/]\d{2}(?!\d)',
    )
    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(0)
    return None
75
 
76
def extract_invoice_number(text_blocks):
    """Return the invoice/bill number found next to its label, or ``None``.

    A block is a candidate when its text contains "invoice" or "bill"
    followed by "no", "number" or "#" (case-insensitive). The number is the
    first token after the label that contains at least one digit. If the
    label block holds no such token, the following block is used when it
    starts with one.

    Args:
        text_blocks: Sequence of dicts with a ``text`` key, in reading order.
    """
    label_re = re.compile(
        r'(invoice|bill)\s*(no|number|#)\.?\s*[:#-]?\s*', re.IGNORECASE
    )
    # Requiring a digit keeps the label's own letters and trailing words
    # from being returned as the number.
    value_re = re.compile(
        r'(?=[A-Z0-9/-]*\d)[A-Z0-9][A-Z0-9/-]*', re.IGNORECASE
    )

    for index, block in enumerate(text_blocks):
        text = block['text']
        label = label_re.search(text)
        if not label:
            continue

        same_block = value_re.search(text, label.end())
        if same_block:
            return same_block.group(0)

        # The label and its value are often separate OCR blocks.
        if index + 1 < len(text_blocks):
            next_block = value_re.match(text_blocks[index + 1]['text'].strip())
            if next_block:
                return next_block.group(0)

    return None
86
 
87
def extract_amounts(text):
    """Return every number in *text* as a float, in order of appearance.

    Thousands separators (``1,234``) are removed and any number of decimal
    places is accepted, so ``12.5`` is one amount rather than ``12`` and
    ``5``. A leading rupee sign or whitespace needs no special handling
    because only the digits are captured.
    """
    matches = re.findall(r'\d+(?:,\d+)*(?:\.\d+)?', text)
    return [float(amount.replace(',', '')) for amount in matches]
 
 
 
 
 
 
92
 
93
def find_header_info(text_blocks):
    """Collect supplier and invoice header fields from OCR text blocks.

    Args:
        text_blocks: List of dicts with at least ``text`` and ``y`` keys.

    Returns:
        A dict with the keys ``supplier_name``, ``supplier_pincode``,
        ``gstin``, ``contact_no``, ``invoice_no`` and ``invoice_date``.
        Each value is ``None`` when nothing was found.
    """
    all_text = ' '.join(block['text'] for block in text_blocks)

    header = {
        "supplier_name": None,
        "supplier_pincode": extract_pincode(all_text),
        "gstin": extract_gstin(all_text),
        "contact_no": extract_mobile(all_text),
        "invoice_no": extract_invoice_number(text_blocks),
        "invoice_date": extract_date(all_text)
    }

    # Document titles commonly sit above the supplier name and must not be
    # mistaken for it.
    document_titles = (
        'tax invoice', 'invoice', 'retail invoice', 'bill of supply',
        'cash memo', 'original for recipient',
    )

    # Heuristic: the supplier name is the first of the five top-most blocks
    # that is longer than 3 characters and not made of digits/punctuation.
    for block in sorted(text_blocks, key=lambda b: b['y'])[:5]:
        candidate = block['text'].strip()
        if len(candidate) <= 3 or re.match(r'^[\d\s.,]+$', candidate):
            continue
        if candidate.casefold().strip(' :-') in document_titles:
            continue
        header['supplier_name'] = candidate
        break

    return header
118
 
119
def find_line_items(text_blocks, row_tolerance=10):
    """Extract invoice line items from OCR text blocks.

    Blocks are first grouped into visual rows (similar ``y``), ordered left
    to right, and each row is parsed as a whole. The previous per-block scan
    reused one number as HSN, quantity and rate, and so turned every block
    that contained a digit into an "item".

    This is a heuristic. A row becomes an item only if it has a textual
    name followed by at least a quantity and a rate. Discounts are not
    extracted and stay 0.

    Args:
        text_blocks: List of dicts with ``text`` and ``y`` keys (``x`` is
            used for left-to-right ordering when present).
        row_tolerance: Maximum ``y`` distance from a row's first block for
            another block to join that row (same units as ``y``).

    Returns:
        A list of dicts with the keys ``item_name``, ``hsn``, ``qty``,
        ``unit``, ``rate``, ``discount`` and ``gst_percent``.
    """
    items = []
    for row_text in _group_row_texts(text_blocks, row_tolerance):
        item = _parse_item_row(row_text)
        if item is not None:
            items.append(item)
    return items


# Leading serial number ("1", "2.", "3)") in front of the item name.
_ROW_SERIAL_RE = re.compile(r'^\d{1,3}[.)]?\s+(?=[^\d\s])')
# First standalone number: everything before it is the item name.
_ROW_NUMBER_START_RE = re.compile(r'(?<!\S)\d')
# Summary/tax rows that look like items but are not.
_ROW_SUMMARY_RE = re.compile(
    r'^(?:sub\s*-?\s*total|grand\s+total|total|[csi]gst|gst|tax|'
    r'round(?:ed)?\s*off)\b',
    re.IGNORECASE,
)
# A standalone number, optionally followed by "%" or a quantity unit.
# The guards reject digits embedded in dates, codes and words.
_ROW_TOKEN_RE = re.compile(
    r'(?<![\w.,/-])(\d+(?:,\d+)*(?:\.\d+)?)'
    r'(?:\s*(%)|\s*(pcs|nos|kg|ltr|box|unit)\b)?(?![\w/-])',
    re.IGNORECASE,
)
# HSN codes are 4, 6 or 8 digits long.
_ROW_HSN_RE = re.compile(r'\d{4}(?:\d{2}){0,2}')


def _group_row_texts(text_blocks, row_tolerance):
    """Join blocks that share a visual row into left-to-right strings."""
    rows = []
    for block in sorted(text_blocks, key=lambda b: b['y']):
        if rows and block['y'] - rows[-1][0]['y'] <= row_tolerance:
            rows[-1].append(block)
        else:
            rows.append([block])
    return [
        ' '.join(
            b['text'].strip()
            for b in sorted(row, key=lambda b: b.get('x', 0))
        )
        for row in rows
    ]


def _parse_item_row(row_text):
    """Parse one row string into an item dict, or None if it is not an item."""
    row_text = _ROW_SERIAL_RE.sub('', row_text.strip(), count=1)
    number_start = _ROW_NUMBER_START_RE.search(row_text)
    if not number_start:
        return None

    name = row_text[:number_start.start()].strip()
    if not re.search(r'[^\W\d_]', name) or _ROW_SUMMARY_RE.match(name):
        return None

    tokens = list(_ROW_TOKEN_RE.finditer(row_text, number_start.start()))
    parsed = {}

    # A number with an explicit unit is the quantity wherever it appears.
    explicit_qty = next((t for t in tokens if t.group(3)), None)
    if explicit_qty is not None:
        parsed['qty'] = float(explicit_qty.group(1).replace(',', ''))
        parsed['unit'] = explicit_qty.group(3).lower()

    for position, token in enumerate(tokens):
        number, percent, unit = token.groups()
        if unit:
            continue
        value = float(number.replace(',', ''))
        if percent:
            parsed.setdefault('gst_percent', value)
        elif position == 0 and _ROW_HSN_RE.fullmatch(number):
            # Only the number right after the name may be an HSN code;
            # later 4-digit integers are more likely rates or amounts.
            parsed['hsn'] = number
        elif 'qty' not in parsed:
            parsed['qty'] = value
        elif 'rate' not in parsed:
            parsed['rate'] = value

    if 'qty' not in parsed or 'rate' not in parsed:
        return None

    return {
        'item_name': name[:50],
        'hsn': parsed.get('hsn', ''),
        'qty': parsed['qty'],
        'unit': parsed.get('unit', 'Nos'),
        'rate': parsed['rate'],
        'discount': 0,
        'gst_percent': parsed.get('gst_percent', 0),
    }
171
 
172
def calculate_totals(items):
    """Compute per-item amounts and invoice totals.

    Each item dict is updated in place with ``gross_amount``,
    ``taxable_amount``, ``gst_amount`` and ``total_amount`` (rounded to two
    decimals). Missing or ``None`` values for ``qty``, ``rate``,
    ``discount`` and ``gst_percent`` count as 0.

    Args:
        items: List of line-item dicts.

    Returns:
        A dict with ``total_gross``, ``total_taxable``, ``total_gst`` and
        ``grand_total``, each rounded to two decimals.
    """
    total_gross = 0
    total_taxable = 0
    total_gst = 0

    for item in items:
        qty = item.get('qty') or 0
        rate = item.get('rate') or 0
        discount = item.get('discount') or 0
        gst_percent = item.get('gst_percent') or 0

        gross = qty * rate
        taxable = gross - discount
        gst_amount = (taxable * gst_percent) / 100

        item['gross_amount'] = round(gross, 2)
        item['taxable_amount'] = round(taxable, 2)
        item['gst_amount'] = round(gst_amount, 2)
        item['total_amount'] = round(taxable + gst_amount, 2)

        # Accumulate unrounded values so rounding error does not add up.
        total_gross += gross
        total_taxable += taxable
        total_gst += gst_amount

    return {
        'total_gross': round(total_gross, 2),
        'total_taxable': round(total_taxable, 2),
        'total_gst': round(total_gst, 2),
        'grand_total': round(total_taxable + total_gst, 2)
    }
203
 
204
def extract_invoice_data(file):
    """Extract header, line items and totals from an uploaded invoice.

    Args:
        file: The uploaded file, either a filesystem path string or an
            object with a ``name`` attribute holding the path. Gradio passes
            one or the other depending on version and ``gr.File`` settings.

    Returns:
        A JSON string with ``header``, ``details`` and ``footer`` keys. On
        any failure it is a JSON string with ``error`` and ``message`` keys
        instead; exceptions are not propagated to the UI.
    """
    try:
        if file is None:
            raise ValueError("No file was uploaded")

        path = file if isinstance(file, str) else file.name

        # Convert PDF to image if needed
        if path.lower().endswith('.pdf'):
            pages = pdf_to_images(path)
            if not pages:
                raise ValueError("PDF contains no pages")
            image = pages[0]  # Only the first page is processed
        else:
            # Normalise palette/alpha images to RGB and release the file
            # handle before OCR runs.
            with Image.open(path) as opened:
                image = opened.convert('RGB')

        # Extract text with OCR
        text_blocks = extract_text_from_image(image)

        # Extract different sections
        header = find_header_info(text_blocks)
        details = find_line_items(text_blocks)
        # calculate_totals also adds the computed amounts to each item.
        footer = calculate_totals(details)

        # Build final JSON structure
        result = {
            "header": header,
            "details": details,
            "footer": footer
        }

        return json.dumps(result, indent=2, ensure_ascii=False)

    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        return json.dumps({
            "error": str(e),
            "message": "Failed to process invoice"
        }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
# Create Gradio Interface
with gr.Blocks(title="Purchase Invoice Data Extraction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🧾 Purchase Invoice Data Extraction API

    Upload purchase invoices (PDF or Image) to automatically extract structured data including:
    - Supplier details (Name, PIN, GSTIN, Contact)
    - Invoice information (Number, Date)
    - Line items (Name, HSN, Qty, Rate, Discounts, GST%)
    - Calculated totals (Gross, Taxable, Tax, Grand Total)
    """)

    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="Upload Invoice (PDF or Image)",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"]
            )
            extract_btn = gr.Button("Extract Data", variant="primary", size="lg")

            # The OCR engine is initialised with lang='en', so only English
            # is advertised here.
            gr.Markdown("""
            ### Supported Formats:
            - PDF documents
            - PNG, JPG, JPEG images
            - English text
            """)

        with gr.Column():
            output_json = gr.Code(
                label="Extracted Data (JSON)",
                language="json",
                lines=25
            )

    # NOTE(review): the client and cURL snippets below depend on the Gradio
    # version in use (file-argument handling and REST payload format differ
    # between major versions) - verify them against the deployed version.
    gr.Markdown("""
    ### Output Structure:
    ```json
    {
      "header": {
        "supplier_name": "...",
        "supplier_pincode": "...",
        "gstin": "...",
        "contact_no": "...",
        "invoice_no": "...",
        "invoice_date": "..."
      },
      "details": [
        {
          "item_name": "...",
          "hsn": "...",
          "qty": 0,
          "unit": "...",
          "rate": 0,
          "discount": 0,
          "gst_percent": 0,
          "gross_amount": 0,
          "taxable_amount": 0,
          "gst_amount": 0,
          "total_amount": 0
        }
      ],
      "footer": {
        "total_gross": 0,
        "total_taxable": 0,
        "total_gst": 0,
        "grand_total": 0
      }
    }
    ```

    ---

    ### API Usage:

    **Python Client:**
    ```python
    from gradio_client import Client

    client = Client("http://localhost:7860")
    result = client.predict(
        file="path/to/invoice.pdf",
        api_name="/predict"
    )
    print(result)
    ```

    **cURL:**
    ```bash
    curl -X POST http://localhost:7860/api/predict \\
      -F "file=@invoice.pdf"
    ```
    """)

    # Register the endpoint under the name the usage notes above document;
    # without api_name the event is not exposed as "/predict".
    extract_btn.click(
        fn=extract_invoice_data,
        inputs=[file_input],
        outputs=[output_json],
        api_name="predict"
    )

    # Example usage
    gr.Examples(
        examples=[],
        inputs=[file_input],
        outputs=[output_json],
        fn=extract_invoice_data,
        cache_examples=False
    )
344
 
345
if __name__ == "__main__":
    # Listen on all interfaces at port 7860, without a public share link,
    # and with the API page enabled.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True
    )