api-olmocr-api

Sleeping

App Files Files Community

abinash73 committed on Nov 15, 2025

Commit

b4e5c22

verified ·

1 Parent(s): 1938489

Update app.py

Browse files

Files changed (1) hide show

app.py +331 -161

app.py CHANGED Viewed

@@ -1,181 +1,351 @@
-<?php
-// ============================================
-// 1. Updated submit_resume.php with FCM Integration
-// ============================================
-header('Content-Type: application/json');
-// Database configuration
-$host = 'localhost';
-$dbname = 'asecurit_db';
-$username = 'asecurit_root';
-$password = 'Pass098&6';
-// Response array
-$response = ['success' => false, 'message' => ''];
-try {
-    // Validate request method
-    if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
-        throw new Exception('Invalid request method');
-    }
-    // Get form data
-    $name = trim($_POST['name'] ?? '');
-    $email = trim($_POST['email'] ?? '');
-    $phone = trim($_POST['phone'] ?? '');
-    $position = trim($_POST['position'] ?? '');
-    $address = trim($_POST['address'] ?? '');
-    $city = trim($_POST['city'] ?? '');
-    $state = trim($_POST['state'] ?? '');
-    $pincode = trim($_POST['pincode'] ?? '');
-    $experience = trim($_POST['experience'] ?? '');
-    $available_from = trim($_POST['available_from'] ?? '');
-    $cover = trim($_POST['cover'] ?? '');
-    // Validate required fields
-    if (empty($name) || empty($email) || empty($phone)) {
-        throw new Exception('Please provide name, email, and phone number');
-    }
-    // Validate email
-    if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
-        throw new Exception('Invalid email address');
-    }
-    // Handle file upload
-    $resumePath = null;
-    if (isset($_FILES['resume']) && $_FILES['resume']['error'] === UPLOAD_ERR_OK) {
-        $file = $_FILES['resume'];
-        $fileName = $file['name'];
-        $fileTmpName = $file['tmp_name'];
-        $fileSize = $file['size'];
-        // Get file extension
-        $fileExt = strtolower(pathinfo($fileName, PATHINFO_EXTENSION));
-        $allowedExt = ['pdf', 'doc', 'docx'];
-        // Validate file
-        if (!in_array($fileExt, $allowedExt)) {
-            throw new Exception('Invalid file type. Only PDF, DOC, and DOCX files are allowed');
-        }
-        if ($fileSize > 5242880) { // 5MB in bytes
-            throw new Exception('File size exceeds 5MB limit');
-        }
-        // Set upload directory to /filemanager-admin/uploaded-files/resumes
-        // Use document root to get the absolute path to the main folder
-        $documentRoot = $_SERVER['DOCUMENT_ROOT'];
-        $uploadDir = $documentRoot . '/filemanager-admin/uploaded-files/resumes/';
-        // Create directory if it doesn't exist
-        if (!file_exists($uploadDir)) {
-            if (!mkdir($uploadDir, 0755, true)) {
-                throw new Exception('Failed to create upload directory');
-            }
-        }
-        // Verify directory is writable
-        if (!is_writable($uploadDir)) {
-            throw new Exception('Upload directory is not writable');
-        }
-        // Generate unique filename
-        $newFileName = uniqid('resume_', true) . '_' . preg_replace('/[^a-zA-Z0-9._-]/', '', $fileName);
-        $uploadPath = $uploadDir . $newFileName;
-        // Move uploaded file
-        if (!move_uploaded_file($fileTmpName, $uploadPath)) {
-            throw new Exception('Failed to upload resume file');
-        }
-        // Store relative path in database for easier portability
-        $resumePath = 'filemanager-admin/uploaded-files/resumes/' . $newFileName;
-        // Set file permissions
-        chmod($uploadPath, 0644);
     }
-    // Connect to database
-    $conn = new PDO("mysql:host=$host;dbname=$dbname;charset=utf8mb4", $username, $password);
-    $conn->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
-    // Prepare SQL statement
-    $sql = "INSERT INTO resume (
-        Name, mobile, email_id, applyingfor, address1, city, state, pin,
-        yearofexp, available_from, cover_letter, resume
-    ) VALUES (
-        :name, :mobile, :email, :position, :address, :city, :state, :pin,
-        :experience, :available_from, :cover, :resume
-    )";
-    $stmt = $conn->prepare($sql);
-    // Bind parameters
-    $stmt->bindParam(':name', $name);
-    $stmt->bindParam(':mobile', $phone);
-    $stmt->bindParam(':email', $email);
-    $stmt->bindParam(':position', $position);
-    $stmt->bindParam(':address', $address);
-    $stmt->bindParam(':city', $city);
-    $stmt->bindParam(':state', $state);
-    $stmt->bindParam(':pin', $pincode);
-    $stmt->bindParam(':experience', $experience);
-    $stmt->bindParam(':available_from', $available_from);
-    $stmt->bindParam(':cover', $cover);
-    $stmt->bindParam(':resume', $resumePath);
-    // Execute statement
-    if ($stmt->execute()) {
-        // ✅ INSERT FILE RECORD INTO files TABLE
-        if ($resumePath) {
-            try {
-                $fileInsertSql = "INSERT INTO files (parent_id, user_id, name, created_at)
-                                  VALUES (:parent_id, :user_id, :name, NOW())";
-                $fileStmt = $conn->prepare($fileInsertSql);
-                $fileStmt->bindValue(':parent_id', 5, PDO::PARAM_INT);
-                $fileStmt->bindValue(':user_id', 1, PDO::PARAM_INT);
-                $fileStmt->bindValue(':name', $newFileName, PDO::PARAM_STR);
-                $fileStmt->execute();
-                error_log('File record inserted into files table: ' . $newFileName);
-            } catch (PDOException $e) {
-                // Log error but don't fail the entire operation
-                error_log('Failed to insert file record: ' . $e->getMessage());
-            }
         }
-        $response['success'] = true;
-        $response['message'] = 'Resume submitted successfully. Our HR team will review and contact you if shortlisted.';
-        // ✅ SEND NOTIFICATION TO ALL DEVICES
-        require_once 'fcm_helper.php';
-        $fcmHelper = new FCMHelper($conn);
-        // Send to all HR and Admin users
-        $notificationResult = $fcmHelper->sendNewApplicationNotification(
-            $name,
-            $position,
-            $phone,
-            $email
-        );
-        // Log notification result
-        error_log('FCM Notification sent: ' . ($notificationResult ? 'Success' : 'Failed'));
-    } else {
-        throw new Exception('Failed to save data to database');
-    }
-} catch (PDOException $e) {
-    $response['message'] = 'Database error: ' . $e->getMessage();
-    error_log('Database error: ' . $e->getMessage());
-} catch (Exception $e) {
-    $response['message'] = $e->getMessage();
-} finally {
-    if (isset($conn)) {
-        $conn = null;
     }
-}
-echo json_encode($response);
-?>

+import gradio as gr
+import json
+import re
+from datetime import datetime
+from paddleocr import PaddleOCR
+from PIL import Image
+import pdf2image
+import numpy as np
# Initialize PaddleOCR once at import time; every request reuses this engine.
# NOTE(review): `show_log` and the later `ocr.ocr(image, cls=True)` call are
# PaddleOCR 2.x-style API, where the angle classifier is enabled with
# `use_angle_cls`. The previous `use_textline_orientation` spelling is the
# 3.x name and is presumably not honoured by the 2.x API, leaving the
# classifier requested by `cls=True` switched off. Confirm against the pinned
# paddleocr version.
ocr = PaddleOCR(use_angle_cls=True, lang='en', show_log=False)
def extract_text_from_image(image):
    """Run OCR on an image and return the recognised lines with positions.

    Args:
        image: A PIL image (converted to a NumPy array before OCR) or any
            other value the module-level ``ocr`` engine accepts as-is.

    Returns:
        A list of dicts, one per recognised line, with keys ``text``,
        ``y`` and ``x`` (midpoint between the first and third bounding-box
        corners) and ``confidence``. The list is empty when the engine
        reports no text.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    result = ocr.ocr(image, cls=True)

    # A page without detected text may come back as an empty result or as
    # ``[None]``; iterating that would raise TypeError, so treat both as
    # "no text found".
    if not result or not result[0]:
        return []

    text_blocks = []
    for line in result[0]:
        bbox = line[0]
        text = line[1][0]
        confidence = line[1][1]

        # Midpoint of the diagonal between corner 0 and corner 2 serves as
        # the block position used later for top-to-bottom ordering.
        y_center = (bbox[0][1] + bbox[2][1]) / 2
        x_center = (bbox[0][0] + bbox[2][0]) / 2

        text_blocks.append({
            'text': text,
            'y': y_center,
            'x': x_center,
            'confidence': confidence,
        })
    return text_blocks
def pdf_to_images(pdf_file):
    """Render the PDF at path ``pdf_file`` with pdf2image and return the result."""
    return pdf2image.convert_from_path(pdf_file)
def extract_gstin(text):
    """Return the first GSTIN-shaped token found in ``text``, or ``None``.

    The shape matched is: 2 digits, 5 capitals, 4 digits, 1 capital,
    1 capital-or-digit, a literal ``Z``, then 1 capital-or-digit.
    """
    found = re.search(r'\d{2}[A-Z]{5}\d{4}[A-Z]{1}[A-Z\d]{1}[Z]{1}[A-Z\d]{1}', text)
    if found is None:
        return None
    return found.group(0)
def extract_pincode(text):
    """Return the first standalone run of exactly six digits in ``text``, or ``None``."""
    found = re.search(r'\b\d{6}\b', text)
    if found is None:
        return None
    return found.group(0)
def extract_mobile(text):
    """Return the first standalone 10-digit number starting with 6-9, or ``None``."""
    found = re.search(r'\b[6-9]\d{9}\b', text)
    if found is None:
        return None
    return found.group(0)
def extract_date(text):
    """Return the first date-like token in ``text``, or ``None``.

    Recognised shapes, using ``-`` or ``/`` as separators, in priority
    order: ``dd-mm-yyyy``, ``yyyy-mm-dd``, ``dd-mm-yy``. Priority is by
    pattern, not by position in the text.

    Args:
        text: The text to search.

    Returns:
        The matched substring exactly as it appears in ``text``, or ``None``
        when no pattern matches.
    """
    # The two-digit-year pattern must be tried last: it also matches the
    # tail of a four-digit-year date ("2025-11-15" contains "25-11-15"),
    # which would otherwise truncate ISO-style dates.
    date_patterns = (
        r'\d{2}[-/]\d{2}[-/]\d{4}',
        r'\d{4}[-/]\d{2}[-/]\d{2}',
        r'\d{2}[-/]\d{2}[-/]\d{2}',
    )
    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(0)
    return None
def extract_invoice_number(text_blocks):
    """Return the invoice/bill number that follows an "Invoice No"-style label.

    Each block's text is searched for a label: "invoice" or "bill" followed
    by "no", "number" or "#", case-insensitively. The number is the first
    token after the label that contains at least one digit. If the label
    block carries no such token, the next block in the list is checked.

    Args:
        text_blocks: Sequence of dicts with a ``'text'`` entry.

    Returns:
        The number as a string, or ``None`` when no label with a value is
        found.
    """
    # Label plus any trailing separators (". : # -" and whitespace), so the
    # value search starts right at the number instead of at the label text.
    label_re = re.compile(r'(?:invoice|bill)\s*(?:number|no|#)[\s.:#-]*', re.IGNORECASE)
    # A token of letters/digits/"/"/"-" that contains at least one digit;
    # the digit requirement keeps plain words from being mistaken for a number.
    value_re = re.compile(r'(?=[A-Za-z/-]*\d)[A-Za-z0-9][A-Za-z0-9/-]*')

    for index, block in enumerate(text_blocks):
        text = block['text']
        label = label_re.search(text)
        if not label:
            continue

        # Value in the same block, after the label.
        value = value_re.search(text, label.end())
        if value:
            return value.group(0)

        # Label and value may be recognised as separate blocks: try the next one.
        if index + 1 < len(text_blocks):
            value = value_re.search(text_blocks[index + 1]['text'])
            if value:
                return value.group(0)
    return None
def extract_amounts(text):
    """Return every number in ``text`` as a float, in order of appearance.

    A number is a run of digits with optional comma-separated groups and an
    optional decimal part; a leading rupee sign and whitespace are skipped.

    Args:
        text: The text to scan.

    Returns:
        A list of floats (commas removed); empty when no number is present.
    """
    # Accept any number of decimal places: requiring exactly two split
    # values such as "1,234.5" into 1234.0 and 5.0.
    amount_pattern = r'₹?\s*(\d+(?:,\d+)*(?:\.\d+)?)'
    return [float(amt.replace(',', '')) for amt in re.findall(amount_pattern, text)]
def find_header_info(text_blocks):
    """Assemble the supplier/invoice header dict from OCR text blocks.

    PIN code, GSTIN, contact number and invoice date are searched in the
    space-joined text of all blocks; the invoice number is searched block by
    block. The supplier name is the first of the five topmost blocks (lowest
    ``y``) whose stripped text is longer than three characters and is not
    made up solely of digits, whitespace, dots and commas. It stays ``None``
    when none of the five qualifies.
    """
    joined = ' '.join(entry['text'] for entry in text_blocks)

    def looks_like_name(candidate):
        return len(candidate) > 3 and not re.match(r'^[\d\s.,]+$', candidate)

    topmost = sorted(text_blocks, key=lambda entry: entry['y'])[:5]
    stripped_texts = (entry['text'].strip() for entry in topmost)
    supplier = next((t for t in stripped_texts if looks_like_name(t)), None)

    return {
        "supplier_name": supplier,
        "supplier_pincode": extract_pincode(joined),
        "gstin": extract_gstin(joined),
        "contact_no": extract_mobile(joined),
        "invoice_no": extract_invoice_number(text_blocks),
        "invoice_date": extract_date(joined),
    }
def find_line_items(text_blocks):
    """Heuristically assemble invoice line items from OCR text blocks.

    Blocks are visited top to bottom (ascending ``y``). Each block may
    contribute, to the item being assembled, whichever of these fields are
    still unset: an HSN code (a standalone run of 4-8 digits), a quantity
    with optional unit, a rate (first number in the block) and a GST
    percentage (number followed by ``%``). As soon as the pending item holds
    three or more fields it is emitted, named after the first 50 characters
    of the current block's text, and a new item is started.

    Returns:
        A list of dicts with keys ``item_name``, ``hsn``, ``qty``, ``unit``,
        ``rate``, ``discount`` and ``gst_percent``.
    """
    collected = []
    pending = {}

    for entry in sorted(text_blocks, key=lambda blk: blk['y']):
        raw = entry['text'].strip()

        # HSN code
        if not pending.get('hsn'):
            hsn = re.search(r'\b\d{4,8}\b', raw)
            if hsn:
                pending['hsn'] = hsn.group(0)

        # Quantity and unit (unit defaults to 'Nos' when none follows)
        if not pending.get('qty'):
            qty = re.search(r'\b(\d+(?:\.\d+)?)\s*(pcs|nos|kg|ltr|box|unit)?', raw.lower())
            if qty:
                pending['qty'] = float(qty.group(1))
                pending['unit'] = qty.group(2) or 'Nos'

        # Rate: the first amount-like number in the block
        if not pending.get('rate'):
            numbers = re.findall(r'₹?\s*(\d+(?:,\d+)*(?:\.\d{2})?)', raw)
            if numbers:
                pending['rate'] = float(numbers[0].replace(',', ''))

        # GST percentage
        if not pending.get('gst_percent'):
            gst = re.search(r'(\d+(?:\.\d+)?)\s*%', raw)
            if gst:
                pending['gst_percent'] = float(gst.group(1))

        # Not enough fields gathered yet: keep accumulating.
        if len(pending) < 3:
            continue

        pending.setdefault('item_name', raw[:50])
        collected.append({
            'item_name': pending.get('item_name', 'Item'),
            'hsn': pending.get('hsn', ''),
            'qty': pending.get('qty', 0),
            'unit': pending.get('unit', 'Nos'),
            'rate': pending.get('rate', 0),
            'discount': pending.get('discount', 0),
            'gst_percent': pending.get('gst_percent', 0),
        })
        pending = {}

    return collected
def calculate_totals(items):
    """Compute per-item amounts and invoice totals.

    Each item dict is updated in place with ``gross_amount`` (qty * rate),
    ``taxable_amount`` (gross minus discount), ``gst_amount`` (taxable *
    gst_percent / 100) and ``total_amount`` (taxable + GST), each rounded to
    two decimals. Missing inputs count as 0.

    Returns:
        A dict with ``total_gross``, ``total_taxable``, ``total_gst`` and
        ``grand_total``, summed from the unrounded per-item values and then
        rounded to two decimals.
    """
    gross_sum = 0
    taxable_sum = 0
    gst_sum = 0

    for entry in items:
        gross = entry.get('qty', 0) * entry.get('rate', 0)
        taxable = gross - entry.get('discount', 0)
        gst = (taxable * entry.get('gst_percent', 0)) / 100

        entry.update({
            'gross_amount': round(gross, 2),
            'taxable_amount': round(taxable, 2),
            'gst_amount': round(gst, 2),
            'total_amount': round(taxable + gst, 2),
        })

        gross_sum += gross
        taxable_sum += taxable
        gst_sum += gst

    return {
        'total_gross': round(gross_sum, 2),
        'total_taxable': round(taxable_sum, 2),
        'total_gst': round(gst_sum, 2),
        'grand_total': round(taxable_sum + gst_sum, 2),
    }
def extract_invoice_data(file):
    """Extract header, line items and totals from an uploaded invoice.

    Args:
        file: The invoice to process: a path string, a path-like object, or
            an upload object that exposes its path as ``.name``. A path
            ending in ``.pdf`` (case-insensitive) is rasterised and only its
            first page is processed; anything else is opened as an image.

    Returns:
        A JSON string. On success it has the keys ``header``, ``details``
        and ``footer``. On any failure it has ``error`` (the exception
        text) and ``message``; exceptions are never propagated to the caller.
    """
    try:
        if file is None:
            raise ValueError("No file provided")

        # Plain strings and path-like objects are the path themselves;
        # upload wrappers carry it in ``.name``.
        if isinstance(file, str) or hasattr(file, '__fspath__'):
            path = str(file)
        else:
            path = file.name

        # Convert PDF to image if needed
        if path.lower().endswith('.pdf'):
            images = pdf_to_images(path)
            image = images[0]  # Process first page only
        else:
            image = Image.open(path)

        # Extract text with OCR
        text_blocks = extract_text_from_image(image)

        # Extract the different sections
        header = find_header_info(text_blocks)
        details = find_line_items(text_blocks)
        footer = calculate_totals(details)  # also adds per-item amounts to details

        # Build final JSON structure
        result = {
            "header": header,
            "details": details,
            "footer": footer,
        }
        return json.dumps(result, indent=2, ensure_ascii=False)
    except Exception as e:
        # UI/API boundary: report the failure as JSON instead of raising.
        return json.dumps({
            "error": str(e),
            "message": "Failed to process invoice",
        }, indent=2, ensure_ascii=False)
# Create Gradio Interface: upload + button on the left, JSON result on the
# right, followed by reference documentation for the output and the API.
with gr.Blocks(title="Purchase Invoice Data Extraction", theme=gr.themes.Soft()) as demo:
    # Page heading and feature summary
    gr.Markdown("""
    # 🧾 Purchase Invoice Data Extraction API
    Upload purchase invoices (PDF or Image) to automatically extract structured data including:
    - Supplier details (Name, PIN, GSTIN, Contact)
    - Invoice information (Number, Date)
    - Line items (Name, HSN, Qty, Rate, Discounts, GST%)
    - Calculated totals (Gross, Taxable, Tax, Grand Total)
    """)
    with gr.Row():
        # Left column: input controls
        with gr.Column():
            file_input = gr.File(
                label="Upload Invoice (PDF or Image)",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"]
            )
            extract_btn = gr.Button("Extract Data", variant="primary", size="lg")
            gr.Markdown("""
            ### Supported Formats:
            - PDF documents
            - PNG, JPG, JPEG images
            - English and Hindi text
            """)
        # Right column: extraction result
        with gr.Column():
            output_json = gr.Code(
                label="Extracted Data (JSON)",
                language="json",
                lines=25
            )
    # Output schema and API usage examples
    gr.Markdown("""
    ### Output Structure:
    ```json
    {
      "header": {
        "supplier_name": "...",
        "supplier_pincode": "...",
        "gstin": "...",
        "contact_no": "...",
        "invoice_no": "...",
        "invoice_date": "..."
      },
      "details": [
        {
          "item_name": "...",
          "hsn": "...",
          "qty": 0,
          "unit": "...",
          "rate": 0,
          "discount": 0,
          "gst_percent": 0,
          "gross_amount": 0,
          "taxable_amount": 0,
          "gst_amount": 0,
          "total_amount": 0
        }
      ],
      "footer": {
        "total_gross": 0,
        "total_taxable": 0,
        "total_gst": 0,
        "grand_total": 0
      }
    }
    ```
    ---
    ### API Usage:
    **Python Client:**
    ```python
    from gradio_client import Client
    client = Client("http://localhost:7860")
    result = client.predict(
        file="path/to/invoice.pdf",
        api_name="/predict"
    )
    print(result)
    ```
    **cURL:**
    ```bash
    curl -X POST http://localhost:7860/api/predict \\
      -F "file=@invoice.pdf"
    ```
    """)
    # Publish the handler under the endpoint name the usage examples above
    # refer to ("/predict"); without an explicit api_name the endpoint is
    # not registered under that name.
    extract_btn.click(
        fn=extract_invoice_data,
        inputs=[file_input],
        outputs=[output_json],
        api_name="predict"
    )
    # Example usage (no sample invoices are bundled, so the list is empty)
    gr.Examples(
        examples=[],
        inputs=[file_input],
        outputs=[output_json],
        fn=extract_invoice_data,
        cache_examples=False
    )
if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_api": True,
    }
    demo.launch(**launch_options)