jithenderchoudary committed on
Commit
0991dae
·
verified ·
1 Parent(s): b01050a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py CHANGED
@@ -1,6 +1,84 @@
1
  import gradio as gr
2
  from utils.cad_operations import create_cad_model
3
  from utils.toolpath_generation import generate_toolpath, generate_gcode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  # Step 1: Collect user input parameters
6
  def cnc_workflow(length, width, height, material, tool_size, operation_type):
 
1
import os
import re

import cv2
import gradio as gr
import pandas as pd
import pytesseract
from PIL import Image

from utils.cad_operations import create_cad_model
from utils.toolpath_generation import generate_toolpath, generate_gcode
9
+
10
# Location of the Tesseract executable.
# Resolution order:
#   1. the TESSERACT_CMD environment variable, when set;
#   2. the default Windows install path, but only when that file exists;
#   3. otherwise pytesseract's own default is left untouched, so the
#      executable is looked up on PATH.
# Assigning the Windows path unconditionally would point pytesseract at a
# non-existent executable on every other machine.
_DEFAULT_WINDOWS_TESSERACT = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
_tesseract_cmd = os.environ.get('TESSERACT_CMD')
if not _tesseract_cmd and os.path.isfile(_DEFAULT_WINDOWS_TESSERACT):
    _tesseract_cmd = _DEFAULT_WINDOWS_TESSERACT
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
12
+
13
+ # Function to preprocess the image (grayscale, thresholding, etc.)
14
def preprocess_image(image_path):
    """Load an image from disk and binarize it for OCR.

    The image is converted to grayscale, thresholded at a fixed level of 150
    (pixels above the level become 255, all others 0), and then cleaned up
    with a morphological closing using a 5x5 rectangular kernel.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The processed grayscale image produced by OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``
            (missing file, unreadable file, or unsupported format).
    """
    # Read the image using OpenCV
    img = cv2.imread(image_path)

    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque error inside cvtColor.
    if img is None:
        raise FileNotFoundError(
            f"Could not read an image from {image_path!r} "
            "(missing file or unsupported format)"
        )

    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply thresholding
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)

    # Optional: Remove noise (you can experiment with different kernels)
    kern = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    processed_image = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kern)

    return processed_image
29
+
30
+ # Function to extract text from image using OCR
31
def extract_text_from_image(image):
    """Run Tesseract OCR on ``image`` and return the recognized text.

    Args:
        image: The image to recognize, passed straight through to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for ``image``.
    """
    return pytesseract.image_to_string(image)
35
+
36
+ # Function to extract invoice details using regular expressions
37
def extract_invoice_details(text):
    """Pull the invoice fields out of OCR text with regular expressions.

    Each field is located by its label ("Invoice Number", "Customer Name",
    "Date", "Total Amount") followed by optional colons/whitespace. These are
    example patterns; adjust them to the layout of your invoices.

    Args:
        text: The OCR output to search.

    Returns:
        A dict with the keys 'Invoice Number', 'Customer Name', 'Date' and
        'Total Amount'. A field whose pattern does not match is 'N/A'.
    """
    # ``[^\S\n]`` means "whitespace other than a newline"; it keeps a captured
    # value on a single line so it cannot swallow the label of the next field.
    patterns = {
        # Alphanumeric groups, optionally joined by hyphens (e.g. INV-001).
        'Invoice Number': r'Invoice Number[:\s]*([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)',
        # Words separated by spaces/tabs, without trailing whitespace.
        'Customer Name': r'Customer Name[:\s]*(\w+(?:[^\S\n]+\w+)*)',
        # Digits separated by '/', '-' or '.', starting and ending on a digit.
        'Date': r'Date[:\s]*(\d(?:[\d/.\-]*\d)?)',
        # Optional currency symbol ($, euro, pound, yen, rupee) or 3-letter
        # code, then a number with thousands separators and optional decimals.
        'Total Amount': (
            r'Total Amount[:\s]*(?:[$\u20ac\u00a3\u00a5\u20b9]|[A-Z]{3})?'
            r'[^\S\n]*(\d+(?:,\d+)*(?:\.\d+)?)'
        ),
    }

    # Extract matched values or set to 'N/A' if not found
    invoice_details = {}
    for field, pattern in patterns.items():
        match = re.search(pattern, text)
        invoice_details[field] = match.group(1) if match else 'N/A'
    return invoice_details
52
+
53
+ # Function to save extracted data to Excel
54
def save_to_excel(data, output_file):
    """Write ``data`` to an Excel file.

    Args:
        data: Anything ``pandas.DataFrame`` accepts as its data argument.
        output_file: Destination passed to ``DataFrame.to_excel``.
    """
    # Build the frame and write it in one step; the index column is omitted.
    pd.DataFrame(data).to_excel(output_file, index=False)
58
+
59
def main(image_path='path_to_your_invoice_image.jpg',
         output_file='extracted_invoice_data.xlsx'):
    """Run the invoice pipeline: preprocess, OCR, parse fields, save to Excel.

    Args:
        image_path: Path to the invoice image. The default is the placeholder
            value and must be replaced with a real file.
        output_file: Path of the Excel file to write.
    """
    # Preprocess the image
    preprocessed_image = preprocess_image(image_path)

    # Extract text from the image using OCR
    extracted_text = extract_text_from_image(preprocessed_image)

    # Extract invoice details using regex
    invoice_details = extract_invoice_details(extracted_text)

    # Save extracted data to an Excel file; the single record is wrapped in a
    # list so it becomes one row.
    save_to_excel([invoice_details], output_file)

    print(f"Data extracted and saved to {output_file}")


# Run the script
# NOTE(review): this guard sits before other definitions in the file (for
# example cnc_workflow), so they are not yet defined when main() runs here.
# Consider moving it to the end of the module.
if __name__ == "__main__":
    main()
81
+
82
 
83
  # Step 1: Collect user input parameters
84
  def cnc_workflow(length, width, height, material, tool_size, operation_type):