GiantAnalytics committed on
Commit
ea8ed28
·
verified ·
1 Parent(s): 15e13e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -9
app.py CHANGED
@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
23
 
24
  # Download and cache the font file
25
  def get_font():
 
26
  try:
27
  font_path = Path("Roboto-Regular.ttf")
28
  if not font_path.exists():
@@ -34,33 +35,44 @@ def get_font():
34
  logger.error(f"Error in get_font: {str(e)}")
35
  return None
36
 
37
- # Initialize EasyOCR Reader for French
38
  try:
39
  reader = easyocr.Reader(['fr', 'en'], gpu=False)
40
  except Exception as e:
41
  logger.error(f"Error initializing EasyOCR: {str(e)}")
42
 
43
  def ocr_extract_text_and_tables(image):
 
44
  try:
45
  if image is None:
46
  return "No image provided", "No image provided", None
47
 
48
- # Convert to RGB if needed
 
 
 
 
49
  if len(image.shape) == 3 and image.shape[2] == 4:
50
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
51
-
 
 
52
  # Convert to grayscale for better OCR
53
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
54
 
55
  # Apply adaptive thresholding
56
  processed = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
57
 
58
- # 1. Extract general text using EasyOCR
59
  results = reader.readtext(processed)
60
  detected_text = [f"{text} (Confidence: {confidence:.2f})" for _, text, confidence in results]
61
 
62
- # 2. Use img2table for structured table extraction
63
- img = Img2TableImage(image)
 
 
 
 
64
  ocr = TesseractOCR(lang="fra")
65
  tables = img.extract_tables(ocr=ocr)
66
 
@@ -71,7 +83,7 @@ def ocr_extract_text_and_tables(image):
71
  for i, df in enumerate(table_data):
72
  df.to_csv(f"extracted_table_{i+1}.csv", index=False)
73
 
74
- # Annotate image with bounding boxes around detected text
75
  pil_image = Image.fromarray(image)
76
  draw = ImageDraw.Draw(pil_image)
77
 
@@ -91,10 +103,10 @@ def ocr_extract_text_and_tables(image):
91
  except Exception as e:
92
  return f"Error: {str(e)}", "Processing failed", None
93
 
94
- # Create Gradio interface
95
  iface = gr.Interface(
96
  fn=ocr_extract_text_and_tables,
97
- inputs=gr.Image(type="numpy", label="Upload Image"),
98
  outputs=[
99
  gr.Textbox(label="Extracted Text (French)"),
100
  gr.Textbox(label="Extracted Tables"),
 
23
 
24
  # Download and cache the font file
25
  def get_font():
26
+ """Download font for annotation if not available."""
27
  try:
28
  font_path = Path("Roboto-Regular.ttf")
29
  if not font_path.exists():
 
35
  logger.error(f"Error in get_font: {str(e)}")
36
  return None
37
 
38
+ # Initialize EasyOCR Reader for French & English
39
  try:
40
  reader = easyocr.Reader(['fr', 'en'], gpu=False)
41
  except Exception as e:
42
  logger.error(f"Error initializing EasyOCR: {str(e)}")
43
 
44
  def ocr_extract_text_and_tables(image):
45
+ """Extract text and tables from an image."""
46
  try:
47
  if image is None:
48
  return "No image provided", "No image provided", None
49
 
50
+ # Ensure image is in the correct format
51
+ if isinstance(image, Image.Image):
52
+ image = np.array(image, dtype=np.uint8) # Convert PIL to numpy
53
+
54
+ # If image has an alpha channel (RGBA), convert to RGB
55
  if len(image.shape) == 3 and image.shape[2] == 4:
56
  image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
57
+ elif len(image.shape) == 2: # If grayscale, convert to BGR
58
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
59
+
60
  # Convert to grayscale for better OCR
61
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
62
 
63
  # Apply adaptive thresholding
64
  processed = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
65
 
66
+ # 1️⃣ Extract General Text using EasyOCR
67
  results = reader.readtext(processed)
68
  detected_text = [f"{text} (Confidence: {confidence:.2f})" for _, text, confidence in results]
69
 
70
+ # 2️⃣ Save image to a temporary file for `img2table`
71
+ temp_image_path = "temp_table_image.jpg"
72
+ cv2.imwrite(temp_image_path, image)
73
+
74
+ # 3️⃣ Use img2table for structured table extraction
75
+ img = Img2TableImage(temp_image_path) # Use file path instead of np.ndarray
76
  ocr = TesseractOCR(lang="fra")
77
  tables = img.extract_tables(ocr=ocr)
78
 
 
83
  for i, df in enumerate(table_data):
84
  df.to_csv(f"extracted_table_{i+1}.csv", index=False)
85
 
86
+ # 4️⃣ Annotate Image with Bounding Boxes for Detected Text
87
  pil_image = Image.fromarray(image)
88
  draw = ImageDraw.Draw(pil_image)
89
 
 
103
  except Exception as e:
104
  return f"Error: {str(e)}", "Processing failed", None
105
 
106
+ # Create Gradio Interface
107
  iface = gr.Interface(
108
  fn=ocr_extract_text_and_tables,
109
+ inputs=gr.Image(type="pil", label="Upload Image"), # Ensures PIL image input
110
  outputs=[
111
  gr.Textbox(label="Extracted Text (French)"),
112
  gr.Textbox(label="Extracted Tables"),