File size: 5,709 Bytes
d79b7f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import sys
sys.path.append('src')  # So Python can find our modules

from preprocessing import load_image, convert_to_grayscale, remove_noise, binarize, preprocess_pipeline
import numpy as np
import matplotlib.pyplot as plt

# ---- Test 1: load a real receipt image and report its basic metadata ----
print("Test 1: Loading receipt1.jpg...")
image = load_image('data/raw/receipt1.jpg')
# One print call, newline-separated, produces the same three report lines.
print(
    f"✅ Success! Image shape: {image.shape}",
    f"   Data type: {image.dtype}",
    f"   Value range: {image.min()} to {image.max()}",
    sep="\n",
)

# ---- Test 2: manual visual check of the loaded image ----
print("\nTest 2: Displaying image...")
plt.imshow(image)
plt.axis('off')  # hide ticks/frame; the title is still drawn
plt.title("Loaded Receipt")
plt.show()
print("✅ If you see the receipt image, it worked!")

# Test 3: Loading a non-existent file must raise FileNotFoundError
print("\nTest 3: Testing error handling...")
try:
    # Only the call under test lives in the try body.
    load_image('data/raw/fake_image.jpg')
except FileNotFoundError as e:
    print(f"✅ Correctly raised error: {e}")
else:
    # load_image returned normally for a path that is expected not to exist.
    # Fail loudly so the script stops here instead of carrying on to the
    # final "All tests passed!" banner with a failed check behind it.
    print("❌ Should have raised FileNotFoundError!")
    raise AssertionError(
        "load_image('data/raw/fake_image.jpg') did not raise FileNotFoundError"
    )

# ---- Test 4: convert the loaded image to grayscale ----
print("\nTest 4: Converting to grayscale...")
gray = convert_to_grayscale(image)
print(f"✅ Success! Grayscale shape: {gray.shape}")
print(f"   Original had 3 channels, now has: {len(gray.shape)} dimensions")

# Show the input and the grayscale result next to each other.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
for axis, picture, caption, colormap in (
    (ax1, image, "Original (RGB)", None),   # None -> matplotlib's default cmap
    (ax2, gray, "Grayscale", 'gray'),       # 'gray' renders in grayscale
):
    axis.imshow(picture, cmap=colormap)
    axis.set_title(caption)
    axis.axis('off')

plt.tight_layout()
plt.show()

# ---- Test 5: an already-grayscale input should come back as the same object ----
print("\nTest 5: Converting already-grayscale image...")
gray_again = convert_to_grayscale(gray)
print(f"✅ Returned without error: {gray_again.shape}")
# Identity (not equality) check: the very same object is expected back.
assert gray_again is gray, "Should return same object if already grayscale"
print("✅ Correctly returned the same image!")

print("\n🎉 Grayscale tests passed!")

# Test 6: Binarization - Simple method
print("\nTest 6: Simple binarization...")
binary_simple = binarize(gray, method='simple')
print(f"✅ Success! Binary shape: {binary_simple.shape}")
print(f"   Unique values: {np.unique(binary_simple)}")  # Should be [0, 255]

# Test 7: Binarization - Adaptive method
print("\nTest 7: Adaptive binarization...")
binary_adaptive = binarize(gray, method='adaptive', block_size=11, C=2)
print(f"✅ Success! Binary shape: {binary_adaptive.shape}")
print(f"   Unique values: {np.unique(binary_adaptive)}")

# Visualize every stage in a 2x2 grid. `axes.flat` walks the grid row by row,
# so the panels appear in the numbered order of their titles.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
panels = [
    (image, "1. Original (RGB)", None),  # None -> matplotlib's default cmap
    (gray, "2. Grayscale", 'gray'),
    (binary_simple, "3. Simple Threshold", 'gray'),
    (binary_adaptive, "4. Adaptive Threshold", 'gray'),
]
for ax, (img, title, cmap) in zip(axes.flat, panels):
    ax.imshow(img, cmap=cmap)
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

# Test 8: Error handling - binarize must reject a non-grayscale input
print("\nTest 8: Testing error handling...")
try:
    binarize(image, method='adaptive')  # RGB image (3D) should fail
except ValueError as e:
    print(f"✅ Correctly raised error: {e}")
else:
    # No exception means the check failed. Stop the script rather than
    # printing the "tests passed" banner below after a failed check.
    print("❌ Should have raised ValueError!")
    raise AssertionError(
        "binarize(image, method='adaptive') did not raise ValueError"
    )

print("\n🎉 Binarization tests passed!")

# Test 9: Noise removal
print("\nTest 9: Noise removal...")
denoised = remove_noise(gray, kernel_size=3)
print(f"✅ Success! Denoised shape: {denoised.shape}")

# Run two kernel sizes so their effect can be compared visually.
denoised_light = remove_noise(gray, kernel_size=3)
denoised_heavy = remove_noise(gray, kernel_size=7)

# Visualize comparison: input next to both denoised variants.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
panels = [
    (gray, "Original Grayscale"),
    (denoised_light, "Denoised (kernel=3)"),
    (denoised_heavy, "Denoised (kernel=7)"),
]
for ax, (img, title) in zip(axes, panels):
    ax.imshow(img, cmap='gray')
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()
print("   Notice: kernel=7 is blurrier but removes more noise")

# Test 10: Error handling - an even kernel size must be rejected
print("\nTest 10: Noise removal error handling...")
try:
    remove_noise(gray, kernel_size=4)  # Even number
except ValueError as e:
    print(f"✅ Correctly raised error: {e}")
else:
    # No exception means the check failed. Stop the script rather than
    # printing the "tests passed" banner below after a failed check.
    print("❌ Should have raised ValueError!")
    raise AssertionError(
        "remove_noise(gray, kernel_size=4) did not raise ValueError"
    )

print("\n🎉 Noise removal tests passed!")

# ---- Test 11: run the combined preprocessing pipeline ----
print("\nTest 11: Full preprocessing pipeline...")

# Variant A: request all three steps (grayscale, denoise, binarize).
full_processed = preprocess_pipeline(
    image,
    steps=['grayscale', 'denoise', 'binarize'],
    denoise_kernel=3,
    binarize_method='adaptive',
)
print(f"✅ Full pipeline success! Shape: {full_processed.shape}")

# Variant B: leave out the denoise step (intended for already-clean images).
clean_processed = preprocess_pipeline(
    image,
    steps=['grayscale', 'binarize'],
    binarize_method='adaptive',
)
print(f"✅ Clean pipeline success! Shape: {clean_processed.shape}")

# Plot the input and both pipeline outputs in one row.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
panels = (
    (image, "Original", None),  # None -> matplotlib's default cmap
    (full_processed, "Full Pipeline\n(grayscale → denoise → binarize)", 'gray'),
    (clean_processed, "Clean Pipeline\n(grayscale → binarize)", 'gray'),
)
for ax, (img, title, cmap) in zip(axes, panels):
    ax.imshow(img, cmap=cmap)
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

print("\n🎉 Pipeline tests passed!")

print("\n🎉 All tests passed!")