LTPhat commited on
Commit
1f1fc6b
·
1 Parent(s): 954d8ce
create_dataset/create_fontstyle.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import random
3
+ from PIL import Image, ImageDraw, ImageFont
4
+
5
+ font_folder = 'font'
6
+ font_name = ['arial', 'bodoni','calibri','futura','heveltica','times-new-roman']
7
+
8
def fontstyle_list(font_folder, font_name):
    """Collect all .ttf font file paths for the given font families.

    Args:
        font_folder: root folder containing one sub-folder per font family.
        font_name: list of family sub-folder names to scan.

    Returns:
        Flat list of paths to every .ttf file found (empty for missing folders).
    """
    import os  # local import: portable path joining

    font_list = []
    for family in font_name:
        # BUG FIX: the original concatenated a hard-coded Windows "\\"
        # separator, which silently finds nothing on POSIX systems.
        pattern = os.path.join(font_folder, family, "*.ttf")
        font_list.extend(glob.glob(pattern))
    return font_list
15
+
16
+
17
+
18
def draw_img(label, font_list):
    """Render `label` as a 28x28 grayscale digit image.

    A random font, size and offset are used so repeated calls produce
    varied training samples.

    Args:
        label: value (typically an int digit 0-9) drawn onto the image.
        font_list: non-empty list of .ttf font file paths.

    Returns:
        (img, label) where img is a 28x28 PIL 'L'-mode image.
    """
    img = Image.new('L', (256, 256))
    size = random.randint(150, 250)
    x = random.randint(60, 90)
    y = random.randint(30, 60)
    draw = ImageDraw.Draw(img)
    # BUG FIX: pick a random font instead of always font_list[0], so the
    # generated samples actually cover every available font style.
    font = ImageFont.truetype(random.choice(font_list), size)
    draw.text((x, y), str(label), (200), font=font)

    # Downscale the large canvas to MNIST resolution for smoother glyphs.
    img = img.resize((28, 28), Image.BILINEAR)
    return img, label
30
+
31
if __name__ == "__main__":
    # Quick sanity check: list and count the discovered font files.
    discovered = fontstyle_list(font_folder, font_name)
    print(discovered)
    print(len(discovered))
create_dataset/data_generation_sample.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, ImageFont, ImageDraw
2
+ import random
3
+ import cv2
4
+ import numpy as np
5
+
6
# Candidate fonts for rendering a sample digit.
fonts = ['font\Arial\Arial.ttf', 'font\calibri\Calibri-Bold.ttf','font\heveltica\helvetica_bold.ttf','font\Times-new-roman\Times.ttf']

img = Image.new('L', (256, 256))

# Random digit, size and position so each run produces a new sample.
target = random.randint(0, 9)

size = random.randint(150, 250)
x = random.randint(60, 90)
y = random.randint(30, 60)
draw = ImageDraw.Draw(img)
# font = ImageFont.truetype(, )
# Render the digit with a randomly chosen font.
font = ImageFont.truetype(fonts[random.randint(0, 3)], size)
draw.text((x, y), str(target), (200), font=font)

# Downscale to MNIST resolution and display the result.
img = img.resize((28, 28), Image.BILINEAR)
img = np.array(img)
cv2.imshow("Image", img)
cv2.waitKey(0)
create_dataset/digital_mnist_digits.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils.data import Dataset
2
+ from create_fontstyle import fontstyle_list
3
+ import torch
4
+
5
+ from PIL import Image
6
+ from PIL import ImageFont
7
+ from PIL import ImageDraw
8
+
9
+ import glob
10
+ import random
11
+ import os
12
+
13
+ font_folder = 'font'
14
+ font_name = ['arial', 'bodoni','calibri','futura','heveltica','times-new-roman']
15
+
16
+ fonts = fontstyle_list(font_folder, font_name)
17
+
18
class PrintedMNIST(Dataset):
    """Synthetic printed-digit, MNIST-style dataset.

    Each item is generated on the fly: a random digit 0-9 rendered with a
    random font, size, position and brightness, then downscaled to 28x28.

    Args:
        samples: number of items the dataset reports via __len__.
        random_state: seed used for reproducible generation.
        transform: optional torchvision-style transform applied to each image.
    """

    def __init__(self, samples, random_state, transform=None):
        self.samples = samples
        self.random_state = random_state
        # NOTE(review): attribute name keeps the original "transfrom" typo
        # in case external code pokes at it; only used internally here.
        self.transfrom = transform
        self.fonts = fonts

        random.seed(random_state)

    def __len__(self):
        return self.samples

    def __getitem__(self, index):
        color = random.randint(200, 255)
        # Draw on a large canvas first; downscaling later smooths the glyph.
        img = Image.new("L", (256, 256))
        label = random.randint(0, 9)
        size = random.randint(180, 220)
        x = random.randint(60, 80)
        y = random.randint(30, 60)

        draw = ImageDraw.Draw(img)
        # Choose a random font style from the font style list.
        font = ImageFont.truetype(random.choice(self.fonts), size)
        # BUG FIX: pass the loaded font object, not the module-level list of
        # font paths (the original `font=fonts` raised at draw time).
        draw.text((x, y), str(label), color, font=font)

        img = img.resize((28, 28), Image.BILINEAR)
        if self.transfrom:
            img = self.transfrom(img)
        return img, label
50
+
51
class AddSPNoise(object):
    """Transform adding salt noise: each element is bumped by the tensor's
    maximum value with probability `prob`."""

    def __init__(self, prob):
        self.prob = prob

    def __call__(self, tensor):
        # Bernoulli mask, scaled by the tensor's maximum value.
        mask = torch.rand(tensor.size()) < self.prob
        return tensor + mask * tensor.max()

    def __repr__(self):
        return self.__class__.__name__ + "(prob={0})".format(self.prob)
61
+
62
+
63
class AddGaussianNoise(object):
    """Transform adding i.i.d. Gaussian noise N(mean, std^2) elementwise."""

    def __init__(self, mean=0.0, std=1.0):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        noise = torch.randn(tensor.size()) * self.std + self.mean
        return tensor + noise

    def __repr__(self):
        return self.__class__.__name__ + "(mean={0}, std={1})".format(
            self.mean, self.std
        )
helper_number_page.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ from processing import *
4
+ from utils import *
5
+ from sudoku_solve import Sudoku_solver
6
+
7
+
8
+ input_str = "000000000008236400010050020500000009100000007080000050005000200000807000000020000"
9
+ input_str2 = "800010009050807010004090700060701020508060107010502090007040600080309040300050008"
10
+
11
def draw_grid():
    """Draw an empty 9x9 sudoku grid on a white 600x600 canvas.

    Thick lines mark the outer border and the 3x3 box boundaries; thin
    lines separate individual cells.

    Returns:
        600x600x3 float image of the grid.
    """
    base_img = 1 * np.ones((600, 600, 3))
    cell = base_img.shape[0] // 9
    # Outer border.
    cv2.rectangle(base_img, (0, 0), (base_img.shape[0], base_img.shape[1]), (0, 0, 0), 10)
    for i in range(1, 10):
        # Box boundaries (every third line) are drawn thicker.
        thickness = 6 if i % 3 == 0 else 2
        cv2.line(base_img, (i * cell, 0), (i * cell, base_img.shape[1]), (0, 0, 0), thickness)
        cv2.line(base_img, (0, i * cell), (base_img.shape[0], i * cell), (0, 0, 0), thickness)
    return base_img
23
+
24
+
25
def draw_digit(base_img, input_str):
    """Draw the puzzle's given digits onto an empty grid image.

    Args:
        base_img: grid image from draw_grid().
        input_str: 81-character puzzle string (0 = blank).

    Returns:
        (base_img, board): the image with digits drawn, and the parsed board.
    """
    width = base_img.shape[0] // 9
    board = convert_str_to_board(input_str)
    for row in range(9):
        for col in range(9):
            if board[row][col] == 0:
                continue  # blank cells stay empty
            # Cell bounding box.
            p1 = (col * width, row * width)
            p2 = ((col + 1) * width, (row + 1) * width)
            # Center the digit inside its cell.
            center = ((p1[0] + p2[0]) // 2, (p1[1] + p2[1]) // 2)
            text_size, _ = cv2.getTextSize(str(board[row][col]),
                                           cv2.FONT_HERSHEY_SIMPLEX, 1, 6)
            text_origin = (center[0] - text_size[0] // 2, center[1] + text_size[1] // 2)
            cv2.putText(base_img, str(board[row][col]), text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 0), 6)
    return base_img, board
43
+
44
def solve(board):
    """Solve `board` with the backtracking Sudoku_solver.

    Returns:
        (res_board, unsolved_board): the solved board and a copy of the
        board as it was before solving.
    """
    unsolved_board = board.copy()
    solver = Sudoku_solver(board, 9)
    solver.solve()
    return solver.board, unsolved_board
50
+
51
+
52
def draw_result(base_img, unsolved_board, solved_board):
    """Draw the solved board: digits filled in by the solver are red,
    originally given digits are black.

    Returns:
        base_img with all 81 digits drawn.
    """
    width = base_img.shape[0] // 9
    for row in range(9):
        for col in range(9):
            # Cell bounding box and its center.
            p1 = (col * width, row * width)
            p2 = ((col + 1) * width, (row + 1) * width)
            center = ((p1[0] + p2[0]) // 2, (p1[1] + p2[1]) // 2)
            text_size, _ = cv2.getTextSize(str(solved_board[row][col]),
                                           cv2.FONT_HERSHEY_SIMPLEX, 1, 6)
            text_origin = (center[0] - text_size[0] // 2, center[1] + text_size[1] // 2)
            # Red for solver-filled cells, black for given digits.
            filled_by_solver = unsolved_board[row][col] != solved_board[row][col]
            color = (0, 0, 255) if filled_by_solver else (0, 0, 0)
            cv2.putText(base_img, str(solved_board[row][col]), text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 6)
    return base_img
70
+
71
+ ### CHECK VALID SODOKU PUZZLE INPUT FROM USER
72
def get_column(board, index):
    """Return column `index` of a 2-D board as a 1-D numpy array."""
    return np.array(board)[:, index]
74
+
75
+
76
def valid_row_or_col(array):
    """True if the non-zero entries of `array` contain no duplicates."""
    nonzero = array[array != 0]
    if nonzero.size == 0:
        return True  # all blanks is trivially valid
    return len(set(nonzero)) == nonzero.size
80
+
81
def valid_single_box(board, box_x, box_y):
    """True if the 3x3 box at box-coordinates (box_x, box_y) has no
    duplicate non-zero digits."""
    box = board[box_x * 3: box_x * 3 + 3, box_y * 3: box_y * 3 + 3]
    digits = box[box != 0]
    return digits.size == 0 or len(set(digits)) == digits.size
86
+
87
def valid_input_str(input_str):
    """Validate a sudoku puzzle given as an 81-character digit string.

    Returns True when no row, column or 3x3 box contains a duplicate
    non-zero digit. Delegates to valid_board so the row/column/box checks
    are not duplicated in two places (the original body was a copy of
    valid_board's).
    """
    return valid_board(convert_str_to_board(input_str))
103
+
104
def valid_board(board):
    """True if no row, column or 3x3 box of `board` contains duplicate
    non-zero digits (zeros are treated as blanks)."""
    rows_ok = all(valid_row_or_col(board[i]) for i in range(len(board)))
    cols_ok = all(valid_row_or_col(get_column(board, j)) for j in range(len(board[0])))
    boxes_ok = all(valid_single_box(board, i, j)
                   for i in range(3) for j in range(3))
    return rows_ok and cols_ok and boxes_ok
119
+
120
if __name__ == "__main__":
    # Demo: draw the puzzle, display it, solve it, display the solution,
    # then validate a second puzzle string.
    base_img = draw_grid()
    res_img = base_img.copy()
    base_img, board = draw_digit(base_img, input_str)
    cv2.imshow("IMG", base_img)
    cv2.waitKey(0)

    res_board, unsolved_board = solve(board)
    res_img = draw_result(res_img, unsolved_board, res_board)
    cv2.imshow("Show result", res_img)
    cv2.waitKey(0)

    print(valid_input_str(input_str2))
image_solver.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from utils import *
4
+ from processing import *
5
+ from threshold import preprocess
6
+ import time
7
+ import cv2
8
+ from sudoku_solve import Sudoku_solver
9
+ import matplotlib.pyplot as plt
10
+
11
+ # This module performs sudoku solver which input is a image file.
12
+
13
+ classifier = torch.load('digit_model.h5',map_location ='cpu')
14
+ classifier.eval()
15
+
16
+
17
def image_solver(img, model):
    """Solve the sudoku puzzle contained in an image.

    Args:
        img: BGR image array, or a path string (loaded with cv2.imread).
        model: digit classifier used to recognize the printed digits.

    Returns:
        (dst_img, solved_board) on success, where dst_img is the original
        view with the solution drawn in; None when the puzzle cannot be
        detected/warped.
    """
    # Accept a file path as well as an already-loaded image. The original
    # crashed when handed a path (str has no .copy()) -- and the module's
    # own __main__ did exactly that.
    if isinstance(img, str):
        img = cv2.imread(img)
    original_img = img.copy()
    threshold = preprocess(img)
    corners_img, corners_list, org_img = find_contours(threshold, img)
    try:
        # Warp the original image so the puzzle fills a square.
        warped, matrix = warp_image(corners_list, corners_img)
        # Threshold the warped image (gray-scale).
        warped_processed = preprocess(warped)

        # Extract the horizontal / vertical grid lines.
        horizontal = grid_line_helper(warped_processed, shape_location=0)
        vertical = grid_line_helper(warped_processed, shape_location=1)

        # Build a mask of the grid so only the digits remain.
        grid_mask = create_grid_mask(horizontal, vertical)
        if img.shape[0] > 600 or img.shape[1] > 600:
            # Resizing large inputs tends to improve recognition results.
            grid_mask = cv2.resize(grid_mask, (600, 600), cv2.INTER_AREA)
            number_img = cv2.bitwise_and(
                cv2.resize(warped_processed, (600, 600), cv2.INTER_AREA), grid_mask)
        else:
            number_img = cv2.bitwise_and(warped_processed, grid_mask)

        # Split into 81 cells and clean noise from each.
        squares = split_squares(number_img)
        cleaned_squares = clean_square_all_images(squares)

        # Resize cells to classifier input size and normalize pixels.
        resized_list = resize_square(cleaned_squares)
        norm_resized = normalize(resized_list)

        # Recognize the digits and build the board.
        rec_str = recognize_digits(model, norm_resized, original_img)
        board = convert_str_to_board(rec_str)

        # Solve with backtracking, timing the solve step.
        unsolved_board = board.copy()
        sudoku = Sudoku_solver(board, 9)
        start_time = time.time()
        sudoku.solve()
        solved_board = sudoku.board

        # Draw the solution on the warped view, then unwarp back.
        _, warp_with_nums = draw_digits_on_warped(warped, solved_board, unsolved_board)
        dst_img = unwarp_image(warp_with_nums, corners_img, corners_list,
                               time.time() - start_time)
        return dst_img, solved_board
    except TypeError:
        print("Can not warp image. Please try another image")
        return None
66
+
67
if __name__ == "__main__":
    url = "streamlit_app\image_from_user\Test40.jpg"  # Url for test image
    # BUG FIX: load the image first -- image_solver calls .copy() on its
    # argument, so passing the path string crashed.
    img = cv2.imread(url)
    res, solved_board = image_solver(img, classifier)
    cv2.imshow("Result", cv2.resize(res, (700, 700), cv2.INTER_AREA))
    cv2.waitKey(0)
72
+
model/evaluation.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, ImageFont, ImageDraw
2
+ from torch.utils.data import Dataset, DataLoader
3
+ import torch.nn as nn
4
+ import torchvision.transforms as transforms
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ from train_classifier import model, device, train_loader, val_loader, train_acc, train_loss, val_loss, val_acc
8
+
9
+ # Just visualize model results
10
+
11
def visualize_sample():
    """Show one training sample: print the batch shapes, display the first
    image of the batch, and print its label."""
    train_features, train_labels = next(iter(train_loader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")
    sample_img = train_features[0].squeeze()
    label = train_labels[0]
    plt.imshow(sample_img, cmap="gray")
    plt.show()
    print(f"Label: {label}")
23
+
24
+
25
+
26
def plot_metrics(train_loss, train_acc, val_loss, val_acc):
    """Plot loss (left panel) and accuracy (right panel) curves over epochs."""
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Loss curves.
    axes[0].plot(train_loss, label='train')
    axes[0].plot(val_loss, label='val')
    axes[0].legend()
    axes[0].set_title('Loss versus epochs')

    # Accuracy curves.
    axes[1].plot(train_acc, label='train')
    axes[1].plot(val_acc, label='test')
    axes[1].legend()
    axes[1].set_title('Accuracy versus epochs')
    plt.show()
38
+
39
+
40
def predict_batch(model, data_loader):
    """Run the model on one randomly chosen batch of `data_loader`.

    Returns:
        (inputs, preds, labels): the batch inputs (on `device`) plus
        predictions and ground-truth labels as numpy arrays.
    """
    batch_id = np.random.randint(0, len(data_loader))
    for index, (inputs, labels) in enumerate(data_loader):
        if index != batch_id:
            continue
        # Move model and inputs to the shared device before inference.
        model = model.to(device)
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds = outputs.argmax(dim=1).cpu().numpy()
        labels = labels.numpy()
        return inputs, preds, labels
56
+
57
+
58
if __name__ == "__main__":
    visualize_sample()
    plot_metrics(train_loss, train_acc, val_loss, val_acc)
    inputs, preds, labels = predict_batch(model, val_loader)
    print(preds)
    print(labels)
    correct = np.sum(preds == labels)
    print("Accuracy on random batch: {}/{}".format(correct, len(preds)))
model/get_model.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torchvision.models import resnet18, resnet101, resnet50
3
+ import torchvision
4
+ import torch.nn as nn
5
+
6
+
7
def get_model(model_name, pretrained=True):
    """Build a ResNet classifier adapted to 1-channel digit images.

    Args:
        model_name: one of "resnet18", "resnet50", "resnet101".
        pretrained: whether to load ImageNet-pretrained weights.

    Returns:
        torch.nn.Module with a 1-channel first conv and a 10-class head.

    Raises:
        ValueError: for an unsupported model_name (the original code fell
        through and crashed with an UnboundLocalError instead).
    """
    builders = {
        "resnet18": torchvision.models.resnet18,
        "resnet50": torchvision.models.resnet50,
        "resnet101": torchvision.models.resnet101,
    }
    if model_name not in builders:
        raise ValueError("Unsupported model name: {}".format(model_name))

    net = builders[model_name](pretrained=pretrained)

    # Replace the first layer so the network accepts grayscale images.
    net.conv1 = nn.Conv2d(
        1,
        64,
        kernel_size=(7, 7),
        stride=(2, 2),
        padding=(3, 3),
        bias=False,
    )
    # BUG FIX: resnet18 ends with 512 features, not 2048; the original
    # hard-coded 2048 for every architecture, which breaks resnet18.
    # Reading net.fc.in_features is correct for all three variants.
    net.fc = nn.Linear(in_features=net.fc.in_features, out_features=10, bias=True)
    return net
model/train_classifier.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image, ImageFont, ImageDraw
2
+ import random
3
+ import glob
4
+ from torch.utils.data import Dataset, DataLoader
5
+ import torch.nn as nn
6
+ import torchvision.transforms as transforms
7
+ import numpy as np
8
+ import torch.optim as optim
9
+ import torch
10
+ import time
11
+ import copy
12
+ from create_dataset.digital_mnist_digits import PrintedMNIST
13
+ from get_model import get_model
14
+
15
+ # Define parameters
16
+ batch_size = 64
17
+ net = get_model("resnet50")
18
+ n_epochs = 10
19
+ device = "cuda" if torch.cuda.is_available() == True else "cpu"
20
+
21
+ #Define optimizer
22
+ learning_rate = 1e-3
23
+ criterion = nn.CrossEntropyLoss()
24
+ optimizer = optim.Adam(net.parameters(), lr=learning_rate)
25
+
26
+
27
def load_dataset(batch_size):
    """Build train/val DataLoaders over the synthetic printed-digit dataset.

    Args:
        batch_size: batch size used by both loaders.

    Returns:
        (train_loader, val_loader)
    """
    train_transform = transforms.Compose([
        transforms.RandomRotation(10),  # light augmentation
        transforms.ToTensor(),
        # AddGaussianNoise(0, 1.0),
        # AddSPNoise(0.1),
    ])
    val_transforms = transforms.Compose([transforms.ToTensor()])

    # Fixed seeds keep the generated datasets reproducible across runs.
    train_set = PrintedMNIST(50000, 42, train_transform)
    val_set = PrintedMNIST(5000, 33, val_transforms)

    train_loader = DataLoader(train_set, batch_size=batch_size)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    return train_loader, val_loader
46
+
47
+
48
def train(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train `model`, validating after every epoch, and return the best weights.

    Args:
        model: network to train.
        train_loader / val_loader: DataLoaders for training / validation.
        criterion: loss function.
        optimizer: optimizer over model.parameters().
        num_epochs: number of training epochs.

    Returns:
        (model, train_loss_track, train_acc_track, val_loss_track,
        val_acc_track) where model carries the weights of the epoch with the
        highest validation correct-count.
    """
    # BUG FIX: move the model onto the shared device; the original never
    # did, so training crashed whenever device == "cuda".
    model = model.to(device)
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    train_loss_track = []
    train_acc_track = []
    val_loss_track = []
    val_acc_track = []
    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        # ---- Training loop ----
        train_loss, train_correct = 0, 0
        model.train()
        for batch in train_loader:
            images, labels = batch[0].to(device), batch[1].to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            preds = outputs.argmax(dim=1).cpu().numpy()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            np_labels_train = labels.cpu().numpy()
            # BUG FIX: weight the loss by the actual batch size, not the
            # configured global one -- the last batch may be smaller.
            train_loss += loss.item() * images.size(0)
            train_correct += np.sum(preds == np_labels_train)
        train_loss_avg = train_loss / len(train_loader.sampler)
        train_acc_avg = train_correct / len(train_loader.sampler)
        print('Train Loss: ', train_loss_avg)
        print('Train Accuracy: ', train_acc_avg)
        train_loss_track.append(train_loss_avg)
        train_acc_track.append(train_acc_avg)

        # ---- Validation loop ----
        model.eval()
        with torch.no_grad():
            valid_loss, valid_correct = 0, 0

            for batch in val_loader:
                images, labels = batch[0].to(device), batch[1].to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                preds = outputs.argmax(dim=1).cpu().numpy()
                np_label_val = labels.cpu().numpy()
                valid_loss += loss.item() * images.size(0)
                valid_correct += np.sum(preds == np_label_val)
            # Snapshot the weights of the best-performing epoch
            # (best_acc tracks the raw correct-count, as in the original).
            if valid_correct > best_acc:
                best_acc = valid_correct
                best_model_wts = copy.deepcopy(model.state_dict())
            valid_loss_avg = valid_loss / len(val_loader.sampler)
            valid_acc_avg = valid_correct / len(val_loader.sampler)
            print('Validation Loss: ', valid_loss_avg)
            print('Validation Accuracy: ', valid_acc_avg)
            val_loss_track.append(valid_loss_avg)
            val_acc_track.append(valid_acc_avg)

    # Restore the best validation weights before returning.
    model.load_state_dict(best_model_wts)
    return model, train_loss_track, train_acc_track, val_loss_track, val_acc_track
107
+
108
if __name__ == "__main__":
    train_loader, val_loader = load_dataset(batch_size)
    (model, train_loss, train_acc,
     val_loss, val_acc) = train(net, train_loader, val_loader,
                                criterion, optimizer, n_epochs)
processing.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from threshold import preprocess
4
+ from utils import find_corners, draw_circle_at_corners, grid_line_helper, draw_line
5
+ from utils import clean_square_helper, classify_one_digit
6
+
7
+ #----------------Process pipe line------------------------------#
8
+
9
+ # 1) Threshold Adaptive to get gray-scale image to find contours
10
+ # 2) Find contours from original image
11
+ # 3) Image alignment (warp image) on original image
12
+ # 4) Get horizontal, vertical line and create grid mask
13
+ # 5) Extract numbers and split gray-scale image into 81 squares
14
+ # 6) Clean noise pixels of each square
15
+ # 7) Recognize digits
16
+ # 8) Solve sudoku
17
+ # 9) Draw solved board on warped image
18
+ # 10) Unwarped image --> Result
19
+
20
+
21
def find_contours(img, original):
    """Locate the sudoku puzzle (largest 4-corner contour) in a threshold image.

    contours: tuple of point arrays, one per detected contour.
    hierachy: [Next, Previous, First_Child, Parent]
    contour approximation: https://pyimagesearch.com/2021/10/06/opencv-contour-approximation/

    Args:
        img: thresholded (binary) image used for contour detection.
        original: the corresponding original image.

    Returns:
        (draw_original, corner_list, original) where draw_original shows the
        detected contour/corners and corner_list is
        [top_left, top_right, bot_right, bot_left]; ([], [], []) on failure.
    """
    # Find contours on the threshold image, then sort largest-first.
    contours, hierachy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    # The puzzle is the largest contour that approximates to 4 points.
    polygon = None
    for con in contours:
        area = cv2.contourArea(con)
        perimeter = cv2.arcLength(con, closed=True)
        approx = cv2.approxPolyDP(con, epsilon=0.01 * perimeter, closed=True)
        if len(approx) == 4 and area > 1000:
            polygon = con  # found the puzzle
            break
    if polygon is None:
        print("Can not detect puzzle")
        return [], [], []

    # Corner points of the detected quadrilateral.
    top_left = find_corners(polygon, limit_func=min, compare_func=np.add)
    top_right = find_corners(polygon, limit_func=max, compare_func=np.subtract)
    bot_left = find_corners(polygon, limit_func=min, compare_func=np.subtract)
    bot_right = find_corners(polygon, limit_func=max, compare_func=np.add)

    # BUG FIX: check the zero-height case BEFORE dividing by that height --
    # the original divided first and could raise ZeroDivisionError.
    if bot_right[1] - top_right[1] == 0:
        print("Exception 2 : Get another image to get square-shape puzzle")
        return [], [], []
    # Reject clearly non-square quadrilaterals (width/height ratio bound).
    if not (0.5 < ((top_right[0] - top_left[0]) / (bot_right[1] - top_right[1])) < 1.5):
        print("Exception 1 : Get another image to get square-shape puzzle")
        return [], [], []

    corner_list = [top_left, top_right, bot_right, bot_left]
    draw_original = original.copy()
    cv2.drawContours(draw_original, [polygon], 0, (0, 255, 0), 3)
    # Mark each corner with a circle.
    for corner in corner_list:
        draw_circle_at_corners(draw_original, corner)

    return draw_original, corner_list, original
69
+
70
+
71
+
72
def warp_image(corner_list, original):
    """Perspective-warp the puzzle region into a square image.

    Perspective transformation: https://theailearner.com/tag/cv2-warpperspective/

    Args:
        corner_list: [top_left, top_right, bot_right, bot_left] corner points.
        original: image to warp.

    Returns:
        (transformed_image, transform_matrix), or None when warping fails.
    """
    try:
        corners = np.array(corner_list, dtype="float32")
        # BUG FIX: corner_list is ordered TL, TR, BR, BL; the original
        # unpacked it as TL, TR, BL, BR, so "side" below mixed in the
        # diagonals and came out too large.
        top_left, top_right, bot_right, bot_left = corners
        # Use the longest edge as the side of the square output.
        side = int(max(
            np.linalg.norm(top_right - bot_right),
            np.linalg.norm(top_left - bot_left),
            np.linalg.norm(bot_right - bot_left),
            np.linalg.norm(top_left - top_right),
        ))
        out_ptr = np.array([[0, 0], [side - 1, 0], [side - 1, side - 1], [0, side - 1]],
                           dtype="float32")
        transfrom_matrix = cv2.getPerspectiveTransform(corners, out_ptr)
        transformed_image = cv2.warpPerspective(original, transfrom_matrix, (side, side))
        return transformed_image, transfrom_matrix
    except IndexError:
        print("Can not detect corners")
    except Exception:
        # Narrowed from a bare `except:`; still best-effort, but no longer
        # swallows SystemExit/KeyboardInterrupt.
        print("Something went wrong. Try another image")
96
+
97
+
98
+
99
+
100
def get_grid_line(img, length=10):
    """Extract the grid lines from a warped puzzle image.

    Returns:
        (vertical, horizontal) line images.
    """
    # NOTE(review): `length` is currently unused -- kept for interface
    # compatibility with existing callers.
    horizontal = grid_line_helper(img, shape_location=1)
    vertical = grid_line_helper(img, shape_location=0)
    return vertical, horizontal
108
+
109
+
110
+
111
+
112
def create_grid_mask(horizontal, vertical):
    """Build a mask of the full grid so digits can be isolated.

    Combines the horizontal and vertical line images, dilates them, completes
    the lines with a Hough transform, and inverts the result so grid pixels
    are removed when AND-ed with the warped image.
    """
    # Combine the two line images into one grid image.
    grid = cv2.add(horizontal, vertical)
    # grid = cv2.adaptiveThreshold(grid, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 235, 2)
    # Thicken the lines so the Hough transform finds them reliably.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    grid = cv2.dilate(grid, kernel, iterations=2)
    # Detect and redraw the complete grid lines.
    lines = cv2.HoughLines(grid, 0.3, np.pi / 90, 200)
    lines_img = draw_line(grid, lines)
    # Invert: everything except the grid lines survives a bitwise AND.
    return cv2.bitwise_not(lines_img)
130
+
131
+
132
+
133
+
134
def split_squares(number_img):
    """Split a square board image into 81 cell images, row-major order.

    Args:
        number_img: square image whose side is a multiple of 9.

    Returns:
        List of 81 sub-images, one per sudoku cell.
    """
    side = number_img.shape[0] // 9
    return [
        number_img[row * side:(row + 1) * side, col * side:(col + 1) * side]
        for row in range(9)
        for col in range(9)
    ]
149
+
150
+
151
+
152
+
153
def clean_square(square_list):
    """Clean each cell image and count how many cells contain a digit.

    Returns:
        (cleaned_squares, count): a list with a cleaned image for each digit
        cell and 0 for each empty cell, plus the number of digit cells.
    """
    cleaned_squares = []
    count = 0
    for square in square_list:
        cleaned, has_digit = clean_square_helper(square)
        if has_digit:
            cleaned_squares.append(cleaned)
            count += 1
        else:
            cleaned_squares.append(0)
    return cleaned_squares, count
170
+
171
+
172
+
173
def clean_square_all_images(square_list):
    """Clean every cell image; cells without a digit come back as a black
    image after cleaning.

    Returns:
        List of cleaned cell images (one per input square).
    """
    return [clean_square_helper(square)[0] for square in square_list]
184
+
185
def recognize_digits(model, resized, org_img):
    """Classify every cell image and concatenate the digits into one
    81-character board string."""
    return "".join(str(classify_one_digit(model, cell, org_img))
                   for cell in resized)
191
+
192
+
193
+
194
def draw_digits_on_warped(warped_img, solved_board, unsolved_board):
    """Draw the solver-filled digits onto the warped puzzle image.

    Only cells that were blank in the unsolved board are drawn, so the
    digits already printed on the puzzle are not painted over.

    Returns:
        (img_w_text, warped_img): a black canvas of the same shape (kept
        for interface compatibility) and the warped image with digits drawn.
    """
    width = warped_img.shape[0] // 9
    img_w_text = np.zeros_like(warped_img)

    for row in range(9):
        for col in range(9):
            if unsolved_board[row][col] != 0:
                continue  # already printed on the puzzle
            # Cell bounding box and its center.
            p1 = (col * width, row * width)
            p2 = ((col + 1) * width, (row + 1) * width)
            center = ((p1[0] + p2[0]) // 2, (p1[1] + p2[1]) // 2)
            text_size, _ = cv2.getTextSize(str(solved_board[row][col]),
                                           cv2.FONT_HERSHEY_SIMPLEX, 1, 6)
            text_origin = (center[0] - text_size[0] // 2, center[1] + text_size[1] // 2)
            cv2.putText(warped_img, str(solved_board[row][col]), text_origin,
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 6)

    return img_w_text, warped_img
220
+
221
+
222
+
223
def unwarp_image(img_src, img_dest, pts, time):
    """Project the solved (warped) puzzle back onto the original image.

    Args:
        img_src: warped image with the solution drawn on it.
        img_dest: original image to composite the solution into.
        pts: the four puzzle corners in img_dest.
        time: solve duration in seconds, printed on the result.

    Returns:
        img_dest with the solved puzzle and timing text composited in.
    """
    pts = np.array(pts)
    height, width = img_src.shape[0], img_src.shape[1]
    # BUG FIX: the bottom-left source corner is (0, height-1); the original
    # used width-1 for the y coordinate, skewing the homography whenever the
    # source image was not exactly square.
    pts_source = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype='float32')

    matrix, status = cv2.findHomography(pts_source, pts)
    # Map the solution back into the original view's perspective.
    warped = cv2.warpPerspective(img_src, matrix, (img_dest.shape[1], img_dest.shape[0]))
    # Black out the puzzle area in the destination before compositing.
    cv2.fillConvexPoly(img_dest, pts, 0, 16)
    dst_img = cv2.add(img_dest, warped)
    dst_img_height, dst_img_width = dst_img.shape[0], dst_img.shape[1]
    cv2.putText(dst_img, "Time solved: {} s".format(str(np.round(time, 4))),
                (dst_img_width - 360, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    return dst_img
241
+
realtime_solver.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from utils import *
4
+ from processing import *
5
+ from threshold import preprocess
6
+ import time
7
+ import cv2
8
+ from sudoku_solve import Sudoku_solver
9
+ from PIL import Image
10
+ from helper_number_page import valid_board
11
+
12
+
13
# Load the digit classifier once at startup (CPU inference).
classifier = torch.load('digit_classifier.h5', map_location='cpu')
classifier.eval()


frameWidth = 960
frameHeight = 720

cap = cv2.VideoCapture(0)
frame_rate = 60

# Capture property id 3 is frame width, id 4 is frame height.
cap.set(3, frameWidth)
cap.set(4, frameHeight)

# Property id 10 is brightness.
cap.set(10, 150)
prev = 0

while cap.isOpened():
    time_elapsed = time.time() - prev
    success, img = cap.read()
    # BUG FIX: bail out when the frame grab fails; the original ignored
    # `success` and crashed on img.copy() with img == None.
    if not success:
        break
    if time_elapsed > 1. / frame_rate:
        prev = time.time()
        final_img = img.copy()
        to_process_img = img.copy()
        # --- Detection ---
        thresholded_img = preprocess(to_process_img)  # gray-scale threshold
        corners_img, corners_list, org_img = find_contours(thresholded_img, to_process_img)

        if corners_list:
            # Warp the puzzle to a square view and threshold it.
            warped, matrix = warp_image(corners_list, corners_img)
            warped_processed = preprocess(warped)  # gray-scaled warped img

            # Grid lines.
            horizontal = grid_line_helper(warped_processed, shape_location=0)
            vertical = grid_line_helper(warped_processed, shape_location=1)

            # Grid mask; resizing tends to improve recognition.
            grid_mask = create_grid_mask(horizontal, vertical)
            grid_mask = cv2.resize(grid_mask, (600, 600), cv2.INTER_AREA)
            # Extract only the digits.
            number_img = cv2.bitwise_and(
                cv2.resize(warped_processed, (600, 600), cv2.INTER_AREA), grid_mask)
            # Split into 81 cells and clean noise from each.
            squares = split_squares(number_img)
            cleaned_squares = clean_square_all_images(squares)

            # Resize cells and normalize pixel values for the classifier.
            resized_list = resize_square(cleaned_squares)
            norm_resized = normalize(resized_list)

            # Recognize the digits and build the board.
            rec_str = recognize_digits(classifier, norm_resized, org_img)
            board = convert_str_to_board(rec_str)

            # Solve with backtracking, timing the solve step.
            unsolved_board = board.copy()
            sudoku = Sudoku_solver(board, 9)
            start_time = time.time()
            sudoku.solve()
            solved_board = sudoku.board

            # Draw the solution on the warped view and unwarp back.
            _, warp_with_nums = draw_digits_on_warped(warped, solved_board, unsolved_board)
            final_img = unwarp_image(warp_with_nums, corners_img, corners_list,
                                     time.time() - start_time)
            cv2.imshow("Result", final_img)
            if valid_board(solved_board):
                # Hold a correctly solved frame on screen for a second.
                cv2.waitKey(1000)
        else:
            cv2.imshow("Result", final_img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()
cap.release()
sudoku_solve.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
class Sudoku_solver():
    """
    Solve a Sudoku puzzle in place using the backtracking algorithm.

    Attributes:
        board: 2-D indexable grid (list of lists or numpy array) where 0
               marks an empty cell and 1..size are filled digits.
        size:  side length of the board (9 for standard Sudoku).  Must be a
               perfect square so the grid splits into sub-boxes.
               (The original stored `size` but hard-coded 9/3 everywhere;
               it is now actually used, with identical behavior for size=9.)
    """

    def __init__(self, board, size):
        self.board = board
        self.size = size
        # Side length of one sub-box (3 for a standard 9x9 board).
        self._box = int(size ** 0.5)

    def print_board(self):
        """
        Visualize the result board with box separators (debug helper).
        """
        last_col = len(self.board[0]) - 1
        for i in range(len(self.board)):
            if i % self._box == 0 and i != 0:
                print("- - - - - - - - - - - - - ")

            for j in range(len(self.board[0])):
                if j % self._box == 0 and j != 0:
                    print(" | ", end="")

                if j == last_col:
                    print(self.board[i][j])
                else:
                    print(str(self.board[i][j]) + " ", end="")

    def valid(self, num, pos):
        """
        Return True if placing `num` at `pos` (row, col) keeps the board
        valid.  The cell at `pos` itself is excluded from each comparison so
        an already-placed digit can be re-checked.
        """
        row, col = pos

        # Row: the same number may not appear twice in this row.
        for j in range(len(self.board[0])):
            if self.board[row][j] == num and col != j:
                return False

        # Column: the same number may not appear twice in this column.
        for i in range(len(self.board)):
            if self.board[i][col] == num and row != i:
                return False

        # Sub-box containing pos (one of size boxes on the board).
        box_x = row // self._box
        box_y = col // self._box
        for i in range(box_x * self._box, box_x * self._box + self._box):
            for j in range(box_y * self._box, box_y * self._box + self._box):
                if self.board[i][j] == num and (i, j) != pos:
                    return False
        return True

    def find_empty_cell(self):
        """
        Return the (row, col) of the first empty (0) cell, scanning
        row-major, or None when the board is full.
        """
        for i in range(len(self.board)):
            for j in range(len(self.board[0])):
                if self.board[i][j] == 0:
                    return (i, j)
        return None

    def solve(self):
        """
        Fill the board in place via backtracking.
        Returns True when a complete solution was found, False otherwise.
        """
        pos = self.find_empty_cell()
        # Base case: no empty cell left, the board is complete.
        if not pos:
            return True
        row, col = pos

        # Try every candidate digit for this cell.
        for num in range(1, self.size + 1):
            if self.valid(num, (row, col)):
                self.board[row][col] = num

                if self.solve():
                    return True

                # Dead end: undo the placement and try the next digit.
                self.board[row][col] = 0
        return False
threshold.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
def preprocess(img):
    """
    Threshold an input photo so grid lines and digits are white on black.

    Input:  BGR colour image (H, W, 3) or an already gray-scale image.
    Output: binary gray-scale image after blur, adaptive threshold,
            inversion, opening and dilation.
    """
    # Convert to gray-scale only when the input actually has colour channels.
    # The original indexed shape[2] unconditionally (IndexError on 2-D gray
    # input) and left gray_img unbound for single-channel 3-D input.
    if img.ndim == 3 and img.shape[2] != 1:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = img if img.ndim == 2 else img[:, :, 0]
    # Gaussian blur smooths sensor noise before thresholding.
    blured = cv2.GaussianBlur(gray_img, (9, 9), 0)
    # Adaptive threshold copes with uneven lighting across the photo.
    thresh = cv2.adaptiveThreshold(blured, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    # Invert so that the grid lines and text are white, the rest black.
    # (The stray `0` dst argument of the original was meaningless.)
    inverted = cv2.bitwise_not(thresh)
    morphy_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    # Opening removes small white speckles (dots etc.).
    morph = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, morphy_kernel)
    # Dilate to thicken the surviving strokes/borders.
    result = cv2.dilate(morph, morphy_kernel, iterations=1)
    return result
25
+
26
+
27
if __name__ == "__main__":
    # Quick visual sanity check of the preprocessing pipeline.
    path = r"testimg\sudoku_real_4.jpeg"  # raw string so the backslash is literal
    img = cv2.imread(path)
    processed = preprocess(img)
    # Show the processed result — the original displayed the raw image and
    # discarded `processed`.  Note interpolation must be passed by keyword:
    # as the third positional argument it lands in the `dst` slot of
    # cv2.resize and is silently ignored.
    cv2.imshow("img", cv2.resize(processed, (600, 600), interpolation=cv2.INTER_AREA))
    cv2.waitKey(0)
utils.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import operator
4
+ import torch
5
+
6
+
7
+
8
def find_corners(polygon, limit_func, compare_func):
    """
    Locate one corner of a contour polygon.

    polygon:      contour points shaped like cv2 output, i.e. [[[x, y]], ...]
    limit_func:   min or max — which extreme to take
    compare_func: np.add or np.subtract — how x and y are combined

    With the origin at the top-left of the image:
        top-left:  min of (x + y)     top-right: max of (x - y)
        bot-left:  min of (x - y)     bot-right: max of (x + y)

    Returns the (x, y) of the selected corner point.
    """
    scores = [compare_func(point[0][0], point[0][1]) for point in polygon]
    best_index, _ = limit_func(enumerate(scores), key=operator.itemgetter(1))
    corner = polygon[best_index][0]
    return corner[0], corner[1]
25
+
26
+
27
+
28
def draw_circle_at_corners(original, ptr):
    """Mark a corner: draw a filled radius-5 green circle at `ptr` onto `original`."""
    green = (0, 255, 0)
    cv2.circle(original, ptr, 5, green, cv2.FILLED)
34
+
35
+
36
+
37
def grid_line_helper(img, shape_location, length = 10):
    """
    Extract the grid lines running along one axis of a thresholded image.

    shape_location = 0 -> keep horizontal lines
    shape_location = 1 -> keep vertical lines
    length: the minimum line length is one `length`-th of the image side.
    """
    work = img.copy()
    # Approximate line length from the relevant image dimension.
    size = work.shape[shape_location] // length

    # An elongated rectangular kernel: erosion erases everything that is not
    # a long white run along the chosen axis, dilation restores the survivors.
    kernel_shape = (size, 1) if shape_location == 0 else (1, size)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_shape)

    work = cv2.erode(work, kernel)
    work = cv2.dilate(work, kernel)
    return work
62
+
63
+
64
+
65
def draw_line(img, lines):
    """
    Draw every (rho, theta) line from cv2.HoughLines() onto a copy of img.

    Returns the annotated copy; the input image is left untouched.
    """
    clone_img = img.copy()
    # cv2.HoughLines() returns shape (n, 1, 2).  Reshape to (n, 2) explicitly:
    # np.squeeze would collapse a single detected line to shape (2,), which
    # breaks the `for rho, theta in lines` unpacking below.
    lines = np.reshape(lines, (-1, 2))
    for rho, theta in lines:
        a = np.cos(theta)
        b = np.sin(theta)
        # (x0, y0) is the point on the line closest to the origin.
        x0 = a * rho
        y0 = b * rho
        # Extend 1000 px in both directions so the segment crosses the image.
        x1 = int(x0 + 1000 * (-b))
        y1 = int(y0 + 1000 * a)
        x2 = int(x0 - 1000 * (-b))
        y2 = int(y0 - 1000 * a)
        # Draw each line in white, thickness 4.
        cv2.line(clone_img, (x1, y1), (x2, y2), (255, 255, 255), 4)
    return clone_img
86
+
87
+
88
+
89
def clean_square_helper(img):
    """
    Clean up a single extracted cell of the sudoku grid.

    Input:  one of the 81 square images (white digit on black background).
    Output: (cleaned image, has_digit flag).  Empty or edge-only squares come
            back as all-black with the flag False; otherwise the largest
            connected component (the digit) is re-centred in the square.
    """
    height, width = img.shape

    # Almost entirely black -> empty cell.
    black_ratio = np.isclose(img, 0).sum() / (height * width)
    if black_ratio >= 0.96:
        return np.zeros_like(img), False

    # If the central vertical band is almost black, the white we found is a
    # grid edge caught at the border, not a digit.
    mid = width // 2
    band = img[:, int(mid - width * 0.38):int(mid + width * 0.38)]
    if np.isclose(band, 0).sum() / (2 * width * 0.38 * height) >= 0.98:
        return np.zeros_like(img), False

    # Re-centre the digit: take the bounding box of the biggest contour and
    # paste it into the middle of a fresh black square.
    contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    biggest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(biggest)

    offset_x = (width - w) // 2
    offset_y = (height - h) // 2
    centered = np.zeros_like(img)
    centered[offset_y:offset_y + h, offset_x:offset_x + w] = img[y:y + h, x:x + w]

    return centered, True
116
+
117
+
118
+
119
def resize_square(clean_square_list):
    """Resize each cleaned square to 28x28 so it can be fed to the classifier."""
    return [cv2.resize(square, (28, 28), interpolation=cv2.INTER_AREA)
            for square in clean_square_list]
129
+
130
+
131
+
132
def resize_square32(clean_square_list):
    """Resize each cleaned square to 32x32 so it can be fed to the tf classifier."""
    return [cv2.resize(square, (32, 32), interpolation=cv2.INTER_AREA)
            for square in clean_square_list]
141
+
142
+
143
def classify_one_digit(model, resize_square, org_image):
    """
    Classify the digit in one 28x28 square, or detect a blank square.

    model:         torch classifier accepting a (1, 1, 28, 28) float tensor
    resize_square: 28x28 numpy array of the cell
    org_image:     the original photo, used only to pick a noise threshold

    Returns the predicted digit as a string; "0" for a blank square.
    """
    # Larger photos produce noisier, thinner cells, so require fewer lit
    # pixels there.  The original condition was redundant and also indexed
    # org_image.shape[2] (the channel count, never > 600), which raised
    # IndexError on 2-D gray-scale input.
    if org_image.shape[0] > 600 or org_image.shape[1] > 600:
        threshold = 40
    else:
        threshold = 60

    # Blank square: too few pixels differ from the background (min) value.
    if (resize_square != resize_square.min()).sum() < threshold:
        return str(0)

    model.eval()
    # Shape (1, 1, 28, 28): batch and channel axes the classifier expects.
    iin = torch.Tensor(resize_square).unsqueeze(0).unsqueeze(0)

    with torch.no_grad():
        out = model(iin)
        # Index of the highest logit is the predicted digit.
        _, index = torch.max(out, 1)

    pred_digit = index.item()

    return str(pred_digit)
172
+
173
+
174
+
175
def normalize(resized_list):
    """Scale pixel values from [0, 255] down to [0, 1] for recognition."""
    scaled = []
    for square in resized_list:
        scaled.append(square / 255)
    return scaled
181
+
182
+
183
+
184
def convert_str_to_board(string, step = 9):
    """
    Turn the recognized digit string into a 2-D numpy board for solving.

    string: digits read cell-by-cell, row after row
    step:   number of cells per row (9 for a standard sudoku)
    """
    rows = [[int(char) for char in string[start:start + step]]
            for start in range(0, len(string), step)]
    return np.array(rows)