Spaces:
Runtime error
Runtime error
| import cv2 | |
| import numpy as np | |
| import math | |
| from sklearn.linear_model import LinearRegression | |
| import pytesseract | |
| import re | |
import platform

# Point pytesseract at the Tesseract binary for the current OS.
# The original code assigned the Windows path and then unconditionally
# overwrote it with the Linux path, leaving the first line dead.
if platform.system() == "Windows":
    pytesseract.pytesseract.tesseract_cmd = "C:/Program Files/Tesseract-OCR/tesseract.exe"
else:
    pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
def first_preprocessing(image):
    """Blank out the largest edge-bounded region of *image* with white.

    Detects external contours on a Canny edge map, picks the one with the
    largest area (assumed to be the crossword grid), and paints its bounding
    rectangle solid white in place so later text passes ignore it.

    Returns the modified image.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 75, 25)
    found, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    biggest = max(found, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(biggest)
    # thickness=-1 fills the rectangle, erasing the region.
    return cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), -1)
def remove_head(image):
    """Whiten the header band at the top of the page (modifies *image* in place).

    Runs OCR over the whole image and paints a white filled rectangle intended
    to cover the first line of detected text (the puzzle title/header).
    """
    custom_config = r'--oem 3 --psm 6' # Tesseract OCR configuration
    detected_text = pytesseract.image_to_string(image, config=custom_config)
    lines = detected_text.split('\n')
    # Find the first line containing some text
    line_index = 0
    for i, line in enumerate(lines):
        if line.strip() != '':
            line_index = i
            break
    first_newline_idx = detected_text.find('\n')
    # NOTE(review): line_index is a text-LINE index and first_newline_idx is a
    # CHARACTER offset into the OCR string, yet both are used as pixel rows
    # below — this only approximates the header height by coincidence.
    # Confirm against pytesseract.image_to_data for real pixel coordinates.
    result = cv2.rectangle(image, (0, line_index), (image.shape[1], first_newline_idx), (255,255,255), thickness=cv2.FILLED)
    return result
def second_preprocessing(image):
    """Blank out the largest edge-bounded region of *image* with white.

    This pass is byte-for-byte identical to first_preprocessing (Canny edges,
    largest external contour, fill its bounding box white), so it simply
    delegates instead of duplicating the body.

    Returns the modified image.
    """
    return first_preprocessing(image)
def find_vertical_profile(image):
    """Return the column-wise ink profile of *image*.

    Otsu-thresholds an inverted grayscale copy (so text pixels become 255)
    and sums each column: columns containing text score high, the gaps
    between text columns score near zero.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, ink_mask = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    return ink_mask.sum(axis=0)
def detect_steepest_changes(projection_profile, threshold=0.4, start_idx=0, min_valley_width=10, min_search_width=50):
    """Find the next text-column boundary pair in a vertical projection profile.

    Scans for the first steep change located at least min_search_width past
    start_idx, takes the minimum of the remaining profile as the right
    boundary and a local minimum just before the change as the left boundary,
    then stops — each call yields at most one (left, right) pair.

    Returns (left_boundaries, right_boundaries): lists with 0 or 1 entry each.
    """
    # Indices where the profile changes faster than `threshold` times the
    # steepest change anywhere in the profile.
    differences = np.diff(projection_profile)
    change_points = np.where(np.abs(differences) > threshold * np.max(np.abs(differences)))[0]
    left_boundaries = []
    right_boundaries = []
    for idx in change_points:
        if idx <= start_idx:
            continue  # already consumed by a previous call
        if idx - start_idx >= min_search_width:
            decreasing_profile = projection_profile[idx:]
            if np.any(decreasing_profile > 0):
                # Right boundary: deepest point of the remaining profile.
                right_boundary = idx + np.argmin(decreasing_profile)
                right_boundaries.append(right_boundary)
            else:
                continue
            # Left boundary: local minimum in a window just before the change.
            valley_start = max(start_idx, idx - min_valley_width)
            # NOTE(review): the extra -40 widens the search window leftwards
            # and can make valley_start negative for small idx (numpy slice
            # then wraps around) — confirm intent.
            valley_start = valley_start-40
            valley_end = min(idx + min_valley_width, len(projection_profile) - 1)
            valley = valley_start + np.argmin(projection_profile[valley_start:valley_end])
            left_boundaries.append(valley)
            break
    return left_boundaries, right_boundaries
def crop_text_columns(image, projection_profile, threshold=0.4):
    """Split *image* into vertical text-column crops.

    Repeatedly asks detect_steepest_changes for the next (left, right)
    boundary pair, cropping the full-height strip between them, until no
    further pair is found.

    Returns a list of image slices, one per detected column.
    """
    columns = []
    cursor = 0
    while True:
        lefts, rights = detect_steepest_changes(projection_profile, threshold, cursor)
        if not lefts or not rights:
            return columns
        left, right = lefts[0], rights[0]
        columns.append(image[:, left:right])
        # Resume the search just past the column we took.
        cursor = right
def parse_clues(clue_text):
    """Parse OCR'd clue text into ``{clue_number: [column, clue_string]}``.

    A line starting with a number opens a new clue; numberless lines are
    treated as continuations of the previous clue (same column only).
    A literal "column separation" line bumps the current column counter.
    Numbers may carry a decimal point (OCR artefact), hence float keys.
    """
    clues = {}
    current = None
    column = 0
    # Leading clue number (possibly decimal) followed by the clue body.
    clue_start = r"^(\d+(?:\.\d+)?)\s*(.+)"
    for line in clue_text.split('\n'):
        if "column separation" in line:
            column += 1
            continue
        match = re.search(clue_start, line)
        if match:
            current = float(match.group(1))
            if current in clues:
                continue  # duplicate number: keep the first occurrence
            clues[current] = [column, match.group(2).strip()]
        elif current is None or clues[current][0] != column:
            # No clue open yet, or the line belongs to a different column.
            continue
        else:
            # Continuation of a multi-line clue.
            clues[current][1] += " " + line.strip()
    return clues
def parse_crossword_clues(text):
    """Split OCR clue text into (across, down) clue dictionaries.

    The text is divided at the first case-insensitive "down" heading line;
    when no such heading exists, everything is treated as across clues.

    Returns a tuple (across, down) of dicts as produced by parse_clues.
    """
    delimiter = r'[dD][oO][wW][nN]\n'
    if re.search(delimiter, text):
        # maxsplit=1 guards against ValueError on unpacking when the token
        # appears more than once (e.g. a clue itself containing "down\n").
        across_clues, down_clues = re.split(delimiter, text, maxsplit=1)
    else:
        # If "Down" clues are not present, treat everything as across.
        across_clues, down_clues = text, ""
    across = parse_clues(across_clues)
    down = parse_clues(down_clues)
    return across, down
def classify_text(filtered_columns):
    """OCR each cropped clue column, concatenate the text, and parse it into
    across/down clue dictionaries.

    Each column is upscaled 2x and denoised before OCR to improve accuracy.
    Returns (across_clues, down_clues) as produced by parse_crossword_clues.
    """
    text = ""
    custom_config = r'--oem 3 --psm 6'
    for i, column in enumerate(filtered_columns):
        column2 = cv2.cvtColor(column, cv2.COLOR_BGR2RGB)
        scale_factor = 2.0 # You can adjust this value
        # Calculate the new dimensions after scaling
        new_width = int(column2.shape[1] * scale_factor)
        new_height = int(column2.shape[0] * scale_factor)
        # Resize the image using OpenCV
        scaled_image = cv2.resize(column2, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
        # NOTE(review): fastNlMeansDenoising is the grayscale variant but is
        # given a 3-channel image here — fastNlMeansDenoisingColored is the
        # colour version; confirm this behaves as intended.
        denoised_image = cv2.fastNlMeansDenoising(scaled_image, None, h=10, templateWindowSize=7, searchWindowSize=21)
        enhanced_image = cv2.cvtColor(denoised_image, cv2.COLOR_BGR2GRAY) # Convert to grayscale # Apply histogram equalization
        detected_text = pytesseract.image_to_string(enhanced_image, config=custom_config)
        # print(detected_text)
        text+=detected_text
    # Parse the accumulated text from all columns in one pass.
    across_clues, down_clues = parse_crossword_clues(text)
    return across_clues,down_clues
def get_text(image):
    """Run the full clue-extraction pipeline on a grayscale crossword page.

    Steps: convert to BGR, erase the grid (first_preprocessing), erase the
    header (remove_head), erase any remaining large region
    (second_preprocessing), locate text columns via the vertical ink profile,
    OCR and parse them.

    Returns (across, down) clue dictionaries.
    """
    bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    cleaned = second_preprocessing(remove_head(first_preprocessing(bgr)))
    profile = find_vertical_profile(cleaned)
    columns = crop_text_columns(cleaned, profile)
    return classify_text(columns)
| ################################ Grid Extraction begins here ########################### | |
| ######################################################################################## | |
# for applying non max suppression of the contours
def calculate_iou(image, contour1, contour2):
    """Approximate intersection-over-union of two contours.

    The intersection is measured exactly by rasterizing both contours onto
    masks; the union is approximated by the area of the convex hull of the
    combined points (an overestimate, acceptable for NMS filtering).

    Returns a float in [0, 1]; 0.0 when the union area is zero (degenerate
    contours), which previously raised ZeroDivisionError.
    """
    # Create masks for each contour
    mask1 = np.zeros_like(image, dtype=np.uint8)
    cv2.drawContours(mask1, [contour1], -1, 255, thickness=cv2.FILLED)
    mask2 = np.zeros_like(image, dtype=np.uint8)
    cv2.drawContours(mask2, [contour2], -1, 255, thickness=cv2.FILLED)
    # Find the intersection between the two masks
    intersection = cv2.bitwise_and(mask1, mask2)
    intersection_area = cv2.countNonZero(intersection)
    # Union approximated by the convex hull of both contours' points.
    union_area = cv2.contourArea(cv2.convexHull(np.concatenate((contour1, contour2))))
    if union_area == 0:
        # Degenerate (zero-area) contours cannot overlap meaningfully.
        return 0.0
    return intersection_area / union_area
# Drop contours that are non-square or overlap an already-kept contour.
# Compares every contour against every kept one (O(n^2)) — keep the input small.
def filter_contours(img_gray2, contours, iou_threshold = 0.6, asp_ratio = 1,tolerance = 0.5):
    """Return contours that are roughly square and non-overlapping.

    Each contour is polygon-approximated, rejected when its bounding-box
    aspect ratio strays more than *tolerance* from *asp_ratio*, then rejected
    when its IoU with any previously kept contour exceeds *iou_threshold*
    (non-maximum suppression).
    """
    kept = []
    eps_fraction = 0.02
    for raw in contours:
        # Simplify the polygon before measuring it.
        eps = eps_fraction * cv2.arcLength(raw, True)
        approx = cv2.approxPolyDP(raw, eps, True)
        # Aspect-ratio gate: keep only roughly square shapes.
        _, _, w, h = cv2.boundingRect(approx)
        if abs(float(w) / h - asp_ratio) > tolerance:
            continue
        # Suppress anything overlapping a contour already accepted.
        overlaps = (calculate_iou(img_gray2, np.array(approx), np.array(prev)) for prev in kept)
        if all(iou <= iou_threshold for iou in overlaps):
            kept.append(approx)
    return kept
# https://stackoverflow.com/questions/383480/intersection-of-two-lines-defined-in-rho-theta-parameterization/383527#383527
def parametricIntersect(r1, t1, r2, t2):
    """Intersect two Hough lines given in (rho, theta) parameterization.

    Each line satisfies x*cos(t) + y*sin(t) = r. Returns the integer (x, y)
    intersection point, or None when the lines are parallel (zero
    determinant).
    """
    cos1, sin1 = np.cos(t1), np.sin(t1)
    cos2, sin2 = np.cos(t2), np.sin(t2)
    det = cos1 * sin2 - sin1 * cos2
    if det == 0.0:
        return None
    px = int((sin2 * r1 - sin1 * r2) / det)
    py = int((cos1 * r2 - cos2 * r1) / det)
    return px, py
# Bucket corner points into runs that likely belong to one grid line.
def group_lines(coordinates,axis=0,threshold=10):
    """Group points whose *axis* coordinate lies within *threshold* of the
    first point in the current run.

    Points are sorted along *axis* first; a run is emitted only when it holds
    more than 4 points (fewer is treated as noise). Returns a list of point
    lists.
    """
    ordered = sorted(coordinates, key=lambda pt: pt[axis])
    groups = []
    run = []
    for idx, point in enumerate(ordered):
        # A gap larger than threshold (relative to the run's anchor point)
        # closes the current run.
        if idx and abs(run[0][axis] - point[axis]) > threshold:
            if len(run) > 4:
                groups.append(run)
            run = []
        run.append(point)
    if len(run) > 4:
        groups.append(run)
    return groups
# Fit one straight line per point group via linear regression.
def fit_lines(grouped_lines,is_horizontal = False):
    """Fit x = slope*y + intercept through each group of points.

    Regressing x on y keeps near-vertical grid lines well-conditioned. For
    horizontal lines the intercept is replaced by the mean y of the group.
    Returns a list of (slope, intercept) tuples.
    """
    fitted = []
    for group in grouped_lines:
        pts = np.array(group)
        # Split into coordinate columns.
        xs = pts[:, 0]
        ys = pts[:, 1]
        model = LinearRegression().fit(ys.reshape(-1, 1), xs)
        slope = model.coef_[0]
        intercept = np.mean(ys) if is_horizontal else model.intercept_
        fitted.append((slope, intercept))
    return fitted
# Calculates the mean gap between two consecutive elements in an array
def average_distance(arr):
    """Return the mean absolute difference between consecutive elements.

    Returns 0.0 for sequences with fewer than two elements; the original
    raised ZeroDivisionError in that case.
    """
    n = len(arr)
    if n < 2:
        return 0.0
    return sum(abs(arr[i + 1] - arr[i]) for i in range(n - 1)) / (n - 1)
# If two adjacent lines are nearer than some threshold, then merge them
# Returns results in y = mx + b form
def average_out_similar_lines(lines_m_c,lines_coord,del_threshold,is_horizontal=False):
    """Merge adjacent fitted lines whose intercepts differ by less than
    *del_threshold* by re-fitting one line through their combined points.

    lines_m_c: list of (slope, intercept) pairs, assumed ordered by intercept.
    lines_coord: the point groups the lines were fitted from (parallel list).
    Returns a list of (slope, intercept) pairs. Merging is pairwise only —
    a merged line is not re-compared against the next one.
    """
    averaged_lines = []
    i = 0
    while(i < len(lines_m_c) - 1):
        _, intercept1 = lines_m_c[i]
        _, intercept2 = lines_m_c[i + 1]
        if abs(intercept2 - intercept1) < del_threshold:
            # NOTE(review): [:-1] silently drops the last point of the second
            # group — looks accidental; confirm intent.
            new_points = np.array(lines_coord[i] + lines_coord[i+1][:-1])
            # Separate the x and y coordinates
            x = new_points[:, 0]
            y = new_points[:, 1]
            # Fit a linear regression model (x as a function of y)
            regressor = LinearRegression()
            regressor.fit(y.reshape(-1, 1), x)
            # Get the slope and intercept of the fitted line
            slope = regressor.coef_[0]
            intercept = regressor.intercept_
            if(is_horizontal):
                intercept = np.mean(y)
            averaged_lines.append((slope,intercept))
            i+=2
        else:
            averaged_lines.append(lines_m_c[i])
            i+=1
    # The last line is appended unmerged when the loop stops one short.
    if(i < len(lines_m_c)):
        averaged_lines.append(lines_m_c[i])
    return averaged_lines
# If two adjacent lines are nearer than some threshold, then merge them
# Returns results in normalized vector form
def average_out_similar_lines1(lines_m_c,lines_coord,del_threshold):
    """Same merge rule as average_out_similar_lines, but every output line is
    re-fitted with cv2.fitLine and returned in normalized-vector form
    (vx, vy, x, y): direction vector plus a point on the line.

    lines_m_c: list of (slope, intercept) pairs ordered by intercept.
    lines_coord: the point groups the lines were fitted from (parallel list).
    """
    averaged_lines = []
    i = 0
    while(i < len(lines_m_c) - 1):
        _, intercept1 = lines_m_c[i]
        _, intercept2 = lines_m_c[i + 1]
        if abs(intercept2 - intercept1) < del_threshold:
            # NOTE(review): [:-1] drops the last point of the second group,
            # mirroring average_out_similar_lines — confirm intent.
            new_points = np.array(lines_coord[i] + lines_coord[i+1][:-1])
            coordinates = np.array(new_points)
            # cv2.fitLine expects an (N, 1, 2) int array.
            points = coordinates[:, None, :].astype(np.int32)
            # Least-squares line through the merged point set.
            [vx, vy, x, y] = cv2.fitLine(points, cv2.DIST_L2, 0, 0.01, 0.01)
            averaged_lines.append((vx, vy, x, y))
            i+=2
        else:
            new_points = np.array(lines_coord[i])
            coordinates = np.array(new_points)
            points = coordinates[:, None, :].astype(np.int32)
            # Re-fit the unmerged line to get it in vector form too.
            [vx, vy, x, y] = cv2.fitLine(points, cv2.DIST_L2, 0, 0.01, 0.01)
            averaged_lines.append((vx, vy, x, y))
            i+=1
    # Handle the final line when the loop stops one short.
    if(i < len(lines_m_c)):
        new_points = np.array(lines_coord[i])
        coordinates = np.array(new_points)
        points = coordinates[:, None, :].astype(np.int32)
        [vx, vy, x, y] = cv2.fitLine(points, cv2.DIST_L2, 0, 0.01, 0.01)
        averaged_lines.append((vx, vy, x, y))
    return averaged_lines
def get_square_color(image, box):
    """Classify a grid cell as black ('.') or white ('A').

    box[0] and box[1] are the cell's top-left and top-right corners; only the
    middle third of the cell is sampled so grid lines and clue numbers near
    the edges do not bias the decision. The mean intensity of that centre
    patch decides the colour (dark < 128 -> black).
    """
    third = (box[1][0] - box[0][0]) / 3
    # Centre patch spans from 1/3 to 2/3 of the cell in both directions.
    x0 = int(box[0][0] + third)
    y0 = int(box[0][1] + third)
    x1 = int(box[0][0] + third * 2)
    y1 = int(box[0][1] + third * 2)
    centre = image[y0:y1, x0:x1]
    return "." if np.mean(centre) < 128 else "A"
# accepts image in grayscale
def extract_grid(image):
    """Locate the crossword grid in a grayscale page image and describe it.

    Pipeline: contour detection -> perspective-warp the grid to ~400x400 ->
    Hough lines + Harris corners -> regression-fitted grid lines -> corner
    mesh -> per-cell colour sampling and crossword numbering.

    Returns a dict with 'size' (rows/cols), 'grid' ('.' black / 'A' white per
    cell), 'gridnums' (0 or the clue number per cell), and the clue-number
    lists 'across_nums' / 'down_nums'.
    """
    # Apply Gaussian blur to reduce noise and improve edge detection
    blurred = cv2.GaussianBlur(image, (3, 3), 0)
    # Apply Canny edge detection
    edges = cv2.Canny(blurred, 50, 150)
    # Apply dilation to connect nearby edges and make them more contiguous
    kernel = np.ones((5, 5), np.uint8)
    dilated = cv2.dilate(edges, kernel, iterations=1)
    # detecting contours on the dilated edge image
    contours, _ = cv2.findContours(dilated, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    # sorting the contours by descending contour area
    sorted_contours = list(sorted(contours, key=cv2.contourArea, reverse=True))
    # filtering the top 10 largest by applying NMS and keeping square ones (aspect ratio ~1)
    filtered_contours = filter_contours(image, sorted_contours[0:10], iou_threshold=0.6, asp_ratio=1, tolerance=0.2)
    # largest contour extraction: fall back to the raw largest if NMS kept nothing
    largest_contour = []
    if(len(filtered_contours)):
        largest_contour = filtered_contours[0]
    else:
        largest_contour = sorted_contours[0]
    # --- Performing perspective warp of the largest contour ---
    coordinates_list = []
    # Reduce the contour to exactly four corner points.
    if(largest_contour.shape != (4,1,2)):
        largest_contour = cv2.convexHull(largest_contour)
        if(largest_contour.shape != (4,1,2)):
            # Last resort: use the minimum-area rotated rectangle's corners.
            rect = cv2.minAreaRect(largest_contour)
            largest_contour = cv2.boxPoints(rect)
            largest_contour = largest_contour.astype('int')
    coordinates_list = largest_contour.reshape(4, 2).tolist()
    # Convert coordinates_list to a numpy array
    coordinates_array = np.array(coordinates_list)
    # Find the convex hull of the points
    hull = cv2.convexHull(coordinates_array)
    # Find the extreme points of the convex hull
    extreme_points = np.squeeze(hull)
    # lexsort keys are (y, x) with x primary: points come out left-to-right.
    # NOTE(review): after this sort the names tl/tr/bl/br really hold
    # (left pair, right pair); the two swaps below order each pair by y.
    sorted_points = extreme_points[np.lexsort((extreme_points[:, 1], extreme_points[:, 0]))]
    tl = sorted_points[0]
    tr = sorted_points[1]
    bl = sorted_points[2]
    br = sorted_points[3]
    if(tr[1] < tl[1]):
        tl,tr = tr,tl
    if(br[1] < bl[1]):
        bl,br = br,bl
    # Define pts1 (source quadrilateral)
    pts1 = [tl, bl, tr, br]
    # Nominal warped-grid size used for the cell-size heuristics below.
    x, y, w, h = 0,0,400,400
    # Define pts2 as the corners of the target square (3px inset on two sides)
    pts2 = [[3, 3], [400, 3], [3, 400], [400, 400]]
    # Calculate the perspective transformation matrix
    matrix = cv2.getPerspectiveTransform(np.float32(pts1), np.float32(pts2))
    # Apply the perspective transformation to obtain the flattened grid
    transformed_img = cv2.warpPerspective(image, matrix, (403, 403))
    cropped_image = transformed_img.copy()
    # -- Performing Hough transform --
    similarity_threshold = math.floor(w/30) # threshold for filtering similar Hough lines
    # Gaussian blur to reduce noise before edge detection
    blurred = cv2.GaussianBlur(cropped_image, (5, 5), 0)
    # Canny edge detection on the warped grayscale image
    edges = cv2.Canny(blurred, 50, 150)
    lines = cv2.HoughLines(edges, 1, np.pi/180, 200)
    # Filter out similar lines
    filtered_lines = []
    # NOTE(review): the outer loop is redundant — the inner loop already
    # visits every line, so this repeats identical work len(lines) times.
    for line in lines:
        for r_theta in lines:
            arr = np.array(r_theta[0], dtype=np.float64)
            rho, theta = arr
            is_similar = False
            for filtered_line in filtered_lines:
                filtered_rho, filtered_theta = filtered_line
                # Lines closer than similarity_threshold in both rho and theta
                # are treated as duplicates.
                if abs(rho - filtered_rho) < similarity_threshold and abs(theta - filtered_theta) < np.pi/180 * similarity_threshold:
                    is_similar = True
                    break
            if not is_similar:
                filtered_lines.append((rho, theta))
    # Separate the near-horizontal and near-vertical lines
    horizontal_lines = []
    vertical_lines = []
    for rho, theta in filtered_lines:
        a = np.cos(theta)
        b = np.sin(theta)
        x0 = a * rho
        y0 = b * rho
        # Two distant points on the line, to compute its pixel-space slope.
        x1 = int(x0 + 1000 * (-b))
        y1 = int(y0 + 1000 * (a))
        x2 = int(x0 - 1000 * (-b))
        y2 = int(y0 - 1000 * (a))
        slope = (y2 - y1) / (x2 - x1 + 0.0001)  # epsilon avoids division by zero
        # tan(0.17) is roughly 10 degrees
        if( abs(slope) <= 0.18 ):
            horizontal_lines.append((rho,theta))
        elif (abs(slope) > 6):
            vertical_lines.append((rho,theta))
    # Find the intersection points of horizontal and vertical lines
    hough_corners = []
    for h_rho, h_theta in horizontal_lines:
        for v_rho, v_theta in vertical_lines:
            # NOTE(review): parametricIntersect returns None for parallel
            # lines, which would make this unpacking raise TypeError before
            # the None check below can run — confirm.
            x, y = parametricIntersect(h_rho, h_theta, v_rho, v_theta)
            if x is not None and y is not None:
                hough_corners.append((x, y))
    # -- Performing Harris corner detection --
    # CLAHE boosts local contrast so faint grid lines still produce corners.
    clahe = cv2.createCLAHE(clipLimit=3, tileGridSize=(8, 8))
    clahe_image = clahe.apply(cropped_image)
    # Harris corner detection on the CLAHE image
    dst = cv2.cornerHarris(clahe_image,2,3,0.04)
    ret,dst = cv2.threshold(dst,0.1*dst.max(),255,0)
    dst = np.uint8(dst)
    dst = cv2.dilate(dst,None)
    # Collapse each corner blob to its centroid, then refine to sub-pixel.
    ret, labels, stats, centroids = cv2.connectedComponentsWithStats(dst)
    criteria = (cv2.TERM_CRITERIA_EPS+cv2.TermCriteria_MAX_ITER,100,0.001)
    harris_corners = cv2.cornerSubPix(clahe_image,np.float32(centroids),(5,5),(-1,-1),criteria)
    # Debug visualization of detected corners (image2 is otherwise unused).
    drawn_image = cv2.cvtColor(cropped_image, cv2.COLOR_GRAY2BGR)
    for i in harris_corners:
        x,y = i
        image2 = cv2.circle(drawn_image, (int(x),int(y)), radius=0, color=(0, 0, 255), thickness=3)
    # -- Using a regression model to approximate horizontal and vertical lines --
    # round to 0 decimal places so near-identical corners dedupe via set()
    corners1 = list(map(lambda coord: (round(coord[0], 0), round(coord[1], 0)), harris_corners))
    # adding the corners obtained from the Hough transform
    corners1 += hough_corners
    # removing duplicate corners
    corners_no_dup = list(set(corners1))
    # Heuristic minimum cell size: a grid is assumed to have at most ~30 cells.
    min_cell_width = w/30
    min_cell_height = h/30
    # Group coordinates into runs that could each fit one grid line.
    vertical_lines = group_lines(corners_no_dup,0,min_cell_height)
    horizontal_lines = group_lines(corners_no_dup,1,min_cell_height)
    actual_vertical_lines = fit_lines(vertical_lines)
    actual_horizontal_lines = fit_lines(horizontal_lines,is_horizontal=True)
    # The fitted lines are noisy; merge near-duplicates by intercept distance.
    x_probable = [i[1] for i in actual_horizontal_lines] # looking at the intercepts
    y_probable = [i[1] for i in actual_vertical_lines]
    del_x_avg = average_distance(x_probable)
    del_y_avg = average_distance(y_probable)
    averaged_horizontal_lines1 = []  # NOTE(review): this refinement loop is heuristic
    averaged_vertical_lines1 = []
    multiplier = 0.95
    i = 0
    # Shrink the merge threshold until the horizontal and vertical line
    # counts agree (square grid assumption) or 20 attempts pass.
    while(1):
        averaged_horizontal_lines = average_out_similar_lines(actual_horizontal_lines,horizontal_lines,del_y_avg*multiplier,is_horizontal=True)
        averaged_vertical_lines = average_out_similar_lines(actual_vertical_lines,vertical_lines,del_x_avg*multiplier,is_horizontal=False)
        i += 1
        if(i >= 20 or len(averaged_horizontal_lines) == len(averaged_vertical_lines)):
            break
        else:
            multiplier -= 0.05
    # Re-run the merge with the accepted multiplier, in vector form this time.
    averaged_horizontal_lines1 = average_out_similar_lines1(actual_horizontal_lines,horizontal_lines,del_y_avg*multiplier)
    averaged_vertical_lines1 = average_out_similar_lines1(actual_vertical_lines,vertical_lines,del_x_avg*multiplier)
    # Plot the fitted lines on a blank canvas to find their intersections.
    drawn_image6 = np.ones_like(cropped_image)*255
    for vx,vy,cx,cy in averaged_horizontal_lines1 + averaged_vertical_lines1:
        # NOTE(review): w is reassigned here (was the nominal 400 grid size
        # used for min_cell_width above) — order-dependent, confirm intent.
        w = cropped_image.shape[1]
        cv2.line(drawn_image6, (int(cx-vx*w), int(cy-vy*w)), (int(cx+vx*w), int(cy+vy*w)), (0, 0, 255),1,cv2.LINE_AA)
    # -- Finding intersection points --
    # Harris corner detection on the synthetic line mesh
    mesh_image = drawn_image6.copy()
    dst = cv2.cornerHarris(mesh_image,2,3,0.04)
    ret,dst = cv2.threshold(dst,0.1*dst.max(),255,0)
    dst = np.uint8(dst)
    dst = cv2.dilate(dst,None)
    ret, labels, stats, centroids = cv2.connectedComponentsWithStats(dst)
    criteria = (cv2.TERM_CRITERIA_EPS+cv2.TermCriteria_MAX_ITER,100,0.001)
    harris_corners = cv2.cornerSubPix(mesh_image,np.float32(centroids),(5,5),(-1,-1),criteria)
    drawn_image = cv2.cvtColor(drawn_image6, cv2.COLOR_GRAY2BGR)
    # Drop the background centroid ([0]) and sort the mesh corners by y.
    harris_corners = list(sorted(harris_corners[1:],key = lambda x : x[1]))
    # -- Finding out the grid colour --
    grayscale = cropped_image.copy()
    # Otsu thresholding to obtain an inverted binary image
    _, binary = cv2.threshold(grayscale, 128, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    # Morphological pass to remove small text regions (clue numbers)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    binary = cv2.morphologyEx(binary, cv2.MORPH_ELLIPSE, kernel, iterations=1)
    # Invert the binary image
    inverted_binary = cv2.bitwise_not(binary)
    # Blend the inverted mask with the grayscale image to erase the text
    restored_image = cv2.bitwise_or(inverted_binary, grayscale)
    # Morphological opening to remove small black dots
    kernel_opening = np.ones((3, 3), np.uint8)
    opened_image = cv2.morphologyEx(restored_image, cv2.MORPH_OPEN, kernel_opening, iterations=1)
    # Morphological closing to further refine the restored image
    kernel_closing = np.ones((5, 5), np.uint8)
    refined_image = cv2.morphologyEx(opened_image, cv2.MORPH_CLOSE, kernel_closing, iterations=1)
    # Outputs: cell colours, cell numbers, and the across/down number lists.
    grid = []
    grid_nums = []
    across_clue_num = []
    down_clue_num = []
    # Sample each cell's colour only when the corner mesh is complete, i.e.
    # the number of corners matches the expected lines_h x lines_v lattice.
    sorted_corners = np.array(list(sorted(harris_corners,key=lambda x:x[1])))
    if(len(sorted_corners) == len(averaged_horizontal_lines1) * len(averaged_vertical_lines1)):
        # Group corners into rows (one row per horizontal line), left-to-right.
        sorted_corners_grouped = []
        for i in range(0,len(sorted_corners),len(averaged_vertical_lines1)):
            temp_arr = sorted_corners[i:i+len(averaged_vertical_lines1)]
            temp_arr = list(sorted(temp_arr,key=lambda x: x[0]))
            sorted_corners_grouped.append(temp_arr)
        for h_line_idx in range(0,len(sorted_corners_grouped)-1):
            for corner_idx in range(0,len(sorted_corners_grouped[h_line_idx])-1):
                # grabbing the four corner coordinates of this cell
                box = [sorted_corners_grouped[h_line_idx][corner_idx],sorted_corners_grouped[h_line_idx][corner_idx+1],
                       sorted_corners_grouped[h_line_idx+1][corner_idx],sorted_corners_grouped[h_line_idx+1][corner_idx+1]]
                grid.append(get_square_color(refined_image,box))
        # Reshape the flat cell list into rows.
        grid_formatted = []
        for i in range(0, len(grid), len(averaged_vertical_lines1) - 1):
            grid_formatted.append(grid[i:i + len(averaged_vertical_lines1) - 1])
        # Cells already counted as part of an across/down answer.
        in_horizontal = []
        in_vertical = []
        num = 0
        # NOTE(review): x iterates over vertical-line gaps and y over
        # horizontal-line gaps while indexing grid_formatted[x][y] — this is
        # a transposed view that only works for square grids, and it is
        # presumably why 'across_nums'/'down_nums' are swapped in the result
        # dict below. Confirm.
        for x in range(0, len(averaged_vertical_lines1) - 1):
            for y in range(0, len(averaged_horizontal_lines1) - 1):
                # black cells are never numbered
                if grid_formatted[x][y] == '.':
                    grid_nums.append(0)
                    continue
                # cells already inside both an across and a down answer
                # need no new number
                horizontal_presence = (x, y) in in_horizontal
                vertical_presence = (x, y) in in_vertical
                # present in both (1 1)
                if horizontal_presence and vertical_presence:
                    grid_nums.append(0)
                    continue
                # present only in a down answer (0 1): may start an across one
                if not horizontal_presence and vertical_presence:
                    horizontal_length = 0
                    temp_horizontal_arr = []
                    # walk in the x direction until the grid edge or a black box
                    while x + horizontal_length < len(averaged_horizontal_lines1) - 1 and grid_formatted[x + horizontal_length][y] != '.':
                        temp_horizontal_arr.append((x + horizontal_length, y))
                        horizontal_length += 1
                    # a run longer than one cell starts a new across answer
                    if horizontal_length > 1:
                        in_horizontal.extend(temp_horizontal_arr)
                        num += 1
                        across_clue_num.append(num)
                        grid_nums.append(num)
                        continue
                    grid_nums.append(0)
                # present only in an across answer (1 0): may start a down one
                if not vertical_presence and horizontal_presence:
                    vertical_length = 0
                    temp_vertical_arr = []
                    # walk in the y direction until the grid edge or a black box
                    while y + vertical_length < len(averaged_vertical_lines1) - 1 and grid_formatted[x][y+vertical_length] != '.':
                        temp_vertical_arr.append((x, y+vertical_length))
                        vertical_length += 1
                    # a run longer than one cell starts a new down answer
                    if vertical_length > 1:
                        in_vertical.extend(temp_vertical_arr)
                        num += 1
                        down_clue_num.append(num)
                        grid_nums.append(num)
                        continue
                    grid_nums.append(0)
                # present in neither (0 0): may start across, down, or both
                if(not horizontal_presence and not vertical_presence):
                    horizontal_length = 0
                    temp_horizontal_arr = []
                    # walk in the x direction until the grid edge or a black box
                    while x + horizontal_length < len(averaged_horizontal_lines1) - 1 and grid_formatted[x + horizontal_length][y] != '.':
                        temp_horizontal_arr.append((x + horizontal_length, y))
                        horizontal_length += 1
                    # and likewise in the y direction
                    vertical_length = 0
                    temp_vertical_arr = []
                    while y + vertical_length < len(averaged_vertical_lines1) - 1 and grid_formatted[x][y+vertical_length] != '.':
                        temp_vertical_arr.append((x, y+vertical_length))
                        vertical_length += 1
                    # NOTE(review): the second operand repeats
                    # horizontal_length — almost certainly meant
                    # `vertical_length > 1`; as written the combined branch
                    # can fire without a real down run. Confirm and fix.
                    if horizontal_length > 1 and horizontal_length > 1:
                        in_horizontal.extend(temp_horizontal_arr)
                        in_vertical.extend(temp_vertical_arr)
                        num += 1
                        across_clue_num.append(num)
                        down_clue_num.append(num)
                        grid_nums.append(num)
                    elif vertical_length > 1:
                        in_vertical.extend(temp_vertical_arr)
                        num += 1
                        down_clue_num.append(num)
                        grid_nums.append(num)
                    elif horizontal_length > 1:
                        in_horizontal.extend(temp_horizontal_arr)
                        num += 1
                        across_clue_num.append(num)
                        grid_nums.append(num)
                    else:
                        grid_nums.append(0)
    size = { 'rows' : len(averaged_horizontal_lines1)-1,
             'cols' : len(averaged_vertical_lines1)-1,
           }
    # NOTE(review): `dict` shadows the builtin; also 'across_nums' is filled
    # from down_clue_num and vice versa — possibly compensating for the
    # transposed x/y iteration flagged above. Confirm before relying on it.
    dict = {
        'size' : size,
        'grid' : grid,
        'gridnums': grid_nums,
        'across_nums': down_clue_num,
        'down_nums' : across_clue_num,
    }
    return dict
if __name__ == "__main__":
    # Smoke test: load a sample puzzle image as grayscale (flag 0) and print
    # the parsed grid description.
    # NOTE(review): hard-coded Windows-only path — will return None (and
    # crash extract_grid) on any other machine.
    img = cv2.imread("D:\\D\\Major Project files\\opencv\\movie.png",0)
    down = extract_grid(img)
    print(down)
    # img = Image.open("chalena3.jpg")
    # img_gray = img.convert("L")
    # print(extract_grid(img_gray))