|
|
import cv2 |
|
|
import numpy as np |
|
|
import PyPDF2 |
|
|
import fitz |
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
|
|
|
def _draw_green_segments(canvas, segments, keep=None):
    """Draw Hough segments onto *canvas* in (0, 255, 0), 2 px wide.

    Args:
        canvas: 3-channel image array that is drawn on in place.
        segments: raw result of ``cv2.HoughLinesP``. It is ``None`` when no
            segment was detected, in which case nothing is drawn.
        keep: optional predicate ``keep(x1, y1, x2, y2)``; when given, only
            segments for which it returns true are drawn.
    """
    if segments is None:
        return
    # HoughLinesP packs each segment as [[x1, y1, x2, y2]]; flatten to rows
    # of plain Python ints before handing them to cv2.line.
    for x1, y1, x2, y2 in segments.reshape(-1, 4).tolist():
        if keep is None or keep(x1, y1, x2, y2):
            cv2.line(canvas, (x1, y1), (x2, y2), (0, 255, 0), 2)


def rmv_dashedLines(clean_img1):
    """Paint detected thin line segments over and return the opened channel 1.

    Steps, in order:
      1. Copy the input into an array and binarise it (pixels with gray
         value <= 200 become foreground).
      2. Erode with a 3x5 kernel and dilate three times with a 9x9 kernel to
         find thick structures, then mask those out so only thin strokes
         remain.
      3. Run two probabilistic Hough passes on the thin strokes and draw the
         detected segments on the colour copy in (0, 255, 0), which sets
         channel 1 to 255 along each segment.
      4. Take channel 1 and apply a 9x9 erode followed by a 9x9 dilate.

    Args:
        clean_img1: anything ``np.array`` can turn into a 3-channel image
            (e.g. a PIL image or an ndarray). The input itself is not
            modified; drawing happens on a copy.

    Returns:
        2-D ``uint8`` array: channel 1 of the annotated copy after the
        erode/dilate pass.
    """
    clean_img = np.array(clean_img1)

    thin_kernel = np.ones((3, 5), np.uint8)
    wide_kernel = np.ones((9, 9), np.uint8)

    # NOTE(review): BGR2GRAY assumes BGR channel order; an array built from a
    # PIL image is RGB, which swaps the red/blue weights -- confirm intent.
    imgGray = cv2.cvtColor(clean_img, cv2.COLOR_BGR2GRAY)
    imgBW = cv2.threshold(imgGray, 200, 255, cv2.THRESH_BINARY_INV)[1]

    # Foreground that survives the erosion is "thick"; grow it back generously
    # and intersect with the original foreground to get the thick structures.
    thick_seed = cv2.erode(imgBW, thin_kernel, iterations=1)
    thick_area = cv2.dilate(thick_seed, wide_kernel, iterations=3)
    thin_mask = cv2.bitwise_not(cv2.bitwise_and(imgBW, thick_area))
    # Keep only foreground pixels that are NOT part of a thick structure.
    thin_strokes = cv2.bitwise_and(imgBW, imgBW, mask=thin_mask)

    h, w = clean_img.shape[:2]

    # Pass 1: long segments (at least w - h pixels), generous gap bridging.
    long_segments = cv2.HoughLinesP(
        thin_strokes, 1, np.pi / 180, 200,
        minLineLength=(w - h), maxLineGap=120,
    )
    _draw_green_segments(clean_img, long_segments)

    # Pass 2: shorter segments, drawn only when they start beyond x or y 4050.
    # NOTE(review): 4050 is a hard-coded pixel coordinate, presumably tuned
    # for one particular page size -- confirm against the caller's images.
    short_segments = cv2.HoughLinesP(
        thin_strokes, 1, np.pi / 180, 150,
        minLineLength=w // 4, maxLineGap=90,
    )
    _draw_green_segments(
        clean_img,
        short_segments,
        keep=lambda x1, y1, x2, y2: x1 > 4050 or y1 > 4050,
    )

    green = clean_img[:, :, 1]

    # Erode then dilate with the same kernel (a morphological opening).
    eroded = cv2.erode(green, wide_kernel, iterations=1)
    dilated = cv2.dilate(eroded, wide_kernel, iterations=1)

    return dilated
|
|
|
|
|
|
|
|
|
|
|
def rmv_text(plan):
    """Render page 0 of a PDF as an RGB image with its text redacted.

    The text of the first page is extracted with PyPDF2. Every distinct
    character of that text is then searched for on the same page with
    PyMuPDF, each hit is marked with a redaction annotation, and the
    redactions are applied before the page is rasterised.

    Args:
        plan: path of the PDF file; passed to both ``open`` and ``fitz.open``.

    Returns:
        A PIL ``Image`` in ``'RGB'`` mode built from the page pixmap.
    """
    # Context managers make sure the file handle and the document are closed.
    with open(plan, 'rb') as file:
        text = PyPDF2.PdfReader(file).pages[0].extract_text() or ''

    with fitz.open(plan) as pdf:
        page = pdf.load_page(0)

        # Search once per distinct character (first-seen order) instead of
        # once per occurrence; repeated characters yield the same rectangles.
        for item in dict.fromkeys(text):
            for rect in page.search_for(item):
                page.add_redact_annot(rect)

        # Applying consumes the redaction annotations, so a single call is
        # enough; a second call would find nothing left to process.
        # NOTE(review): the default image handling is used here. If embedded
        # images must stay untouched, pass images=fitz.PDF_REDACT_IMAGE_NONE
        # to this call instead -- confirm the intended behaviour.
        page.apply_redactions()

        pix = page.get_pixmap()
        # Build the image while the document is still open.
        clean_img = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)

    return clean_img
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|