Document Scanner
Implementation of a document scanner using Python and OpenCV.
Utility functions
Let’s implement a few functions that we will use later.
import numpy as npimport cv2from skimage.filters import threshold_localimport cv2import matplotlib.pyplot as plt%matplotlib inline
- Euclidean distance: straight line distance between two points in a Euclidean plane.
def euclidean_distance(a, b):
    """Return the straight-line distance between two 2-D points.

    Only the components at index 0 and index 1 of ``a`` and ``b`` are used.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    return np.sqrt(delta_x ** 2 + delta_y ** 2)
- Function to sort vertices in a clockwise order
def sort_vertices(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The primary heuristic picks corners by coordinate sum and difference:
    smallest ``x + y`` is top-left, largest ``x + y`` is bottom-right,
    smallest ``y - x`` is top-right and largest ``y - x`` is bottom-left.

    When sums or differences tie (for example a diamond-shaped quadrilateral)
    that heuristic can select the same input point for two corners. In that
    case, and only then, the points are ordered by angle around their centroid
    instead, starting from the point the heuristic chose as top-left.

    Args:
        pts: Array-like of shape (4, 2) holding (x, y) points.

    Returns:
        A ``float32`` array of shape (4, 2) with the ordered points.
    """
    pts = np.asarray(pts)
    coord_sum = pts.sum(axis=1)
    # np.diff along axis 1 yields y - x for each point.
    coord_diff = np.diff(pts, axis=1)

    picks = [
        int(np.argmin(coord_sum)),   # top-left: smallest sum
        int(np.argmin(coord_diff)),  # top-right: smallest difference
        int(np.argmax(coord_sum)),   # bottom-right: largest sum
        int(np.argmax(coord_diff)),  # bottom-left: largest difference
    ]

    # A repeated index means one point was assigned to two corners; fall back
    # to angular ordering so every input point appears exactly once.
    if len(set(picks)) < 4 and len(pts) == 4:
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        # Increasing atan2 angle is clockwise on screen when y grows downward
        # (assumed image coordinate convention).
        ring = np.argsort(angles, kind="stable")
        start = int(np.where(ring == picks[0])[0][0])
        picks = [int(i) for i in np.roll(ring, -start)]

    vertices = np.zeros((4, 2), dtype="float32")
    vertices[:] = pts[picks]
    return vertices
- Function for rectangular perspective transformation
def four_point_perspective_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    The points are first ordered with ``sort_vertices`` (top-left, top-right,
    bottom-right, bottom-left). The output width is the longer of the top and
    bottom edges and the output height is the longer of the left and right
    edges, each truncated to a whole number of pixels.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        pts: The four corner points of the region to rectify.

    Returns:
        The warped, top-down view of the region.
    """
    corners = sort_vertices(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    bottom_edge = euclidean_distance(bottom_right, bottom_left)
    top_edge = euclidean_distance(top_right, top_left)
    out_width = max(int(bottom_edge), int(top_edge))

    # Output height: the longer of the right and left edges.
    right_edge = euclidean_distance(top_right, bottom_right)
    left_edge = euclidean_distance(top_left, bottom_left)
    out_height = max(int(right_edge), int(left_edge))

    # Destination corners, in the same order as ``corners``.
    last_col = out_width - 1
    last_row = out_height - 1
    target = np.array(
        [
            [0, 0],
            [last_col, 0],
            [last_col, last_row],
            [0, last_row],
        ],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, transform, (out_width, out_height))
- Function to display images side-by-side
def display_sbs(imgLr, imgRr):
    """Show two images next to each other in a single matplotlib figure.

    ``imgLr`` goes in the left subplot and ``imgRr`` in the right one. The
    call blocks until the figure is dismissed (``block=True``).
    """
    figure = plt.figure()
    for position, picture in enumerate((imgLr, imgRr), start=1):
        figure.add_subplot(1, 2, position)
        # rot90 with k=0 applies zero quarter-turns.
        plt.imshow(np.rot90(picture, 0))
    plt.show(block=True)
Main program
Step 1: Reading the image and converting it into grayscale
Reading image
# cv2.imread returns None rather than raising when the file cannot be read,
# so fail here with a clear message instead of a later AttributeError.
image = cv2.imread("images/receipt.jpg")
if image is None:
    raise FileNotFoundError("Could not read image: images/receipt.jpg")

# Keep the untouched original for the final side-by-side display.
image_copy = image.copy()

# Resize to a fixed working size; cv2.resize takes (width, height).
image = cv2.resize(image, (1500, 800))
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Smooth before edge detection. The blurred image was previously computed but
# never used: Canny ran on the unblurred grayscale image.
image_blurred = cv2.GaussianBlur(image_gray, (5, 5), 0)
image_edge = cv2.Canny(image_blurred, 75, 200)

display_sbs(image, image_edge)
png
Step 2: find contours on the image
# Taking the last two return values gives (contours, hierarchy) whether
# findContours returns two values or three.
cnts, hiers = cv2.findContours(image_edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Only the five largest contours by area are considered.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# The first of those contours that simplifies to four vertices is taken as
# the document outline.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:
        screenCnt = approx
        break

# Without this check a missing outline only surfaces later as a NameError.
if screenCnt is None:
    raise RuntimeError("No four-sided contour found in the edge map")

# Notebook cell echo of the candidate contours.
cnts
Step 3: draw contours on the image
# Draw the outline on a copy: cv2.drawContours modifies its target in place,
# and the warp step that follows reads `image`, so drawing on `image` itself
# would put the green outline into the scanned output.
image_outlined = image.copy()
cv2.drawContours(image_outlined, [screenCnt], -1, (0, 255, 0), 2)
plt.imshow(np.rot90(image_outlined, 0))
plt.show(block=True)
png
Step 4: warp this image to display only the receipt
# Flatten the detected contour to four (x, y) corners and rectify the receipt.
corner_points = screenCnt.reshape(4, 2)
warped_image = four_point_perspective_transform(image, corner_points)

# Binarise: convert to grayscale, then compare every pixel against a local
# Gaussian-weighted threshold (block size 11, offset 10).
warped_image = cv2.cvtColor(warped_image, cv2.COLOR_BGR2GRAY)
T = threshold_local(warped_image, 11, offset=10, method="gaussian")
above_threshold = warped_image > T
# Pixels above their local threshold become 255, all others 0.
warped_image = above_threshold.astype("uint8") * 255

display_sbs(image_copy, warped_image)
png
Finally, save this image.
# cv2.imwrite reports failure through its boolean return value, so check it
# before announcing success.
if not cv2.imwrite('./images/scanned.png', warped_image):
    raise OSError("Failed to write ./images/scanned.png")
print("Image scanned and saved")
Image scanned and saved