Document Scanner using OpenCV

Document Scanner

Implementation of a document scanner using Python and OpenCV

Utility functions

Let’s implement a few functions that we will use later.

import numpy as np
import cv2
from skimage.filters import threshold_local
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
  1. Euclidean distance: straight line distance between two points in a Euclidean plane.
def euclidean_distance(a, b):
    """Return the straight-line distance between the 2-D points ``a`` and ``b``.

    Only the first two components (indices 0 and 1) of each point are read.
    """
    delta_x = a[0] - b[0]
    delta_y = a[1] - b[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return np.sqrt(squared_length)
  1. Function to sort vertices in a clockwise order
def sort_vertices(pts):
    """Order corner points clockwise, starting from the top-left.

    Args:
        pts: Array-like of (x, y) points. Anything that reshapes to
            ``(N, 2)`` is accepted: a list of pairs, a ``(4, 2)`` array,
            or an OpenCV-style ``(4, 1, 2)`` contour.

    Returns:
        A ``(4, 2)`` float32 array ordered top-left, top-right,
        bottom-right, bottom-left (image coordinates, y growing downwards).
    """
    # Normalise the input so plain lists and (N, 1, 2) contours work as well
    # as (N, 2) arrays; the original required an ndarray of exactly that shape.
    pts = np.asarray(pts).reshape(-1, 2)

    # Output slots, filled in clockwise order.
    vertices = np.zeros((4, 2), dtype="float32")

    # x + y is smallest at the top-left and largest at the bottom-right.
    s = pts.sum(axis=1)
    vertices[0] = pts[np.argmin(s)]
    vertices[2] = pts[np.argmax(s)]

    # y - x is smallest at the top-right and largest at the bottom-left.
    diff = pts[:, 1] - pts[:, 0]
    vertices[1] = pts[np.argmin(diff)]
    vertices[3] = pts[np.argmax(diff)]

    return vertices
  1. Function for rectangular perspective transformation
def four_point_perspective_transform(image, pts):
    """
    Warp the quadrilateral described by ``pts`` into an upright rectangle,
    giving a top-down view of that region of ``image``.
    """
    # Corners in clockwise order: top-left, top-right, bottom-right, bottom-left
    corners = sort_vertices(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges (truncated to int)
    out_width = max(
        int(euclidean_distance(bottom_right, bottom_left)),
        int(euclidean_distance(top_right, top_left)),
    )

    # Output height: the longer of the right and left edges (truncated to int)
    out_height = max(
        int(euclidean_distance(top_right, bottom_right)),
        int(euclidean_distance(top_left, bottom_left)),
    )

    # Corners of the output rectangle, in the same clockwise order
    right = out_width - 1
    bottom = out_height - 1
    target = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    # Map the source corners onto the rectangle and resample the image
    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, transform, (out_width, out_height))
  1. Function to display images side-by-side
def display_sbs(imgLr, imgRr):
    """Show ``imgLr`` and ``imgRr`` next to each other in one figure.

    The call blocks until the figure window is closed.
    """
    figure = plt.figure()
    # One row, two columns: left image in slot 1, right image in slot 2
    for slot, picture in enumerate((imgLr, imgRr), start=1):
        figure.add_subplot(1, 2, slot)
        plt.imshow(np.rot90(picture, 0))
    plt.show(block=True)

Main program

Step 1: Reading the image and converting it into grayscale

Reading image

# Load the photo. cv2.imread returns None (it does not raise) when the file
# is missing or unreadable, so fail here with a clear message.
image = cv2.imread("images/receipt.jpg")
if image is None:
    raise FileNotFoundError("Could not read image: images/receipt.jpg")

# Keep the untouched, full-size original for the final side-by-side view
image_copy = image.copy()

# Work on a fixed-size 1500x800 (width x height) frame
image = cv2.resize(image, (1500, 800))
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Smooth before edge detection so noise and fine texture do not become edges
image_blurred = cv2.GaussianBlur(image_gray, (5, 5), 0)

# Detect edges on the blurred image; previously the unblurred grayscale was
# passed here, which left the blur above unused
image_edge = cv2.Canny(image_blurred, 75, 200)
display_sbs(image, image_edge)

png

Step 2: find contours on the image

# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
# (image, contours, hierarchy) in 3.x; taking the last two items works on all.
cnts, hiers = cv2.findContours(image_edge.copy(), cv2.RETR_LIST,
                               cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Only the five largest contours are candidates for the document outline
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

screenCnt = None
for c in cnts:
    # Simplify the contour, allowing a deviation of 2% of its perimeter
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    # The first (largest) candidate with exactly four corners is taken as
    # the document outline
    if len(approx) == 4:
        screenCnt = approx
        break

# Fail explicitly instead of hitting a NameError in the later steps
if screenCnt is None:
    raise RuntimeError("No four-cornered contour found in the image")

Step 3: draw contours on the image

# cv2.drawContours draws in place. Draw on a copy so the green outline is not
# baked into `image`, which is warped and thresholded in the next step.
image_outlined = image.copy()
cv2.drawContours(image_outlined, [screenCnt], -1, (0, 255, 0), 2)
plt.imshow(image_outlined)
plt.show(block=True)

png

Step 4: warp this image to display only the receipt

# Top-down view of the region enclosed by the detected outline
receipt_corners = screenCnt.reshape(4, 2)
receipt_color = four_point_perspective_transform(image, receipt_corners)
receipt_gray = cv2.cvtColor(receipt_color, cv2.COLOR_BGR2GRAY)

# Binarise against a per-pixel local threshold (11-pixel Gaussian-weighted
# neighbourhood, offset 10): above the threshold -> 255, otherwise -> 0
T = threshold_local(receipt_gray, 11, offset=10, method="gaussian")
is_foreground = receipt_gray > T
warped_image = is_foreground.astype("uint8") * 255

display_sbs(image_copy, warped_image)

png

Finally, save this image.

# cv2.imwrite signals failure by returning False rather than raising, so check
# the result before reporting success.
if not cv2.imwrite('./images/scanned.png', warped_image):
    raise OSError("Could not write image: ./images/scanned.png")
print("Image scanned and saved")
Image scanned and saved
Man Parvesh Singh Randhawa
Man Parvesh Singh Randhawa
CS Grad student

My interests include distributed computing, information security, and software engineering.

Next
Previous