Passport MRZ Recognition with Dynamsoft C++ OCR SDK

Xiao Ling - Jul 5 '21 - - Dev Community

MRZ stands for machine-readable zone. A passport's machine-readable zone encodes the holder's name, nationality, passport number, date of birth, sex, and the passport's expiration date. This article shows how to use the Dynamsoft OCR SDK to recognize the MRZ and parse that information.

About Dynamsoft OCR SDK

Prerequisites

Windows

Linux

sudo apt install libopencv-dev cmake
Enter fullscreen mode Exit fullscreen mode

Passport MRZ Recognition in C++

In the following sections, we first use the Dynamsoft OCR SDK to locate the machine-readable zone of a passport and recognize its text, and then extract each field from that text according to the standard format of passport booklets.

Setting up CMake project

I strongly recommend installing the CMake extension for Visual Studio Code to create and debug a CMake project on both Windows and Linux.

Let us configure the header files and link libraries of Dynamsoft OCR and OpenCV in CMakeLists.txt:

# The $<TARGET_FILE_DIR:...> generator expression used below is not available
# in CMake 2.6, and recent CMake releases reject compatibility levels older
# than 3.5, so 3.5 is the lowest version this script can honestly claim.
cmake_minimum_required (VERSION 3.5)
project (mrz)
MESSAGE( STATUS "PROJECT_NAME: " ${PROJECT_NAME} )

# Check platforms
if (CMAKE_HOST_WIN32)
    set(WINDOWS 1)
elseif(CMAKE_HOST_UNIX)
    set(LINUX 1)
endif()

# Add search path for include and lib files
MESSAGE( STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}" )
if(WINDOWS)
    link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/")
elseif(LINUX)
    link_directories("${PROJECT_SOURCE_DIR}/platform/linux/")
endif()
include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/")

# Add the executable and link it against the OCR library and OpenCV.
# The OCR library carries an x64 suffix on Windows only.
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} mrzcv.cpp)
if(WINDOWS)
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
else()
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS})
endif()

# Copy DLLs next to the executable after each build (Windows only)
if(WINDOWS)
    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        "${PROJECT_SOURCE_DIR}/platform/windows/bin/"
        $<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif()
Enter fullscreen mode Exit fullscreen mode

The character model, trained with a deep neural network (DNN), is included in the C++ dev package. We need to copy the whole model folder to the output directory, along with the template file.

# After every build, place the runtime assets beside the executable:
# first the template files, then the character model (kept in its own
# CharacterModel subfolder).
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
        "${PROJECT_SOURCE_DIR}/template/"
        $<TARGET_FILE_DIR:${PROJECT_NAME}>
    COMMAND ${CMAKE_COMMAND} -E copy_directory
        "${PROJECT_SOURCE_DIR}/CharacterModel"
        $<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel)
Enter fullscreen mode Exit fullscreen mode

The online documentation can help you figure out how template parameters work.

Coding for MRZ detection and information parsing

Once the build configuration is done, we can move to the code part.

  1. Initialize the OCR object. A valid license key is required.

    // Create the recognizer object and pass it the license key.
    // "LICENSE-KEY" is a placeholder; substitute a valid key.
    CLabelRecognition dlr;
    dlr.InitLicense("LICENSE-KEY");
    
  2. Append a template file.

    // Load recognition settings from the template file ("template-file" is a
    // placeholder path). The call's return value is kept in ret.
    int ret = dlr.AppendSettingsFromFile("template-file");
    

    Note: if the DirectoryPath configured in the template file is a relative path, you must place the template file alongside the model folder.

    "CharacterModelArray" : [
    {
      "DirectoryPath": "CharacterModel",
      "FilterFilePath": "",
      "Name": "NumberUppercase"
    }
    ],
    
  3. Call the OCR recognition method. We can use OpenCV's built-in TickMeter class to measure the elapsed time; it simplifies time measurement on both Windows and Linux.

    // Time the recognition call with OpenCV's TickMeter.
    TickMeter tm;
    tm.start();
    // Run recognition on the image file. The second argument ("locr") is
    // presumably the name of a template defined in the settings file -- confirm.
    errorCode = dlr.RecognizeByFile(pszImageFile, "locr");
    tm.stop();
    // Elapsed time between start() and stop(), in seconds.
    float costTime = tm.getTimeSec();
    
  4. Get the text recognition results, which include the coordinates of the text zone, the text lines, and the text strings.

    // Retrieve all recognition results and print, for every result, each
    // recognized text line together with the four corner points of its location.
    DLRResultArray* pDLRResults = NULL;
    dlr.GetAllDLRResults(&pDLRResults);
    if (pDLRResults != NULL)
    {
        int rCount = pDLRResults->resultsCount;
        printf("\r\nRecognized %d results\r\n", rCount);
        for (int ri = 0; ri < rCount; ++ri)
        {
            printf("\r\nResult %d :\r\n", ri);
            DLRResult* result = pDLRResults->results[ri];
            int lCount = result->lineResultsCount;
            for (int li = 0; li < lCount; ++li)
            {
                printf("Line result %d: %s\r\n", li, result->lineResults[li]->text);
                // points[0..3] are the corners of the quadrilateral around this line.
                DLRPoint *points = result->lineResults[li]->location.points;
                printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n", points[0].x,
                    points[0].y, points[1].x, points[1].y, points[2].x, points[2].y, points[3].x, points[3].y);
            }
        }
    }
    else
    {
        printf("\r\nNo data detected.\r\n");
    }
    // Hand the result array back to the SDK once it is no longer needed.
    dlr.FreeDLRResults(&pDLRResults);
    
  5. Parse the MRZ string and extract the corresponding information.

    string line1 = result->lineResults[0]->text;
    string line2 = result->lineResults[1]->text;
    // https://en.wikipedia.org/wiki/Machine-readable_passport
    // Type
    string tmp = "Type: ";
    tmp.insert(tmp.length(), 1, line1[0]);
    printf("%s\r\n", tmp.c_str());
    
    // Issuing country
    tmp = "Issuing country: "; line1.substr(2, 5);
    tmp += line1.substr(2, 3);      
    printf("%s\r\n", tmp.c_str());
    
    // Surname
    int index = 5;
    tmp = "Surname: ";
    for (; index < 44; index++)
    {
      if (line1[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line1[index]);
      }
      else 
      {
        break;
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Given names
    tmp = "Given Names: ";
    index += 2;
    for (; index < 44; index++)
    {
      if (line1[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line1[index]);
      }
      else 
      {
        tmp.insert(tmp.length(), 1, ' ');
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Passport number
    tmp = "Passport number: ";
    index = 0;
    for (; index < 9; index++)
    {
      if (line2[index] != '<')
      {
        tmp.insert(tmp.length(), 1, line2[index]);
      }
      else 
      {
        break;
      }
    }
    printf("%s\r\n", tmp.c_str());
    
    // Nationality
    tmp = "Nationality: ";
    tmp += line2.substr(10, 3);
    printf("%s\r\n", tmp.c_str());
    
    // Date of birth
    tmp = line2.substr(13, 6);
    tmp.insert(2, "/");
    tmp.insert(5, "/");
    tmp = "Date of birth (YYMMDD): " + tmp;
    printf("%s\r\n", tmp.c_str());
    
    // Sex
    tmp = "Sex: ";
    tmp.insert(tmp.length(), 1, line2[20]);
    printf("%s\r\n", tmp.c_str());
    
    // Expiration date of passport
    tmp = line2.substr(21, 6);
    tmp.insert(2, "/");
    tmp.insert(5, "/");
    tmp = "Expiration date of passport (YYMMDD): " + tmp;
    printf("%s\r\n", tmp.c_str());
    
    // Personal number
    if (line2[28] != '<')
    {
      tmp = "Personal number: ";
      for (index = 28; index < 42; index++)
      {
        if (line2[index] != '<')
        {
          tmp.insert(tmp.length(), 1, line2[index]);
        }
        else 
        {
          break;
        }
      }
      printf("%s\r\n", tmp.c_str());
    }
    

To make the program more user-friendly, we use OpenCV to open a display window and draw the relevant information on the image.

line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);

imshow("Passport MRZ Recognition", ori);
Enter fullscreen mode Exit fullscreen mode

passport mrz ocr

A further improvement is to use the hconcat function to stitch the before and after images together for easier comparison.

// Concatenate the two images horizontally into newMat and show the result.
hconcat(before, after, newMat);
imshow("Comparison", newMat);
Enter fullscreen mode Exit fullscreen mode

passport mrz recognition

Source Code

https://github.com/yushulx/cmake-cpp-barcode-qrcode/tree/main/examples/10.x/mrz

. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
Terabox Video Player