MRZ stands for machine-readable zone. A passport has a machine-readable zone that contains the holder's name, nationality, passport number, date of birth, sex, and the passport's expiration date. This article shows how to use the Dynamsoft OCR SDK to recognize the MRZ and parse the corresponding information.
About Dynamsoft OCR SDK
- Download Dynamsoft C++ OCR dev package, which supports Windows and Linux.
- Get a 30-day FREE Trial License.
Prerequisites
Windows
Linux
sudo apt install libopencv-dev cmake
Passport MRZ Recognition in C++
In the following paragraphs, we first use the Dynamsoft OCR SDK to locate the machine-readable zone of a passport and recognize the corresponding text string, and then extract all the information from that text string according to the standard format of passport booklets.
Setting up CMake project
I strongly recommend installing the CMake extension in Visual Studio Code to create and debug a CMake project on both Windows and Linux.
Let us configure the header files and linking libraries of Dynamsoft OCR and OpenCV in CMakeLists:
cmake_minimum_required (VERSION 2.6)
project (mrz)
MESSAGE( STATUS "PROJECT_NAME: " ${PROJECT_NAME} )
# Check platforms
if (CMAKE_HOST_WIN32)
set(WINDOWS 1)
elseif(CMAKE_HOST_UNIX)
set(LINUX 1)
endif()
# Add search path for include and lib files
MESSAGE( STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}" )
if(WINDOWS)
link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/")
elseif(LINUX)
link_directories("${PROJECT_SOURCE_DIR}/platform/linux/")
endif()
include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/")
# Add the executable
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} mrzcv.cpp)
if(WINDOWS)
target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
else()
target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS})
endif()
# Copy DLLs
if(WINDOWS)
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
"${PROJECT_SOURCE_DIR}/platform/windows/bin/"
$<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif()
The character model, trained with a deep neural network (DNN), can be found in the C++ dev package. We need to copy the whole model folder to the output directory, and the template file as well.
# Copy template
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
"${PROJECT_SOURCE_DIR}/template/"
$<TARGET_FILE_DIR:${PROJECT_NAME}>)
# Copy model files
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
"${PROJECT_SOURCE_DIR}/CharacterModel"
$<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel)
The online documentation can help you figure out how template parameters work.
Coding for MRZ detection and information parsing
Once the build configuration is done, we can move to the code part.
-
Initialize the OCR object. A valid license key is required.
CLabelRecognition dlr; dlr.InitLicense("LICENSE-KEY");
-
Append a template file.
int ret = dlr.AppendSettingsFromFile("template-file");
Note: if the
DirectoryPath
configured in the template file is a relative path, you must place the template file alongside the model folder so that the path resolves correctly.
"CharacterModelArray" : [ { "DirectoryPath": "CharacterModel", "FilterFilePath": "", "Name": "NumberUppercase" } ],
-
Call the OCR recognition method. We can use OpenCV's built-in functionality to measure the elapsed time. The
TickMeter
class simplifies the timing code on both Windows and Linux.
TickMeter tm; tm.start(); errorCode = dlr.RecognizeByFile(pszImageFile, "locr"); tm.stop(); float costTime = tm.getTimeSec();
-
Get the text recognition results, which include the coordinates of the text zone, the text lines, and the text strings.
DLRResultArray* pDLRResults = NULL; dlr.GetAllDLRResults(&pDLRResults); if (pDLRResults != NULL) { int rCount = pDLRResults->resultsCount; printf("\r\nRecognized %d results\r\n", rCount); for (int ri = 0; ri < rCount; ++ri) { printf("\r\nResult %d :\r\n", ri); int startX = 50, startY = 50; DLRResult* result = pDLRResults->results[ri]; int lCount = result->lineResultsCount; for (int li = 0; li < lCount; ++li) { printf("Line result %d: %s\r\n", li, result->lineResults[li]->text); DLRPoint *points = result->lineResults[li]->location.points; printf("x1: %d, y1: %d, x2: %d, y2: %d, x3: %d, y3: %d, x4: %d, y4: %d\r\n", points[0].x, points[0].y, points[1].x, points[1].y, points[2].x, points[2].y, points[3].x, points[3].y); } } } else { printf("\r\nNo data detected.\r\n"); } dlr.FreeDLRResults(&pDLRResults);
-
Parse the MRZ string and extract the corresponding information.
string line1 = result->lineResults[0]->text; string line2 = result->lineResults[1]->text; // https://en.wikipedia.org/wiki/Machine-readable_passport // Type string tmp = "Type: "; tmp.insert(tmp.length(), 1, line1[0]); printf("%s\r\n", tmp.c_str()); // Issuing country tmp = "Issuing country: "; line1.substr(2, 5); tmp += line1.substr(2, 3); printf("%s\r\n", tmp.c_str()); // Surname int index = 5; tmp = "Surname: "; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Given names tmp = "Given Names: "; index += 2; for (; index < 44; index++) { if (line1[index] != '<') { tmp.insert(tmp.length(), 1, line1[index]); } else { tmp.insert(tmp.length(), 1, ' '); } } printf("%s\r\n", tmp.c_str()); // Passport number tmp = "Passport number: "; index = 0; for (; index < 9; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); // Nationality tmp = "Nationality: "; tmp += line2.substr(10, 3); printf("%s\r\n", tmp.c_str()); // Date of birth tmp = line2.substr(13, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Date of birth (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Sex tmp = "Sex: "; tmp.insert(tmp.length(), 1, line2[20]); printf("%s\r\n", tmp.c_str()); // Expiration date of passport tmp = line2.substr(21, 6); tmp.insert(2, "/"); tmp.insert(5, "/"); tmp = "Expiration date of passport (YYMMDD): " + tmp; printf("%s\r\n", tmp.c_str()); // Personal number if (line2[28] != '<') { tmp = "Personal number: "; for (index = 28; index < 42; index++) { if (line2[index] != '<') { tmp.insert(tmp.length(), 1, line2[index]); } else { break; } } printf("%s\r\n", tmp.c_str()); }
To make the program user-friendly, we use OpenCV to show the display window and draw relevant information on it.
line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);
imshow("Passport MRZ Recognition", ori);
A further improvement is to use the hconcat
function to stitch the images side by side for easier comparison.
hconcat(before, after, newMat);
imshow("Comparison", newMat);
Source Code
https://github.com/yushulx/cmake-cpp-barcode-qrcode/tree/main/examples/10.x/mrz