- OpenCV提供的matchTemplate只负责将结果(相关性等)计算出来,并不会直接提供目标的对应坐标。一般来说我们直接遍历结果找出最高的相关度,就可以得到匹配度最高的坐标,但是这样一般只能得到一个坐标。
- 在实际操作中,我们可能需要匹配一个不规则的图像,把这个不规则的图像放进矩形Mat里,会出现很多不应该参与匹配的地方参与结果的计算,导致识别率下降。
- 有时候面对半透明控件,其后的背景完全不一样,传统的匹配方法直接歇菜了,怎么办?
通过matchTemplate算法,可以得到目标与原图像中等大子图像对应的归一化相关系数,这个归一化的相关系数可以近似看作是匹配的概率(严格来说并不是概率),可以设定一个阈值,把大于这个阈值的坐标都筛选出来。但是这样在一个成功匹配的坐标附近也会存在许多相关性稍小、但同样大于这个阈值的坐标,我们无法区分这些坐标对应的图像是原来的图像还是其他的图像,这样就把这个问题转化为了怎么把这些副产物给去除。有cv经验的应该很快会想到nms算法(参见知乎文章《非极大值抑制(NMS)算法讲解|理论+代码》)。想了解的同学可以去看看。下面就只提供代码实现。
OpenCV的matchTemplate中提供了一个可选的参数mask,这个mask是和目标等大的一张图,可以是U8C1也可以是FP32,其中U8C1对于每个点的含义是为0则放弃匹配该点,非0就会匹配,FP32则会在计算相关性时给该点的像素赋予对应的权重。我们的要求比较简单,只需要不匹配不规则图像中的空白部分就好了,可以在mask中把这里涂黑,要匹配的地方涂白就好了(绿幕抠像?)。
对于半透明控件,某个坐标对应的像素值会随着背景变化而变化。matchTemplate这种通过计算像素值相似度的算法,会因为背景变化导致整个图像的像素发生整体性的大规模变化而受到影响。但是即便整个图像的像素发生变化,待寻找目标内部颜色与坐标的相对关系是基本不变的(目标具有某种特征,这也就是人为什么可以对这种控件进行识别)。可以用特征匹配的方法,利用这个特性对透明控件进行匹配。
需要注意的是部分算法(SURF)来自于nonfree的xfeatures2d模块,使用时请注意避免专利与授权纠纷,当然也需要使用者在编译OpenCV时手动打开OPENCV_ENABLE_NONFREE这个编译开关。相关代码Fork自OpenCV官方教程:Features2D + Homography to find a known object
// libmatch.h : public interface of the libmatch template/feature matching DLL.
//
// The header is used in two modes:
//   * LIBMATCH_EXPORTS defined   -> building the library itself. The result
//     structs are expressed with OpenCV types and the NMS helpers are visible.
//     OpenCV headers are expected to be included before this header (the
//     library does so through its precompiled header).
//   * LIBMATCH_EXPORTS undefined -> consuming the library. The result structs
//     are plain float aggregates with the same member order, so no OpenCV
//     headers are needed by the caller.
#ifndef LIBMATCH_H_
#define LIBMATCH_H_

#include <stddef.h>  // size_t
#include <stdint.h>  // uint8_t, uint32_t

// Import/export decoration. __declspec only exists on Windows toolchains;
// elsewhere fall back to default symbol visibility so the header still parses.
#if defined(_WIN32)
#define LIBMATCH_EXPORT_ATTR __declspec(dllexport)
#define LIBMATCH_IMPORT_ATTR __declspec(dllimport)
#else
#define LIBMATCH_EXPORT_ATTR __attribute__((visibility("default")))
#define LIBMATCH_IMPORT_ATTR
#endif

#ifdef LIBMATCH_EXPORTS

#include <algorithm>
#include <vector>

#define LIBMATCH_API extern "C" LIBMATCH_EXPORT_ATTR

// One template-matching hit: bounding box in source-image pixels plus score.
struct objectEx
{
    cv::Rect_<float> rect;
    float prob;
};

// One feature-matching hit: the four projected corners of the target.
struct objectEx2
{
    cv::Point2f dots[4];
};

// Sorts candidates in place by descending score.
static void qsort_descent_inplace(std::vector<objectEx>& objects)
{
    if (objects.empty())
        return;

    std::sort(objects.begin(), objects.end(),
              [](const objectEx& a, const objectEx& b) { return a.prob > b.prob; });
}

// Area of the overlap between two candidate boxes (0 when they are disjoint).
static inline float intersection_area(const objectEx& a, const objectEx& b)
{
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

// Greedy non-maximum suppression.
//   faceobjects   : candidates, expected to be sorted by descending score.
//   picked        : receives the indices (into faceobjects) of the kept boxes.
//   nms_threshold : a candidate is dropped when its IoU with any already kept
//                   box is greater than this value.
static void nms_sorted_bboxes(const std::vector<objectEx>& faceobjects,
                              std::vector<int>& picked,
                              float nms_threshold)
{
    picked.clear();

    const int n = static_cast<int>(faceobjects.size());

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const objectEx& a = faceobjects[i];

        bool keep = true;
        for (size_t j = 0; j < picked.size(); j++)
        {
            const objectEx& b = faceobjects[picked[j]];

            // intersection over union
            const float inter_area = intersection_area(a, b);
            const float union_area = areas[i] + areas[picked[j]] - inter_area;

            // Degenerate (zero-area) boxes have no meaningful IoU; never let
            // them suppress anything instead of dividing by zero.
            if (union_area > 0.f && inter_area / union_area > nms_threshold)
            {
                keep = false;
                break;  // one overlapping kept box is enough to reject
            }
        }

        if (keep)
            picked.push_back(i);
    }
}

const int version = 230622;

#else  // !LIBMATCH_EXPORTS

#define LIBMATCH_API extern "C" LIBMATCH_IMPORT_ATTR

// One template-matching hit: bounding box in source-image pixels plus score.
// Member order mirrors the library-side struct (x, y, width, height, prob).
struct objectEx
{
    struct Rect
    {
        float x, y, width, height;
    } rect;
    float prob;
};

// One feature-matching hit: the four projected corners of the target.
struct objectEx2
{
    struct
    {
        float x, y;
    } dots[4];
};

#endif  // LIBMATCH_EXPORTS

// Returns the library version number.
LIBMATCH_API int match_get_version();

// Template matching with threshold filtering and non-maximum suppression.
//   src_img_data / src_img_size       : encoded source image bytes.
//   target_img_data / target_img_size : encoded target (template) image bytes.
//   prob_threshold : minimum normalized correlation for a candidate.
//   nms_threshold  : IoU threshold used to suppress overlapping candidates.
//   RetObejectArr  : output array receiving at most maxRetCount results.
//   maxRetCount    : capacity of RetObejectArr.
//   MaskColor      : template colour to ignore, packed as 0x00BBGGRR. If any
//                    bit of the high byte (0xFF000000) is set, masking is
//                    disabled.
LIBMATCH_API size_t match_scan(
    uint8_t* src_img_data,
    const size_t src_img_size,
    uint8_t* target_img_data,
    const size_t target_img_size,
    const float prob_threshold,
    const float nms_threshold,
    objectEx* RetObejectArr,
    const size_t maxRetCount,
    const uint32_t MaskColor
);

// Feature-based matching: on success fills `result` with the four corners of
// the target as located in the source image and returns true.
LIBMATCH_API bool match_feat(
    uint8_t* src_img_data,
    const size_t src_img_size,
    uint8_t* target_img_data,
    const size_t target_img_size,
    objectEx2 &result
);

#endif  // LIBMATCH_H_
// libmatch.cpp : 定义 DLL 的导出函数。 // #include "pch.h" #include "framework.h" #include "libmatch.h" LIBMATCH_API int match_get_version() { return version; } LIBMATCH_API size_t match_scan( uint8_t* src_img_data, const size_t src_img_size, uint8_t* target_img_data, const size_t target_img_size, const float prob_threshold, const float nms_threshold, objectEx* RetObejectArr, const size_t maxRetCount, const uint32_t MaskColor //Just For BGR,if high 2bit isn`t zero,mask will be disabled ) { //Read and Process img Start cv::_InputArray src_img_arr(src_img_data, src_img_size); cv::Mat src_mat = cv::imdecode(src_img_arr, cv::IMREAD_GRAYSCALE); if (src_mat.empty()) { std::cout << "[Match] Err Can`t Read src_img" << std::endl; return -1; } cv::_InputArray target_img_arr(target_img_data, target_img_size); cv::Mat target_mat = cv::imdecode(target_img_arr, cv::IMREAD_GRAYSCALE); if (target_mat.empty()) { std::cout << "[Match] Err Can`t Read target_img" << std::endl; return -1; } if (target_mat.cols > src_mat.cols || target_mat.rows > src_mat.rows) { std::cout << "[Match]ERR Target is too large" << std::endl; return false; } //Read Over //Template Match Start cv::Mat result(src_mat.cols - target_mat.cols + 1, src_mat.rows - target_mat.rows + 1, CV_32FC1); if ((MaskColor & 0xff000000) != 0) { cv::matchTemplate(src_mat, target_mat, result, cv::TM_CCOEFF_NORMED); } else { cv::Mat temp_target_mat = cv::imdecode(target_img_arr, cv::IMREAD_COLOR); cv::Mat maks_mat = cv::Mat::zeros(target_mat.rows, target_mat.cols, CV_8U); //Replace MaskColor for (int i = 0; i < temp_target_mat.rows; i++) for (int j = 0; j < temp_target_mat.cols; j++) { cv::Vec3b temp_color=temp_target_mat.at<cv::Vec3b>(cv::Point(j, i)); if (((temp_color[0] << 16) | (temp_color[1] << 8) | temp_color[2]) != MaskColor) { // std::cout << ((temp_color[0] << 16) | (temp_color[1] << 8) | temp_color[2]) << std::endl; maks_mat.at<uint8_t>(cv::Point(j, i)) = 255; } } // cv::imshow("result", maks_mat); // cv::waitKey(); 
cv::matchTemplate(src_mat, target_mat, result, cv::TM_CCOEFF_NORMED, maks_mat); } //Template Match Over //BackEnd Process std::vector <objectEx> proposals; for (int i = 0; i < result.rows; ++i) for (int j = 0; j < result.cols; ++j) { if (result.at<float>(cv::Point(j, i)) >= prob_threshold) { objectEx buf; buf.prob = result.at<float>(cv::Point(j, i)); buf.rect.x = j; buf.rect.y = i; buf.rect.height = target_mat.rows; buf.rect.width = target_mat.cols; proposals.push_back(buf); } } std::vector<int> picked; qsort_descent_inplace(proposals); nms_sorted_bboxes(proposals, picked, nms_threshold); std::vector <objectEx> objects; for (auto x : picked) objects.emplace_back(proposals[x]); //BackEnd Over memcpy(RetObejectArr, objects.data(), sizeof(objectEx) * std::min(objects.size(), maxRetCount)); return objects.size(); } LIBMATCH_API bool match_feat( uint8_t* src_img_data, const size_t src_img_size, uint8_t* target_img_data, const size_t target_img_size, objectEx2 &result ) { //Read and Process img Start cv::_InputArray src_img_arr(src_img_data, src_img_size); cv::Mat src_mat = cv::imdecode(src_img_arr, cv::IMREAD_GRAYSCALE); if (src_mat.empty()) { std::cout << "[Match] Err Can`t Read src_img" << std::endl; return false; } cv::_InputArray target_img_arr(target_img_data, target_img_size); cv::Mat target_mat = cv::imdecode(target_img_arr, cv::IMREAD_GRAYSCALE); if (target_mat.empty()) { std::cout << "[Match] Err Can`t Read target_img" << std::endl; return false; } //Read Over //-- Step 1: Detect the keypoints using SURF Detector, compute the descriptors int minHessian = 400; cv::Ptr<cv::xfeatures2d::SURF> detector = cv::xfeatures2d::SURF::create(minHessian); std::vector<cv::KeyPoint> keypoints_object, keypoints_scene; cv::Mat descriptors_object, descriptors_scene; detector->detectAndCompute(target_mat, cv::noArray(), keypoints_object, descriptors_object); detector->detectAndCompute(src_mat,cv::noArray(), keypoints_scene, descriptors_scene); //-- Step 2: Matching descriptor 
vectors with a FLANN based matcher // Since SURF is a floating-point descriptor NORM_L2 is used cv::Ptr<cv::DescriptorMatcher> matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::FLANNBASED); std::vector< std::vector<cv::DMatch> > knn_matches; matcher->knnMatch(descriptors_object, descriptors_scene, knn_matches, 2); //-- Filter matches using the Lowe's ratio test const float ratio_thresh = 0.75f; std::vector<cv::DMatch> good_matches; for (size_t i = 0; i < knn_matches.size(); i++) { if (knn_matches[i][0].distance < ratio_thresh * knn_matches[i][1].distance) { good_matches.push_back(knn_matches[i][0]); } } if (good_matches.size() == 0) return false; //-- Draw matches //Mat img_matches; //drawMatches(img_object, keypoints_object, img_scene, keypoints_scene, good_matches, img_matches, Scalar::all(-1), // Scalar::all(-1), std::vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); //-- Localize the object std::vector<cv::Point2f> obj; std::vector<cv::Point2f> scene; for (size_t i = 0; i < good_matches.size(); i++) { //-- Get the keypoints from the good matches obj.push_back(keypoints_object[good_matches[i].queryIdx].pt); scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt); } cv::Mat H = findHomography(obj, scene, cv::RANSAC); //-- Get the corners from the image_1 ( the object to be "detected" ) std::vector<cv::Point2f> obj_corners(4); obj_corners[0] = cv::Point2f(0, 0); obj_corners[1] = cv::Point2f((float)target_mat.cols, 0); obj_corners[2] = cv::Point2f((float)target_mat.cols, (float)target_mat.rows); obj_corners[3] = cv::Point2f(0, (float)target_mat.rows); std::vector<cv::Point2f> buf_corners(4); cv::perspectiveTransform(obj_corners, buf_corners, H); memcpy(result.dots, buf_corners.data(), buf_corners.size() * sizeof(cv::Point2f)); return true; }
紧张而刺激的高考在本月落下了帷幕,结束了长达12年的通识教育,笔者终于能够潜下心来研究这些东西背后的数学原理。由于笔者的能力有限,本文存在不严谨的部分,希望读者可以谅解。
算法交流群:904511841,143858000