早期的RCNN目标检测算法中使用selective search算法来产生region proposals.
该算法的大致流程如下:
1、利用图分割算法将图像分成不同的区域,并为这些区域产生相应的包围框
2、计算包围框之间的相似度,相似度可以分为四个部分(颜色相似度、纹理相似度和大小,相交约束),合并相近的包围框
3、排除过大、过小、长宽比过大、过小的包围框即可
参考代码:https://github.com/tttanikawa/selective-search-cpp
#pragma once #include <algorithm> #include <array> #include <cassert> #include <chrono> #include <cmath> #include <iostream> #include <iterator> #include <map> #include <memory> #include <opencv2/core.hpp> #include <opencv2/highgui.hpp> #include <opencv2/imgcodecs.hpp> #include <opencv2/imgproc.hpp> #include <opencv2/ml.hpp> #include <opencv2/objdetect.hpp> #include <random> #include <unordered_map> #include <vector> namespace std { template <> class hash<std::pair<int, int>> { public: std::size_t operator()(const std::pair<int, int> &x) const { return hash<int>()(x.first) ^ hash<int>()(x.second); } }; } // namespace std namespace ss { inline double square(double a) { return a * a; } inline double diff(const cv::Mat &img, int x1, int y1, int x2, int y2) { return sqrt(square(img.at<cv::Vec3f>(y1, x1)[0] - img.at<cv::Vec3f>(y2, x2)[0]) + square(img.at<cv::Vec3f>(y1, x1)[1] - img.at<cv::Vec3f>(y2, x2)[1]) + square(img.at<cv::Vec3f>(y1, x1)[2] - img.at<cv::Vec3f>(y2, x2)[2])); } struct UniverseElement { int rank; int p; int size; UniverseElement() : rank(0), size(1), p(0) {} UniverseElement(int rank, int size, int p) : rank(rank), size(size), p(p) {} bool operator==(const UniverseElement &other) const { return rank == other.rank && p == other.p && size == other.size; } }; class Universe { private: std::vector<UniverseElement> elements; int num; public: Universe(int num) : num(num) { elements.reserve(num); for (int i = 0; i < num; i++) { elements.emplace_back(0, 1, i); } } ~Universe() {} int findFast(int x) { return elements[x].p; } int find(int x) { int y = x; while (y != elements[y].p) { y = elements[y].p; } elements[x].p = y; return y; } void join(int x, int y) { if (elements[x].rank > elements[y].rank) { elements[y].p = x; elements[x].size += elements[y].size; } else { elements[x].p = y; elements[y].size += elements[x].size; if (elements[x].rank == elements[y].rank) { elements[y].rank++; } } num--; } int size(int x) const { return elements[x].size; } int numSets() const { return num; } }; struct edge { int a; int b; double w; }; bool operator<(const edge &a, const edge &b) { return a.w < b.w; } inline double calThreshold(int size, double scale) { return scale / size; } std::shared_ptr<Universe> segmentGraph(int numVertices, int numEdges, std::vector<edge> &edges, double scale) { std::sort(edges.begin(), edges.end()); auto universe = std::make_shared<Universe>(numVertices); std::vector<double> threshold(numVertices, scale); for (auto &pedge : edges) { int a = universe->find(pedge.a); int b = universe->find(pedge.b); if (a != b) { if ((pedge.w <= threshold[a]) && (pedge.w <= threshold[b])) { universe->join(a, b); a = universe->find(a); threshold[a] = pedge.w + calThreshold(universe->size(a), scale); } } } return universe; } // image segmentation using "Efficient Graph-Based Image Segmentation" // 类似并查集 std::shared_ptr<Universe> segmentation(const cv::Mat &img, double scale, double sigma, int minSize) { const int width = img.cols; const int height = img.rows; cv::Mat imgF; img.convertTo(imgF, CV_32FC3); cv::Mat blurred; cv::GaussianBlur(imgF, blurred, cv::Size(5, 5), sigma); std::vector<edge> edges(width * height * 4); int num = 0; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { if (x < width - 1) { edges[num].a = y * width + x; edges[num].b = y * width + (x + 1); edges[num].w = diff(blurred, x, y, x + 1, y); num++; } if (y < height - 1) { edges[num].a = y * width + x; edges[num].b = (y + 1) * width + x; edges[num].w = diff(blurred, x, y, x, y + 1); num++; } if ((x < width - 1) && (y < height - 1)) { edges[num].a = y * width + x; edges[num].b = (y + 1) * width + (x + 1); edges[num].w = diff(blurred, x, y, x + 1, y + 1); num++; } if ((x < width - 1) && (y > 0)) { edges[num].a = y * width + x; edges[num].b = (y - 1) * width + (x + 1); edges[num].w = diff(blurred, x, y, x + 1, y - 1); num++; } } } edges.erase(edges.begin() + num, edges.end()); auto universe = segmentGraph(width * height, num, edges, scale); for (int i = 0; i < num; i++) { int a = universe->find(edges[i].a); int b = universe->find(edges[i].b); if ((a != b) && ((universe->size(a) < minSize) || (universe->size(b) < minSize))) { universe->join(a, b); } } return universe; } void visualize(const cv::Mat &img, std::shared_ptr<Universe> universe) { const int height = img.rows; const int width = img.cols; std::vector<cv::Vec3b> colors; cv::Mat segmentated(height, width, CV_8UC3); std::random_device rnd; std::mt19937 mt(rnd()); std::uniform_int_distribution<> rand256(0, 255); for (int i = 0; i < height * width; i++) { cv::Vec3b color(rand256(mt), rand256(mt), rand256(mt)); colors.push_back(color); } for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { segmentated.at<cv::Vec3b>(y, x) = colors[universe->findFast(y * width + x)]; } } cv::imshow("Initial Segmentation Result", segmentated); cv::waitKey(1); } struct Region { int size; cv::Rect rect; std::vector<int> labels; std::vector<float> colourHist; std::vector<float> textureHist; std::vector<cv::Vec2i> points; Region() {} Region(const cv::Rect &rect, int label) : rect(rect) { labels.push_back(label); } Region( const cv::Rect &rect, int size, const std::vector<float> &&colourHist, const std::vector<float> &&textureHist, const std::vector<int> &&labels) : rect(rect), size(size), colourHist(std::move(colourHist)), textureHist(std::move(textureHist)), labels(std::move(labels)) {} Region &operator=(const Region ®ion) = default; Region &operator=(Region &®ion) noexcept { if (this != ®ion) { this->size = region.size; this->rect = region.rect; this->labels = std::move(region.labels); this->colourHist = std::move(region.colourHist); this->textureHist = std::move(region.textureHist); } return *this; } Region(Region &®ion) noexcept { *this = std::move(region); } }; std::shared_ptr<Universe> generateSegments(const cv::Mat &img, double scale, double sigma, int minSize) { auto universe = segmentation(img, scale, sigma, minSize); //visualize(img, universe); return universe; } double calcSimOfColour(const Region &r1, const Region &r2) { assert(r1.colourHist.size() == r2.colourHist.size()); float sum = 0.0; for (auto i1 = r1.colourHist.cbegin(), i2 = r2.colourHist.cbegin(); i1 != r1.colourHist.cend(); i1++, i2++) { sum += std::min(*i1, *i2); } return sum; } double calcSimOfTexture(const Region &r1, const Region &r2) { assert(r1.colourHist.size() == r2.colourHist.size()); double sum = 0.0; // cbegin, cend返回const迭代器 for (auto i1 = r1.textureHist.cbegin(), i2 = r2.textureHist.cbegin(); i1 != r1.textureHist.cend(); i1++, i2++) { sum += std::min(*i1, *i2); } return sum; } inline double calcSimOfSize(const Region &r1, const Region &r2, int imSize) { return (1.0 - (double)(r1.size + r2.size) / imSize);// 降低两个较大的矩形合并的概率 } inline double calcSimOfRect(const Region &r1, const Region &r2, int imSize) { return (1.0 - (double)((r1.rect | r2.rect).area() - r1.size - r2.size) / imSize); //相交面积越大,相似度越高 } inline double calcSimilarity(const Region &r1, const Region &r2, int imSize) { return (calcSimOfColour(r1, r2) + calcSimOfTexture(r1, r2) + calcSimOfSize(r1, r2, imSize) + calcSimOfRect(r1, r2, imSize)); // 相似度包括颜色、纹理、size、rect } void calcColourHist(const cv::Mat &img, std::shared_ptr<Universe> universe, int label, Region ®ion) { std::array<std::vector<unsigned char>, 3> hsv; for (auto &e : hsv) { e.reserve(region.points.size()); } for (cv::Vec2i point : region.points) { for (int channel = 0; channel < 3; channel++) { hsv[channel].push_back(img.at<cv::Vec3b>(point[0], point[1])[channel]); } } int channels[] = {0}; const int bins = 25; int histSize[] = {bins}; float range[] = {0, 256}; const float *ranges[] = {range}; for (int channel = 0; channel < 3; channel++) { cv::Mat hist; cv::Mat input(hsv[channel]); cv::calcHist(&input, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false); cv::normalize(hist, hist, 1.0, 0.0, cv::NORM_L1); std::vector<float> histogram; hist.copyTo(histogram); if (region.colourHist.empty()) { region.colourHist = std::move(histogram); } else { std::copy(histogram.begin(), histogram.end(), std::back_inserter(region.colourHist)); } } } cv::Mat calcTextureGradient(const cv::Mat &img) { cv::Mat sobelX, sobelY; cv::Sobel(img, sobelX, CV_32F, 1, 0); cv::Sobel(img, sobelY, CV_32F, 0, 1); cv::Mat magnitude, angle; cv::cartToPolar(sobelX, sobelY, magnitude, angle, true); return angle; } // 统计一阶sobel算子的方向角直方图 void calcTextureHist(const cv::Mat &img, const cv::Mat &gradient, std::shared_ptr<Universe> universe, int label, Region ®ion) { const int orientations = 8; std::array<std::array<std::vector<unsigned char>, orientations>, 3> intensity; for (auto &e : intensity) { for (auto &ee : e) { ee.reserve(region.points.size()); } } for (cv::Vec2i point : region.points) { for (int channel = 0; channel < 3; channel++) { int angle = (int)(gradient.at<cv::Vec3f>(point[0], point[1])[channel] / 22.5) % orientations; intensity[channel][angle].push_back(img.at<cv::Vec3b>(point[0], point[1])[channel]); } } int channels[] = {0}; const int bins = 10; int histSize[] = {bins}; float range[] = {0, 256}; const float *ranges[] = {range}; for (int channel = 0; channel < 3; channel++) { for (int angle = 0; angle < orientations; angle++) { cv::Mat hist; cv::Mat input(intensity[channel][angle]); cv::calcHist(&input, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false); cv::normalize(hist, hist, 1.0, 0.0, cv::NORM_L1); std::vector<float> histogram; hist.copyTo(histogram); if (region.textureHist.empty()) { region.textureHist = std::move(histogram); } else { std::copy(histogram.begin(), histogram.end(), std::back_inserter(region.textureHist)); } } } } std::map<int, Region> extractRegions(const cv::Mat &img, std::shared_ptr<Universe> universe) { std::map<int, Region> R; for (int y = 0; y < img.rows; y++) { for (int x = 0; x < img.cols; x++) { int label = universe->findFast(y * img.cols + x); if (R.find(label) == R.end()) { R[label] = Region(cv::Rect(100000, 100000, 0, 0), label); } Region ®ion = R[label]; if (region.rect.x > x) { region.rect.x = x; } if (region.rect.y > y) { region.rect.y = y; } if (region.rect.br().x < x) { region.rect.width = x - region.rect.x + 1; } if (region.rect.br().y < y) { region.rect.height = y - region.rect.y + 1; } region.points.push_back(cv::Vec2i(y, x)); } } cv::Mat gradient = calcTextureGradient(img); cv::Mat hsv; cv::cvtColor(img, hsv, cv::COLOR_BGR2HSV); for (auto &labelRegion : R) { labelRegion.second.size = labelRegion.second.points.size(); calcColourHist(hsv, universe, labelRegion.first, labelRegion.second); calcTextureHist(img, gradient, universe, labelRegion.first, labelRegion.second); } return R; } inline bool isIntersecting(const Region &a, const Region &b) { return ((a.rect & b.rect).area() != 0); } using LabelRegion = std::pair<int, Region>; using Neighbour = std::pair<int, int>; std::vector<Neighbour> extractNeighbours(const std::map<int, Region> &R) { std::vector<Neighbour> neighbours; neighbours.reserve(R.size() * (R.size() - 1) / 2); for (auto a = R.cbegin(); a != R.cend(); a++) { auto tmp = a; tmp++; for (auto b = tmp; b != R.cend(); b++) { if (isIntersecting(a->second, b->second)) { neighbours.push_back(std::make_pair(std::min(a->first, b->first), std::max(a->first, b->first))); } } } return neighbours; } std::vector<float> merge(const std::vector<float> &a, const std::vector<float> &b, int asize, int bsize) { std::vector<float> newVector; newVector.reserve(a.size()); for (auto ai = a.begin(), bi = b.begin(); ai != a.end(); ai++, bi++) { newVector.push_back(((*ai) * asize + (*bi) * bsize) / (asize + bsize)); } return newVector; }; Region mergeRegions(const Region &r1, const Region &r2) { assert(r1.colourHist.size() == r2.colourHist.size()); assert(r1.textureHist.size() == r2.textureHist.size()); int newSize = r1.size + r2.size; std::vector<int> newLabels(r1.labels); std::copy(r2.labels.begin(), r2.labels.end(), std::back_inserter(newLabels)); return Region(r1.rect | r2.rect, newSize, std::move(merge(r1.colourHist, r2.colourHist, r1.size, r2.size)), std::move(merge(r1.textureHist, r2.textureHist, r1.size, r2.size)), std::move(newLabels)); } std::vector<cv::Rect> selectiveSearch(const cv::Mat &img, double scale = 1.0, double sigma = 0.8, int minSize = 50, int smallest = 1000, int largest = 270000, double distorted = 5.0) { assert(img.channels() == 3); auto universe = generateSegments(img, scale, sigma, minSize); int imgSize = img.total(); auto R = extractRegions(img, universe); // 先通过采样获得regions auto neighbours = extractNeighbours(R); // 为region提取neighbours, 保存为vector<pair<int, int>> std::unordered_map<std::pair<int, int>, double> S; for (auto &n : neighbours) { S[n] = calcSimilarity(R[n.first], R[n.second], imgSize); // 为每个pair 计算相似度 } using NeighbourSim = std::pair<std::pair<int, int>, double>; while (!S.empty()) { auto cmp = [](const NeighbourSim &a, const NeighbourSim &b) { return a.second < b.second; }; auto m = std::max_element(S.begin(), S.end(), cmp); //找到最大相似度的 //if(m->second<10) break; // 这里为啥不把相似度太低的直接排除 int i = m->first.first; int j = m->first.second; auto ij = std::make_pair(i, j); int t = R.rbegin()->first + 1;// 最大序号加1 R[t] = mergeRegions(R[i], R[j]);// 保存region i + region j std::vector<std::pair<int, int>> keyToDelete; for (auto &s : S) { auto key = s.first; if ((i == key.first) || (i == key.second) || (j == key.first) || (j == key.second)) { keyToDelete.push_back(key); // 所有i,j相关的区域都加进去 } } for (auto &key : keyToDelete) { S.erase(key); if (key == ij) { continue; } int n = (key.first == i || key.first == j) ? key.second : key.first;// n 除i,j外的其他 S[std::make_pair(n, t)] = calcSimilarity(R[n], R[t], imgSize); // 重新计算相似度, S的大小每次减1,知道为0? } } std::vector<cv::Rect> proposals; proposals.reserve(R.size()); for (auto &r : R) { // exclude same rectangle (with different segments) if (std::find(proposals.begin(), proposals.end(), r.second.rect) != proposals.end()) { continue; } // exclude regions that is smaller/larger than assigned size if (r.second.size < smallest || r.second.size > largest) // { continue; } double w = r.second.rect.width; double h = r.second.rect.height; // exclude distorted rects if ((w / h > distorted) || (h / w > distorted)) { continue; } proposals.push_back(r.second.rect); } return proposals; } } // namespace ss
test:
#include "selective_search.hpp" #include <opencv2/opencv.hpp> int main(int argc, char** argv) { std::string fileName = "./deer.jpg"; cv::Mat img = cv::imread(fileName, cv::IMREAD_COLOR); // selective search auto proposals = ss::selectiveSearch(img, 500, 0.8, 50, 20000, 100000, 2.5); // do something... for (auto&& rect : proposals) { cv::rectangle(img, rect, cv::Scalar(0, 255, 0), 3, 8); } cv::imwrite("./result.jpg", img); cv::imshow("result", img); cv::waitKey(0); return 0; }
makefile:
CC = g++ CFLAGS = -g -Wall SRCS = test.cpp PROG = test all: $(PROG) clean: $(RM) $(PROG) OPENCV = `pkg-config opencv --cflags --libs` LIBS = $(OPENCV) $(PROG):$(SRCS) $(CC) $(CFLAGS) -o $(PROG) $(SRCS) $(LIBS)
图分割结果:
最后产生的包围框:
图分割和其他部分的耗时对比:
1158397: 815254