其它优秀的字符串搜索代码:点击
使用说明:
一般都是四个参数,
参数1: 你要搜索的缓冲区
参数2: 参数1缓冲区的大小
参数3: 要搜索的字符串
参数4: 参数3的缓冲大小
代码实现
search.h
#pragma once /* function: Boyer-Moore字符匹配算法 Param: @text 要搜索的缓冲区开始 @n 要搜索的缓冲区大小 @pattern 需要匹配的字符串 @m 需要匹配的字符串长度 */ int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m);
.cpp实现
使用BinarySearch即可.
#pragma once #include "search.h" #define maxNum 256 #ifndef MIN # define MIN(A,B) ((A)<(B)?(A):(B)) #endif #ifndef MAX # define MAX(A,B) ((A)>(B)?(A):(B)) #endif void PreBmBc(unsigned char *pattern, int m, int bmBc[]) { int i; for (i = 0; i < 256; i++) {//一个字符占八位,共256个字符,把所有字符都覆盖到,这里的初始化是将所有字符失配时的移动距离都赋值为m bmBc[i] = m; } for (i = 0; i < m - 1; i++) {//针对模式串pattern中存在的每一个字符,计算出它们最靠右的(非最后一个字符)地方距离串末尾的距离,即它们失配时该移动的距离,这一操作更新了初始化中一些字符的移动距离 bmBc[pattern[i]] = m - 1 - i; } } /* function: 旧版的好后缀辅助数组(好后缀长度)求解方法 Param: @pattern 需要匹配的字符串 @suff 好后缀辅助数组 @m 需要匹配的字符串长度 */ void suffix_old(char *pattern, int m, int suff[]) { int i, j; suff[m - 1] = m; for (i = m - 2; i >= 0; i--) { j = i; while (j >= 0 && pattern[j] == pattern[m - 1 - i + j]) j--; suff[i] = i - j; } } /* function: 新版的好后缀辅助数组(好后缀长度)求解方法 Param: @pattern 需要匹配的字符串 @suff 好后缀辅助数组 @m 需要匹配的字符串长度 */ void suffix(unsigned char *pattern, int m, int suff[]) { int f, g, i; suff[m - 1] = m; g = m - 1; for (i = m - 2; i >= 0; --i) { if (i > g&&suff[i + m - 1 - f] < i - g) suff[i] = suff[i + m - 1 - f]; else { if (i < g) g = i; f = i; while (g >= 0 && pattern[g] == pattern[g + m - 1 - f]) --g; suff[i] = f - g; } } } /* function: 好后缀数组求解方法 Param: @pattern 需要匹配的字符串 @bmGs 好后缀数组 @m 需要匹配的字符串长度 */ void PreBmGs(unsigned char *pattern, int m, int bmGs[]) { int i, j; int suff[maxNum]; // 计算后缀数组 suffix(pattern, m, suff); // 先全部赋值为m,包含Case3 for (i = 0; i < m; i++) { bmGs[i] = m; } // Case2 j = 0; for (i = m - 1; i >= 0; i--) { if (suff[i] == i + 1) { for (; j < m - 1 - i; j++) { if (bmGs[j] == m) bmGs[j] = m - 1 - i; } } } // Case1 for (i = 0; i <= m - 2; i++) { bmGs[m - 1 - suff[i]] = m - 1 - i; } } /* function: Boyer-Moore字符匹配算法 Param: @text 文本内容 @n 文本内容长度 @pattern 需要匹配的字符串 @m 需要匹配的字符串长度 */ int BinarySearch(unsigned char *text, int n, unsigned char *pattern, int m) { int * bmBc = new int[maxNum]; int * bmGs = new int[m]; PreBmBc(pattern, m, bmBc); PreBmGs(pattern, m, bmGs); int i, pos; pos = 0; while (pos <= n - m) { for (i = m - 1; i >= 0 && pattern[i] == text[i + pos]; i--); if (i < 0) { delete bmGs; delete bmBc; return &text[pos] - text; } else { pos += MAX(bmBc[text[i + pos]] - m + 1 + i, bmGs[i]); } } return -1; }
上面的代码是有注释,也是这个相同实现
void preBmBc(char *x, int m, int bmBc[]) { int i; for (i = 0; i < ASIZE; ++i) bmBc[i] = m; for (i = 0; i < m - 1; ++i) bmBc[x[i]] = m - i - 1; } void suffixes(char *x, int m, int *suff) { int f, g, i; suff[m - 1] = m; g = m - 1; for (i = m - 2; i >= 0; --i) { if (i > g && suff[i + m - 1 - f] < i - g) suff[i] = suff[i + m - 1 - f]; else { if (i < g) g = i; f = i; while (g >= 0 && x[g] == x[g + m - 1 - f]) --g; suff[i] = f - g; } } } void preBmGs(char *x, int m, int bmGs[]) { int i, j, suff[XSIZE]; suffixes(x, m, suff); for (i = 0; i < m; ++i) bmGs[i] = m; j = 0; for (i = m - 1; i >= 0; --i) if (suff[i] == i + 1) for (; j < m - 1 - i; ++j) if (bmGs[j] == m) bmGs[j] = m - 1 - i; for (i = 0; i <= m - 2; ++i) bmGs[m - 1 - suff[i]] = m - 1 - i; } void BM(char *x, int m, char *y, int n) { int i, j, bmGs[XSIZE], bmBc[ASIZE]; /* Preprocessing */ preBmGs(x, m, bmGs); preBmBc(x, m, bmBc); /* Searching */ j = 0; while (j <= n - m) { for (i = m - 1; i >= 0 && x[i] == y[i + j]; --i); if (i < 0) { OUTPUT(j); j += bmGs[0]; } else j += MAX(bmGs[i], bmBc[y[i + j]] - m + 1 + i); } }
void TUNEDBM(char *x, int m, char *y, int n) { int j, k, shift, bmBc[ASIZE]; /* Preprocessing */ preBmBc(x, m, bmBc); shift = bmBc[x[m - 1]]; bmBc[x[m - 1]] = 0; memset(y + n, x[m - 1], m); /* Searching */ j = 0; while (j < n) { k = bmBc[y[j + m -1]]; while (k != 0) { j += k; k = bmBc[y[j + m -1]]; j += k; k = bmBc[y[j + m -1]]; j += k; k = bmBc[y[j + m -1]]; } if (memcmp(x, y + j, m - 1) == 0 && j < n) OUTPUT(j); j += shift; /* shift */ } }
int attempt(char *y, char *x, int m, int start, int wall) { int k; k = wall - start; while (k < m && x[k] == y[k + start]) ++k; return(k); } void KMPSKIP(char *x, int m, char *y, int n) { int i, j, k, kmpStart, per, start, wall; int kmpNext[XSIZE], list[XSIZE], mpNext[XSIZE], z[ASIZE]; /* Preprocessing */ preMp(x, m, mpNext); preKmp(x, m, kmpNext); memset(z, -1, ASIZE*sizeof(int)); memset(list, -1, m*sizeof(int)); z[x[0]] = 0; for (i = 1; i < m; ++i) { list[i] = z[x[i]]; z[x[i]] = i; } /* Searching */ wall = 0; per = m - kmpNext[m]; i = j = -1; do { j += m; } while (j < n && z[y[j]] < 0); if (j >= n) return; i = z[y[j]]; start = j - i; while (start <= n - m) { if (start > wall) wall = start; k = attempt(y, x, m, start, wall); wall = start + k; if (k == m) { OUTPUT(start); i -= per; } else i = list[i]; if (i < 0) { do { j += m; } while (j < n && z[y[j]] < 0); if (j >= n) return; i = z[y[j]]; } kmpStart = start + k - kmpNext[k]; k = kmpNext[k]; start = j - i; while (start < kmpStart || (kmpStart < start && start < wall)) { if (start < kmpStart) { i = list[i]; if (i < 0) { do { j += m; } while (j < n && z[y[j]] < 0); if (j >= n) return; i = z[y[j]]; } start = j - i; } else { kmpStart += (k - mpNext[k]); k = mpNext[k]; } } } }