23
Aug
0
[C] 4 Exact String Matching Algorithms (in c)
I read (and wrote) 1 month ago some algorithms about pattern matching in text.
You can find plenty of this on the web anyway here is my code.
The code
/** Function to find a token in a string/text
* Rabin Karp - use a hash function
* Morris Pratt - Ameliored Brute Force
* Knuth Morris Pratt (KMP) - Ameliored Morris Pratt
* Boyer-Moore-Horspool
*
* find a token x of size m into a text y of size n with alphabet sig
*/
#include <cstring>
#include <cstdio>
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Karp Rabin:
uses hasing function
preprocessing phase in O(m) time complexity and constant space
searching phase in O(mxn) time complexity
O(m+n) expected running time
Hash function has to be:
Easily computable
Higly disciminating
hash(y[j+1 to j+m]) must be easily computable from hash(y[j to j+m-1]) and y[j+m]
*/
void Rabin_Karp(const char x[], const int m, const char y[], const int n){
// compute first hash keys
int hashX = 0;
int hashY = 0;
for(int idxHash = 0 ; idxHash < m ; ++idxHash){
hashX = ((hashX<<1) + x[idxHash]);
hashY = ((hashY<<1) + y[idxHash]);
}
int iter = 0;
while( iter <= n-m ){
if(hashX == hashY && memcmp(x, &y[iter] , m) == 0){
printf("Found at %d\n", iter);
}
hashY = ((hashY - (y[iter] << (m-1)) ) << 1 ) + y[iter + m];
++iter;
}
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Morris-Pratt
left to right
preprocessing in O(m) space and time complexity
searching in O(m+n)
at most 2n - 1 comparisons
The function PrepareNext is creating an array next[m+1]
which count the prefixe redundonce
*/
void MP_PrepareNext(const char x[], const int m, int next[]){
int idxLocal = -1;
next[0] = -1;
for(int idx = 0 ; idx < m ; ++idx){
while(-1 < idxLocal && x[idx] != x[idxLocal]){
idxLocal = next[idxLocal];
}
next[idx + 1] = (++idxLocal);
}
}
void Morris_Pratt(const char x[], const int m, const char y[], const int n){
int next[m+1];
MP_PrepareNext(x, m, next);
int idxLocal = 0;
for(int idx = 0 ; idx < n ; ++idx){
// enter in the loop only if char are !=
while( -1 < idxLocal && x[idxLocal] != y[idx]){
idxLocal = next[idxLocal];
}
++idxLocal;
if(m <= idxLocal){
printf("Found at %d\n", idx - idxLocal + 1);
idxLocal = next[idxLocal];
}
}
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Knuth-Morris-Pratt
left to right
preprocessing in O(m) space and time complexity
searching in O(m+n)
at most 2n - 1 comparisons
bounded by log<phi>(m)
*/
void KMP_PrepareNext(const char x[], const int m, int next[]){
int idxLocal = -1;
next[0] = -1;
for(int idx = 0 ; idx < m ; ++idx){
while(-1 < idxLocal && x[idx] != x[idxLocal]){
idxLocal = next[idxLocal];
}
++idxLocal;
if(x[idx + 1] == x[idxLocal]){
next[idx + 1] = next[idxLocal];
}
else{
next[idx + 1] = idxLocal;
}
}
}
void Knuth_Morris_Pratt(const char x[], const int m, const char y[], const int n){
int next[m+1];
KMP_PrepareNext(x, m, next);
int idxLocal = 0;
for(int idx = 0 ; idx < n ; ++idx){
// enter in the loop only if char are !=
while( -1 < idxLocal && x[idxLocal] != y[idx]){
idxLocal = next[idxLocal];
}
++idxLocal;
if(m <= idxLocal){
printf("Found at %d\n", idx - idxLocal + 1);
idxLocal = next[idxLocal];
}
}
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Boyer-Moore-Horspool :
simplified Boyer Moore
use only bad char shift table
easy to implement
preprocessing phase in O(m+sig) time and O(sig) space complexity;
searching phase in O(mxn) time complexity;
*/
// prepare bad char shifting array
// badCharShift['a'] => m if 'a' does not appear in x
// else badCharShift['a'] => position of last 'a' from right
void BM_BadChar(const char x[],const int m, int badCharShift[]) {
for (int idx = 0; idx < 256; ++idx){
badCharShift[idx] = m;
}
for (int idx = 0; idx < m - 1; ++idx){
badCharShift[x[idx]] = m - idx - 1;
}
}
void Boyer_Moore_Horspool(const char x[],const int m,const char y[],const int n) {
int badCharShift[256];
BM_BadChar(x, m, badCharShift);
int idx = 0;
while (idx <= n - m) {
const int lastChar = y[idx + m - 1];
// if last char equal, then compare all string
if( x[m - 1] == lastChar && memcmp(x, &y[idx], m-1) == 0){
printf("Found at %d\n", idx);
}
idx += badCharShift[lastChar];
}
}
///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
int main(){
const char* const y = "HEREzerb hHEREjbr HEazejrhHEREb azerhjb aaabhzb erHERE";
const int n = strlen(y);
const char* const x = "HERE";
const int m = strlen(x);
printf("Try to find: %s\ninto: %s\n", x, y);
printf("Rabin_Karp:\n");
Rabin_Karp(x,m,y,n);
printf("Morris_Pratt:\n");
Morris_Pratt(x,m,y,n);
printf("Knuth_Morris_Pratt:\n");
Knuth_Morris_Pratt(x,m,y,n);
printf("Boyer_Moore_Horspool:\n");
Boyer_Moore_Horspool(x,m,y,n);
return 0;
}
Output
Try to find: HERE into: HEREzerb hHEREjbr HEazejrhHEREb azerhjb aaabhzb erHERE Rabin_Karp: Found at 0 Found at 10 Found at 26 Found at 50 Morris_Pratt: Found at 0 Found at 10 Found at 26 Found at 50 Knuth_Morris_Pratt: Found at 0 Found at 10 Found at 26 Found at 50 Boyer_Moore_Horspool: Found at 0 Found at 10 Found at 26 Found at 50
Reference
Enjoyed reading this post?
Subscribe to the RSS feed and have all new posts delivered straight to you.
Subscribe to the RSS feed and have all new posts delivered straight to you.
