23
Aug
0

[C] 4 Exact String Matching Algorithms (in c)

I read (and wrote) 1 month ago some algorithms about pattern matching in text.

You can find plenty of this on the web anyway here is my code.

The code

/** Function to find a token in a string/text
  * Rabin Karp - use a hash function
  * Morris Pratt - Ameliored Brute Force
  * Knuth Morris Pratt (KMP) - Ameliored Morris Pratt
  * Boyer-Moore-Horspool
  *
  * find a token x of size m into a text y of size n with alphabet sig
  */

#include <cstring>
#include <cstdio>

///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Karp Rabin:
    uses hasing function
    preprocessing phase in O(m) time complexity and constant space
    searching phase in O(mxn) time complexity
    O(m+n) expected running time

Hash function has to be:
    Easily computable
    Higly disciminating
    hash(y[j+1 to j+m]) must be easily computable from hash(y[j to j+m-1]) and y[j+m]
*/
void Rabin_Karp(const char x[], const int m, const char y[], const int n){
    // compute first hash keys
    int hashX = 0;
    int hashY = 0;
    for(int idxHash = 0 ; idxHash < m ; ++idxHash){
        hashX = ((hashX<<1) + x[idxHash]);
        hashY = ((hashY<<1) + y[idxHash]);
    }

    int iter = 0;
    while( iter <= n-m ){
        if(hashX == hashY && memcmp(x, &y[iter] , m) == 0){
            printf("Found at %d\n", iter);
        }
        hashY = ((hashY - (y[iter] << (m-1)) ) << 1 ) + y[iter + m];
        ++iter;
    }
}

///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Morris-Pratt
  left to right
  preprocessing in O(m) space and time complexity
  searching in O(m+n)
  at most 2n - 1 comparisons

The function PrepareNext is creating an array next[m+1]
which count the prefixe redundonce
*/

void MP_PrepareNext(const char x[], const int m, int next[]){
    int idxLocal = -1;
    next[0] = -1;
    for(int idx = 0 ; idx < m ; ++idx){
        while(-1 < idxLocal && x[idx] != x[idxLocal]){
            idxLocal = next[idxLocal];
        }
        next[idx + 1] = (++idxLocal);
    }
}

void Morris_Pratt(const char x[], const int m, const char y[], const int n){
    int next[m+1];
    MP_PrepareNext(x, m, next);

    int idxLocal = 0;
    for(int idx = 0 ; idx < n ; ++idx){
        // enter in the loop only if char are !=
        while( -1 < idxLocal && x[idxLocal] != y[idx]){
            idxLocal = next[idxLocal];
        }
        ++idxLocal;
        if(m <= idxLocal){
            printf("Found at %d\n", idx - idxLocal + 1);
            idxLocal = next[idxLocal];
        }
    }
}

///////////////////////////////////////////////////////
///////////////////////////////////////////////////////
/*
Knuth-Morris-Pratt
  left to right
  preprocessing in O(m) space and time complexity
  searching in O(m+n)
  at most 2n - 1 comparisons
  bounded by log<phi>(m)

*/

void KMP_PrepareNext(const char x[], const int m, int next[]){
    int idxLocal = -1;
    next[0] = -1;
    for(int idx = 0 ; idx < m ; ++idx){
        while(-1 < idxLocal && x[idx] != x[idxLocal]){
            idxLocal = next[idxLocal];
        }
        ++idxLocal;
        if(x[idx + 1] == x[idxLocal]){
            next[idx + 1] = next[idxLocal];
        }
        else{
            next[idx + 1] = idxLocal;
        }
    }
}

void Knuth_Morris_Pratt(const char x[], const int m, const char y[], const int n){
    int next[m+1];
    KMP_PrepareNext(x, m, next);

    int idxLocal = 0;
    for(int idx = 0 ; idx < n ; ++idx){
        // enter in the loop only if char are !=
        while( -1 < idxLocal && x[idxLocal] != y[idx]){
            idxLocal = next[idxLocal];
        }
        ++idxLocal;
        if(m <= idxLocal){
            printf("Found at %d\n", idx - idxLocal + 1);
            idxLocal = next[idxLocal];
        }
    }
}

///////////////////////////////////////////////////////
///////////////////////////////////////////////////////

/*
Boyer-Moore-Horspool  :
    simplified Boyer Moore
    use only bad char shift table
    easy to implement
    preprocessing phase in O(m+sig) time and O(sig) space complexity;
    searching phase in O(mxn) time complexity;
*/

// prepare bad char shifting array
// badCharShift['a'] => m if 'a' does not appear in x
// else badCharShift['a'] => position of last 'a' from right
void BM_BadChar(const char x[],const int m, int badCharShift[]) {
   for (int idx = 0; idx < 256; ++idx){
      badCharShift[idx] = m;
   }

   for (int idx = 0; idx < m - 1; ++idx){
      badCharShift[x[idx]] = m - idx - 1;
   }
}

void Boyer_Moore_Horspool(const char x[],const int m,const char y[],const int n) {
   int badCharShift[256];
   BM_BadChar(x, m, badCharShift);

   int idx = 0;
   while (idx <= n - m) {
      const int lastChar = y[idx + m - 1];
      // if last char equal, then compare all string
      if( x[m - 1] == lastChar && memcmp(x, &y[idx], m-1) == 0){
          printf("Found at %d\n", idx);
      }
      idx += badCharShift[lastChar];
   }
}

///////////////////////////////////////////////////////
///////////////////////////////////////////////////////

int main(){
    const char* const y = "HEREzerb hHEREjbr HEazejrhHEREb azerhjb aaabhzb erHERE";
    const int n = strlen(y);
    const char* const x = "HERE";
    const int m = strlen(x);

    printf("Try to find: %s\ninto: %s\n", x, y);

    printf("Rabin_Karp:\n");
    Rabin_Karp(x,m,y,n);

    printf("Morris_Pratt:\n");
    Morris_Pratt(x,m,y,n);

    printf("Knuth_Morris_Pratt:\n");
    Knuth_Morris_Pratt(x,m,y,n);

    printf("Boyer_Moore_Horspool:\n");
    Boyer_Moore_Horspool(x,m,y,n);

    return 0;
}

Output

Try to find: HERE
into: HEREzerb hHEREjbr HEazejrhHEREb azerhjb aaabhzb erHERE
Rabin_Karp:
Found at 0
Found at 10
Found at 26
Found at 50
Morris_Pratt:
Found at 0
Found at 10
Found at 26
Found at 50
Knuth_Morris_Pratt:
Found at 0
Found at 10
Found at 26
Found at 50
Boyer_Moore_Horspool:
Found at 0
Found at 10
Found at 26
Found at 50

Reference

Enjoyed reading this post?
Subscribe to the RSS feed and have all new posts delivered straight to you.

Comments are closed.

Celadon theme by the Themes Boutique