c语言

c语言字符串匹配算法怎么写

小亿
98
2024-01-03 10:09:41
栏目: 编程语言

C语言字符串匹配算法有很多种,下面介绍几种常用的算法实现。

  1. Brute-Force算法(朴素算法)
int strStr(char* haystack, char* needle) {
    int i, j;
    int len1 = strlen(haystack);
    int len2 = strlen(needle);
    
    for (i = 0; i <= len1 - len2; i++) {
        for (j = 0; j < len2; j++) {
            if (haystack[i + j] != needle[j]) {
                break;
            }
        }
        if (j == len2) {
            return i;
        }
    }
    
    return -1;
}
  1. KMP算法(Knuth-Morris-Pratt算法)
void getNext(char* needle, int* next) {
    int i, j;
    int len = strlen(needle);
    
    next[0] = -1;
    i = 0;
    j = -1;
    while (i < len) {
        if (j == -1 || needle[i] == needle[j]) {
            i++;
            j++;
            next[i] = j;
        } else {
            j = next[j];
        }
    }
}

int strStr(char* haystack, char* needle) {
    int i, j;
    int len1 = strlen(haystack);
    int len2 = strlen(needle);
    int* next = (int*)malloc(sizeof(int) * len2);
    
    getNext(needle, next);
    
    i = 0;
    j = 0;
    while (i < len1 && j < len2) {
        if (j == -1 || haystack[i] == needle[j]) {
            i++;
            j++;
        } else {
            j = next[j];
        }
    }
    
    free(next);
    if (j == len2) {
        return i - j;
    } else {
        return -1;
    }
}
  1. Boyer-Moore算法
#define MAX_CHAR 256

int max(int a, int b) {
    return a > b ? a : b;
}

void badCharHeuristic(char* needle, int* badChar) {
    int i;
    int len = strlen(needle);
    for (i = 0; i < MAX_CHAR; i++) {
        badChar[i] = -1;
    }
    for (i = 0; i < len; i++) {
        badChar[(int)needle[i]] = i;
    }
}

void goodSuffixHeuristic(char* needle, int* goodSuffix) {
    int i, j;
    int len = strlen(needle);
    int* suffix = (int*)malloc(sizeof(int) * (len + 1));
    
    for (i = 0; i <= len; i++) {
        suffix[i] = -1;
    }
    
    for (i = len - 1; i >= 0; i--) {
        if (suffix[i] == -1) {
            suffix[i] = len - i - 1;
            for (j = 0; j < i; j++) {
                if (needle[j] == needle[len - i + j]) {
                    suffix[j] = len - i + j;
                }
            }
        }
    }
    
    for (i = 0; i < len; i++) {
        goodSuffix[i] = len - suffix[i + 1];
    }
    for (i = 0; i <= len; i++) {
        goodSuffix[len] = len - suffix[i + 1];
    }
    
    free(suffix);
}

int strStr(char* haystack, char* needle) {
    int i = 0, j = 0;
    int len1 = strlen(haystack);
    int len2 = strlen(needle);
    int badChar[MAX_CHAR];
    int* goodSuffix = (int*)malloc(sizeof(int) * (len2 + 1));
    
    badCharHeuristic(needle, badChar);
    goodSuffixHeuristic(needle, goodSuffix);
    
    while (i <= len1 - len2) {
        j = len2 - 1;
        while (j >= 0 && needle[j] == haystack[i + j]) {
            j--;
        }
        if (j < 0) {
            free(goodSuffix);
            return i;
        } else {
            i += max(goodSuffix[j], j - badChar[(int)haystack[i + j]]);
        }
    }
    
    free(goodSuffix);
    return -1;
}

以上是几种常用的字符串匹配算法的C语言

0
看了该问题的人还看了