C++字符串查找优化策略

发布时间：2024-10-09 15:15:29 作者：小樊
来源：亿速云 阅读：142

在C++中，字符串查找是一个常见的操作。为了提高查找效率，可以采用一些优化策略。以下是一些建议：

使用std::string::find函数：C++标准库提供了std::string::find函数，用于查找子字符串在原字符串中的位置。这个函数已经经过了优化，可以满足大部分情况下的字符串查找需求。

#include <iostream>
#include <string>

// Demo: locate a substring with std::string::find and report the result.
int main() {
    const std::string haystack = "Hello, world!";
    const std::string needle = "world";

    const size_t where = haystack.find(needle);

    // find() signals "no match" with std::string::npos.
    if (where == std::string::npos) {
        std::cout << "Substring not found" << std::endl;
        return 0;
    }

    std::cout << "Substring found at position: " << where << std::endl;
    return 0;
}

KMP算法：KMP（Knuth-Morris-Pratt）算法是一种高效的字符串查找算法，它可以在O(n+m)的时间复杂度内完成查找，其中n是原字符串的长度，m是子字符串的长度。KMP算法的核心在于预处理子字符串，构建一个部分匹配表（Partial Match Table），用于在查找过程中跳过不必要的比较。

#include <iostream>
#include <vector>
#include <string>

// Builds the KMP partial match (failure) table for `sub`.
//
// Entry k holds the length of the longest proper prefix of sub[0..k] that is
// also a suffix of sub[0..k]. The returned vector has one entry per character
// of `sub` (empty for an empty pattern).
std::vector<int> build_partial_match_table(const std::string& sub) {
    const int length = sub.length();
    std::vector<int> prefix(length, 0);

    int matched = 0;  // length of the border currently being extended
    int pos = 1;      // entry 0 is always 0, so start at the second character

    while (pos < length) {
        if (sub[pos] == sub[matched]) {
            // The current border grows by one character.
            ++matched;
            prefix[pos] = matched;
            ++pos;
        } else if (matched > 0) {
            // Fall back to the next shorter border and retry the same position.
            matched = prefix[matched - 1];
        } else {
            // No border ends at this position.
            prefix[pos] = 0;
            ++pos;
        }
    }

    return prefix;
}

// Finds the first occurrence of `sub` in `str` using the Knuth-Morris-Pratt
// algorithm.
//
// Returns the index of the first character of the match, or std::string::npos
// when `sub` does not occur in `str`. An empty `sub` matches at index 0, the
// same result std::string::find gives for an empty needle.
size_t kmp_search(const std::string& str, const std::string& sub) {
    const size_t n = str.length();
    const size_t m = sub.length();

    // Handle the empty pattern up front: the main loop below reads sub[j],
    // which must never run past the end of the pattern.
    if (m == 0) {
        return 0;
    }
    // A pattern longer than the text cannot match.
    if (m > n) {
        return std::string::npos;
    }

    const std::vector<int> table = build_partial_match_table(sub);
    size_t j = 0;  // number of pattern characters matched so far

    for (size_t i = 0; i < n; ++i) {
        // On mismatch, fall back to the longest border of the matched prefix
        // instead of restarting the comparison from scratch.
        while (j > 0 && str[i] != sub[j]) {
            j = static_cast<size_t>(table[j - 1]);
        }
        if (str[i] == sub[j]) {
            ++j;
        }
        if (j == m) {
            // The match ends at i, so it starts m - 1 characters earlier.
            return i + 1 - m;
        }
    }

    return std::string::npos;
}

// Demo: locate a substring with kmp_search and report the result.
int main() {
    const std::string haystack = "Hello, world!";
    const std::string needle = "world";

    const size_t where = kmp_search(haystack, needle);

    // kmp_search signals "no match" with std::string::npos.
    if (where == std::string::npos) {
        std::cout << "Substring not found" << std::endl;
        return 0;
    }

    std::cout << "Substring found at position: " << where << std::endl;
    return 0;
}

Boyer-Moore算法：Boyer-Moore算法是一种高效的字符串查找算法，它从子字符串的末尾向前进行比较，失配时往往可以一次跳过多个字符，因此在实际应用中通常比逐字符比较更快。查找首次出现位置的时间复杂度为O(n+m)，其中n是原字符串的长度，m是子字符串的长度。Boyer-Moore算法的核心在于预处理子字符串，构建一个坏字符表（Bad Character Table）和一个好后缀表（Good Suffix Table），用于在查找过程中跳过不必要的比较。

#include <iostream>
#include <vector>
#include <string>

// Builds the bad-character table for `sub`.
//
// The result has 256 entries, one per possible byte value. Entry c holds the
// index of the last occurrence of byte c in `sub`, or -1 when c does not occur.
// Characters are converted to unsigned char before indexing so that bytes with
// the high bit set (e.g. UTF-8 text) never produce a negative index.
std::vector<int> build_bad_character_table(const std::string& sub) {
    const int table_size = 256;
    std::vector<int> table(table_size, -1);

    const int sub_length = static_cast<int>(sub.length());
    for (int i = 0; i < sub_length; ++i) {
        // Later occurrences overwrite earlier ones, leaving the rightmost index.
        table[static_cast<unsigned char>(sub[i])] = i;
    }

    return table;
}

// Builds the good-suffix shift table for `sub`.
//
// The result has sub.length() entries. Entry j is the distance the pattern may
// safely be shifted after a mismatch at pattern index j, i.e. when the suffix
// sub[j+1..] has already matched the text. Every entry is at least 1.
//
// `str` is not used: the table depends only on the pattern. The parameter is
// kept so existing callers continue to compile.
std::vector<int> build_good_suffix_table(const std::string& str, const std::string& sub) {
    (void)str;

    const int m = static_cast<int>(sub.length());

    // shift[k] is the shift to apply when the suffix starting at index k has
    // matched; border[k] is the start of the widest border of that suffix.
    std::vector<int> shift(m + 1, 0);
    std::vector<int> border(m + 1, 0);

    // Case 1: the matched suffix occurs elsewhere in the pattern, preceded by
    // a different character.
    int i = m;
    int j = m + 1;
    border[i] = j;
    while (i > 0) {
        while (j <= m && sub[i - 1] != sub[j - 1]) {
            if (shift[j] == 0) {
                shift[j] = j - i;
            }
            j = border[j];
        }
        --i;
        --j;
        border[i] = j;
    }

    // Case 2: only a prefix of the pattern matches a suffix of the matched
    // part; fill the remaining entries with the widest applicable border.
    j = border[0];
    for (i = 0; i <= m; ++i) {
        if (shift[i] == 0) {
            shift[i] = j;
        }
        if (i == j) {
            j = border[j];
        }
    }

    // A mismatch at pattern index k means the suffix starting at k + 1
    // matched, so drop shift[0] to re-index the table by mismatch position.
    return std::vector<int>(shift.begin() + 1, shift.end());
}

// Finds the first occurrence of `sub` in `str` using the Boyer-Moore
// algorithm (bad-character rule combined with the good-suffix rule).
//
// Returns the index of the first character of the match, or std::string::npos
// when `sub` does not occur in `str`. An empty `sub` matches at index 0, the
// same result std::string::find gives for an empty needle.
size_t boyer_moore_search(const std::string& str, const std::string& sub) {
    const size_t n = str.length();
    const size_t m = sub.length();

    if (m == 0) {
        return 0;
    }
    // Also guarantees that n - m below cannot wrap around.
    if (m > n) {
        return std::string::npos;
    }

    const std::vector<int> bad_character_table = build_bad_character_table(sub);
    const std::vector<int> good_suffix_table = build_good_suffix_table(str, sub);

    const int last = static_cast<int>(m) - 1;
    size_t i = 0;  // current alignment of the pattern within the text

    while (i <= n - m) {
        // Compare right to left within the current window.
        int j = last;
        while (j >= 0 && str[i + static_cast<size_t>(j)] == sub[static_cast<size_t>(j)]) {
            --j;
        }

        if (j < 0) {
            return i;
        }

        // Bad-character rule: align the mismatched text byte with its last
        // occurrence in the pattern. This can be zero or negative when that
        // occurrence lies to the right of j; the good-suffix shift (always
        // >= 1) then takes over.
        const unsigned char mismatched =
            static_cast<unsigned char>(str[i + static_cast<size_t>(j)]);
        const int bad_shift = j - bad_character_table[mismatched];
        const int good_shift = good_suffix_table[static_cast<size_t>(j)];

        i += static_cast<size_t>(bad_shift > good_shift ? bad_shift : good_shift);
    }

    return std::string::npos;
}

// Demo: locate a substring with boyer_moore_search and report the result.
int main() {
    const std::string haystack = "Hello, world!";
    const std::string needle = "world";

    const size_t where = boyer_moore_search(haystack, needle);

    // boyer_moore_search signals "no match" with std::string::npos.
    if (where == std::string::npos) {
        std::cout << "Substring not found" << std::endl;
        return 0;
    }

    std::cout << "Substring found at position: " << where << std::endl;
    return 0;
}

根据具体需求和场景，可以选择合适的字符串查找算法进行优化。

C++字符串查找优化策略

相关阅读