您好,登录后才能下订单哦!
密码登录
登录注册
点击 登录注册 即表示同意《亿速云用户服务条款》
AC自动机(Aho-Corasick 算法)是一种在文本中同时查找多个模式字符串的高效算法。它以 Trie 树(字典树)为基础,并通过失败指针把 Trie 树扩展成一个有限状态自动机。以下是一个简单的 C 语言实现:
#include<stdio.h>
#include <stdlib.h>
#include<string.h>
#define ALPHABET_SIZE 256

/*
 * One state of the Aho-Corasick automaton.
 *
 * A state is a trie node: the path of bytes from the root to the node is a
 * prefix of at least one inserted pattern.  Nodes are created only through
 * the children[] array, so children[] is the single owning reference;
 * `fail` and `output` alias nodes that are already reachable via children[].
 */
typedef struct TrieNode {
    struct TrieNode *children[ALPHABET_SIZE]; /* child for each byte value, or NULL */
    int isEndOfPattern;      /* non-zero iff an inserted pattern ends exactly at this node */
    int patternIndex;        /* caller-supplied index of that pattern, -1 when none */
    struct TrieNode *fail;   /* node for the longest proper suffix present in the trie;
                                NULL on the root and before buildFailureLinks() runs */
    size_t patternLength;    /* byte length of the pattern ending here, 0 when none */
    struct TrieNode *output; /* nearest node on the fail chain that ends a pattern,
                                or NULL; filled in by buildFailureLinks() */
} TrieNode;

/* FIFO of node pointers used for the breadth-first pass in buildFailureLinks(). */
typedef struct NodeQueue {
    TrieNode **items;
    size_t head;
    size_t tail;
    size_t capacity;
} NodeQueue;

/* Allocation failures cannot be reported through the void-returning API, so
 * the module's explicit policy is to print a diagnostic and terminate. */
static void failOutOfMemory(const char *where) {
    fprintf(stderr, "%s: out of memory\n", where);
    exit(EXIT_FAILURE);
}

/* Appends `node` to the queue, doubling the backing array when it is full. */
static void nodeQueuePush(NodeQueue *queue, TrieNode *node) {
    if (queue->tail == queue->capacity) {
        const size_t maxItems = (size_t)-1 / sizeof(TrieNode *);
        TrieNode **grown = NULL;
        size_t newCapacity = 0;

        /* Refuse to grow if doubling would overflow the byte count. */
        if (queue->capacity <= maxItems / 2) {
            newCapacity = queue->capacity ? queue->capacity * 2 : 64;
            grown = realloc(queue->items, newCapacity * sizeof *grown);
        }
        if (grown == NULL) {
            free(queue->items);
            failOutOfMemory("buildFailureLinks");
        }
        queue->items = grown;
        queue->capacity = newCapacity;
    }
    queue->items[queue->tail++] = node;
}

/*
 * Allocates a node with no children, no links and no pattern attached
 * (patternIndex is -1).  Never returns NULL: the process is terminated if
 * the allocation fails.  The caller owns the returned node.
 */
TrieNode *createTrieNode(void) {
    TrieNode *node = malloc(sizeof *node);
    if (node == NULL) {
        failOutOfMemory("createTrieNode");
    }
    /* Members not named in the initializer become 0 / NULL. */
    *node = (TrieNode){ .patternIndex = -1 };
    return node;
}

/*
 * Adds `pattern` to the trie rooted at `root` and tags its final node with
 * `patternIndex`.
 *
 * Bytes are used as unsigned values so that bytes >= 0x80 (e.g. UTF-8) index
 * children[] correctly even where plain char is signed.  An empty pattern is
 * ignored.  Inserting the same pattern twice keeps the most recent index.
 * buildFailureLinks() must be called (again) after the last insertion.
 */
void insertPattern(TrieNode *root, const char *pattern, int patternIndex) {
    if (root == NULL || pattern == NULL) {
        return;
    }

    TrieNode *node = root;
    size_t length = 0;
    for (; pattern[length] != '\0'; length++) {
        unsigned char byte = (unsigned char)pattern[length];
        if (node->children[byte] == NULL) {
            node->children[byte] = createTrieNode();
        }
        node = node->children[byte];
    }

    if (length == 0) {
        return; /* nothing to match; do not mark the root as a terminal */
    }
    node->isEndOfPattern = 1;
    node->patternIndex = patternIndex;
    node->patternLength = length;
}

/*
 * Computes the `fail` and `output` link of every node below `root` with a
 * breadth-first traversal.
 *
 * Breadth-first order guarantees that when a node is processed, the links of
 * its parent and of its own fail target (both strictly shallower) are final.
 * The function only writes `fail` and `output`; isEndOfPattern and
 * patternIndex keep the values set by insertPattern(), so it may be called
 * again after more patterns have been inserted.
 */
void buildFailureLinks(TrieNode *root) {
    if (root == NULL) {
        return;
    }

    NodeQueue queue = { NULL, 0, 0, 0 };

    root->fail = NULL;
    root->output = NULL;

    /* Depth-1 nodes: their only proper suffix is the empty string (the root). */
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        TrieNode *child = root->children[i];
        if (child != NULL) {
            child->fail = root;
            child->output = NULL;
            nodeQueuePush(&queue, child);
        }
    }

    while (queue.head < queue.tail) {
        TrieNode *current = queue.items[queue.head++];

        for (int i = 0; i < ALPHABET_SIZE; i++) {
            TrieNode *child = current->children[i];
            if (child == NULL) {
                continue;
            }

            /* Follow the parent's fail chain until some state can consume
             * byte i; running off the root (NULL) means none can. */
            TrieNode *fallback = current->fail;
            while (fallback != NULL && fallback->children[i] == NULL) {
                fallback = fallback->fail;
            }
            child->fail = (fallback != NULL) ? fallback->children[i] : root;

            /* Link straight to the nearest terminal suffix so the search can
             * enumerate matches without visiting non-terminal states. */
            child->output = child->fail->isEndOfPattern ? child->fail
                                                        : child->fail->output;

            nodeQueuePush(&queue, child);
        }
    }

    free(queue.items);
}

/*
 * Scans `text` and prints one line per pattern occurrence:
 *     Pattern <patternIndex> found at index <start>
 * where <start> is the 0-based byte offset in `text` at which the occurrence
 * begins.  Occurrences are printed in order of their end position; several
 * patterns ending at the same position are printed longest first.
 *
 * `root` must have had buildFailureLinks() applied after the last insertion.
 */
void searchPatterns(const char *text, TrieNode *root) {
    if (text == NULL || root == NULL) {
        return;
    }

    TrieNode *node = root;
    for (size_t i = 0; text[i] != '\0'; i++) {
        unsigned char byte = (unsigned char)text[i];

        /* Drop to shorter suffixes until one can consume the byte; the root
         * is the last resort and is never left through a fail link. */
        while (node != root && node->children[byte] == NULL) {
            node = (node->fail != NULL) ? node->fail : root;
        }
        if (node->children[byte] != NULL) {
            node = node->children[byte];
        }

        /* The state itself (if terminal) and then every terminal suffix. */
        for (TrieNode *hit = node->isEndOfPattern ? node : node->output;
             hit != NULL;
             hit = hit->output) {
            printf("Pattern %d found at index %zu\n",
                   hit->patternIndex, i + 1 - hit->patternLength);
        }
    }
}
/*
 * Frees `node` and every node reachable from it through children[].
 * Fail links are not followed: they only alias nodes that some children[]
 * slot already owns, so following them would free nodes twice.
 * Recursion depth equals the length of the longest inserted pattern.
 */
static void destroyTrie(TrieNode *node) {
    if (node == NULL) {
        return;
    }
    for (int i = 0; i < ALPHABET_SIZE; i++) {
        destroyTrie(node->children[i]);
    }
    free(node);
}

/*
 * Demo driver: builds an automaton for a fixed pattern set, searches a fixed
 * text with it, and releases the automaton before exiting.
 */
int main(void) {
    const char *patterns[] = {"he", "she", "his", "hers"};
    const size_t numPatterns = sizeof patterns / sizeof patterns[0];

    TrieNode *root = createTrieNode();
    for (size_t i = 0; i < numPatterns; i++) {
        /* The array position doubles as the index reported by the search. */
        insertPattern(root, patterns[i], (int)i);
    }

    /* Links must be built after the last insertion and before searching. */
    buildFailureLinks(root);

    const char *text = "ushers";
    searchPatterns(text, root);

    destroyTrie(root);
    return 0;
}
这个程序首先创建一棵 Trie 树,然后把要搜索的模式字符串逐个插入其中。接下来,它为每个节点构建失败链接(失败指针),这是 AC 自动机的关键部分。最后,它在给定的文本中搜索所有模式字符串。每找到一处匹配,程序就会输出该模式字符串的索引,以及这次匹配在文本中的起始位置(从 0 开始计数)。
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。