较简单的正则匹配引擎实现

前端之家收集整理的这篇文章主要介绍了较简单的正则匹配引擎实现前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。


这是较为简单的正则表达式引擎
目前只支持. + | 三种特殊字符
发现bug 请联系
已有封装好的函数,请直接使用
未来将逐渐更新

例子如图
// regulatr.cpp : 定义控制台应用程序的入口点。
//

/*______________________________________________________
regular expression engine
written by 唐hz
2015.8.10
_______________________________________________________*/
#include "stdafx.h"
#include<iostream>
#include<vector>
using namespace std;
struct State;
#define error -1//此处不确定
#define normal 1
#define finish 16
#define mul_way 2
#define start 8
#define any 32
#define error_back 64
#define self_loop1 4
struct Linker{
State *from;
State *to;
char request;
};
struct LinkerList{
Linker l;
struct LinkerList *next;
};
struct State{
int status;
int position;
LinkerList * in;
LinkerList * out;
};
struct Position{
State* p;
int i;
};

State g_State[100];
int g_next[100];
int g_start,g_finish,g_maxi=-1;
int g_can_not_any=-1;

void next(char *p){
int n=strlen(p);
int i=1,j=0;
g_next[j]=-1;
while(i<n){
if(p[i]==p[j])
g_next[i++]=j++;
else {
if(j!=0)
j=g_next[j-1]+1;
else{
j=0;
g_next[i++]=-1;
}
}

}
}

void addlinker(LinkerList **a,char b,State *c,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp=*a;
while(temp->next!=NULL){
temp=temp->next;
}
temp->next=new LinkerList;
temp=temp->next;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=NULL;
return ;
}
void addlinker2(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp=*a,*temp1;
while(temp->next&&temp->next->next!=NULL){
temp=temp->next;
}
temp1=temp->next;
temp->next=new LinkerList;
temp=temp->next;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=temp1;
return ;
}
void addlinker1(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp;
temp=new LinkerList;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=*a;
*a=temp;
return ;
}
void connect(State &a,State &b,char r,int x=0){
if(x==0){
addlinker(&(a.out),r,&a,&b);
addlinker(&(b.in),&b);}
else if(x==1){
addlinker1(&(a.out),&b);
addlinker1(&(b.in),&b);
}
else if(x==2){
addlinker2(&(a.out),&b);
addlinker2(&(b.in),&b);
}
}

void compile(char *p,char*q){
int i=0,j=0;
while(i<(int)strlen(q)){
if(q[i]=='|'){
i+=2;
}
else if(q[i]=='+'){
i++;
}
else p[j++]=q[i++];
}
p[j]='\0';
}

void construction(char *p){
char q[100];
compile(q,p);
next(q);
int i=0;
for(int j=0;j<100;j++){
g_State[j].in=g_State[j].out=NULL;
g_State[j].status=0;//
g_State[j].position=j;
}
g_State[0].status|=start;
connect(g_State[0],g_State[0],error,0);
while(*p!='\n')
{
switch(*p){
case '|':
if(i>0) i--;
p++;
g_State[i].status|=mul_way;
break;
case '+':
i--;
p--;
connect(g_State[i],g_State[i],*p,2);
g_State[i].status|=self_loop1;
i++;
p+=2;
break;
default:
if(!(g_State[i].status&start)&&!(g_State[i].status&error_back)){
g_State[i].status|=error_back;
connect(g_State[i],g_State[g_next[i-1]+1],0);}
if(*p=='.'){
g_State[i].status|=any;
}
connect(g_State[i],g_State[i+1],1);
g_State[i].status|=normal;
i++;
p++;}
}
g_State[i].status|=finish;

}//查int 字节
void realease(){
LinkerList *temp,*temp1;
for(int i=0;i<100;i++)
{
temp=g_State[i].in;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
temp=g_State[i].out;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
}
}

bool NextState(Position a,char *p,vector<Position> & find,int i){
auto temp=a.p->out;
Position rec;
bool j=false;
while(temp){
if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){
j=true;
if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){
g_can_not_any=a.p->position;
}
rec.p=((temp->l).to);
rec.i=i+1;
if(rec.i<(int)strlen(p))
find.push_back(rec);
}
else
{
if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){
if(((temp->l).to)->position<g_can_not_any){
cout<<"使g_can_not_any回复初值\n";
g_can_not_any=-1;}
rec.p=((temp->l).to);
rec.i=i;
if(!(rec.p->status&start))
find.push_back(rec);
}
}
temp=temp->next;
}
if(j) return true;

return false;

}
bool isexist( char *p){
vector<Position> find;
Position temp;
temp.p=g_State;
temp.i=0;
find.push_back(temp);
g_start=1;
g_finish=0;
g_maxi=-1;
bool canfinish=false;
while(!find.empty()){
if(temp.p->status&finish) {g_finish=temp.i;
break;
}
if(temp.i>g_maxi)
g_maxi=temp.i;
find.pop_back();
NextState(temp,p,find,temp.i);
if(!find.empty())
temp=find.back();
else if(g_maxi<(int)strlen(p)){
temp.i=g_maxi+1;
temp.p=g_State;
g_start=temp.i+1;//人是从1数而机器从0数,故加1
find.push_back(temp);
}
}
if(g_finish>=g_start) return true;
return false;
}
void output(char *p,int a,int b){
cout<<"\n位于"<<a<<"和"<<b<<"之间\n";
for(int i=a-1;i<b;i++)
cout<<p[i];
cout<<"\n";
}
void findWholeMatch(char *p){
char *str=p;
int rec=0;
while(isexist(p)&&rec<(int)strlen(str)){
output(str,g_start+rec,g_finish+rec);
rec+=g_finish;
p+=g_finish;
}

}
int main(){
char expression[100];
char content[1000];
fgets(expression,100,stdin);
fgets(content,1000,stdin);
construction(expression);
cout<<"构建完成 \n";
/*
if(isexist(content)){
cout<<"存在\n";
cout<<"开始在"<<g_start<<"结束在"<<g_finish<<"\n";
output(content,g_start,g_finish);
}
else cout<<"不存在\n";
*/

findWholeMatch(content);
system("pause");
realease();

}// regulatr.cpp : 定义控制台应用程序的入口点。
//

/*______________________________________________________
regular expression engine
written by 唐hz
2015.8.10
_______________________________________________________*/
#include "stdafx.h"
#include<iostream>
#include<vector>
using namespace std;
struct State;
#define error -1//此处不确定
#define normal 1
#define finish 16
#define mul_way 2
#define start 8
#define any 32
#define error_back 64
#define self_loop1 4
struct Linker{
State *from;
State *to;
char request;
};
struct LinkerList{
Linker l;
struct LinkerList *next;
};
struct State{
int status;
int position;
LinkerList * in;
LinkerList * out;
};
struct Position{
State* p;
int i;
};

State g_State[100];
int g_next[100];
int g_start,g_maxi=-1;
int g_can_not_any=-1;

void next(char *p){
int n=strlen(p);
int i=1,j=0;
g_next[j]=-1;
while(i<n){
if(p[i]==p[j])
g_next[i++]=j++;
else {
if(j!=0)
j=g_next[j-1]+1;
else{
j=0;
g_next[i++]=-1;
}
}

}
}

void addlinker(LinkerList **a,State *d){
if(*a==NULL){
(*a)=new LinkerList;
(*a)->l.request=b;
(*a)->l.from=c;
(*a)->l.to=d;
(*a)->next=NULL;
return ;
}
LinkerList * temp;
temp=new LinkerList;
temp->l.request=b;
temp->l.from=c;
temp->l.to=d;
temp->next=*a;
*a=temp;
return ;
}
void connect(State &a,&b);
}
}

void compile(char *p,j=0;
while(i<(int)strlen(q)){
if(q[i]=='|'){
i+=2;
}
else if(q[i]=='+'){
i++;
}
else p[j++]=q[i++];
}
p[j]='\0';
}

void construction(char *p){
char q[100];
compile(q,0);
while(*p!='\n')
{
switch(*p){
case '|':
if(i>0) i--;
p++;
g_State[i].status|=mul_way;
break;
case '+':
i--;
p--;
connect(g_State[i],1);
g_State[i].status|=normal;
i++;
p++;}
}
g_State[i].status|=finish;

}//查int 字节
void realease(){
LinkerList *temp,*temp1;
for(int i=0;i<100;i++)
{
temp=g_State[i].in;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
temp=g_State[i].out;
while(temp){
temp1=temp;
temp=temp->next;
delete temp1;
}
}
}

bool NextState(Position a,int i){
auto temp=a.p->out;
Position rec;
bool j=false;
while(temp){
if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){
j=true;
if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){
g_can_not_any=a.p->position;
}
rec.p=((temp->l).to);
rec.i=i+1;
if(rec.i<(int)strlen(p))
find.push_back(rec);
}
else
{
if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){
if(((temp->l).to)->position<g_can_not_any){
cout<<"使g_can_not_any回复初值\n";
g_can_not_any=-1;}
rec.p=((temp->l).to);
rec.i=i;
if(!(rec.p->status&start))
find.push_back(rec);
}
}
temp=temp->next;
}
if(j) return true;

return false;

}
bool isexist( char *p){
vector<Position> find;
Position temp;
temp.p=g_State;
temp.i=0;
find.push_back(temp);
g_start=1;
g_finish=0;
g_maxi=-1;
bool canfinish=false;
while(!find.empty()){
if(temp.p->status&finish) {g_finish=temp.i;
break;
}
if(temp.i>g_maxi)
g_maxi=temp.i;
find.pop_back();
NextState(temp,g_finish+rec);
rec+=g_finish;
p+=g_finish;
}

}
int main(){
char expression[100];
char content[1000];
fgets(expression,g_finish);
}
else cout<<"不存在\n";
*/

findWholeMatch(content);
system("pause");
realease();

}

采用了NFA,未来将逐步更新。

原文链接:https://www.f2er.com/regex/360082.html

猜你在找的正则表达式相关文章