SQL Server中检索语句中Like的算法实现

本文主要对字串匹配Like的算法实现，在SQL Server中Like的匹配中主要有表现为对两个通配符的处理，分别为“_”代表一个字符，“%”代表任意个字符。由于“%”在匹配过程中的位置任意性，所以完全匹配、通配符“_”匹配与此不应该一起参与匹配运算，所以我们决定在匹配前先将子串按“%”分段，进行逐段匹配，显然降低了匹配算法的难度，下面讲解一下算法的实现过程：（后附实现源码）

１. 确定第一个“%”的位置，

目的：确定模式匹配的方式

a> 是以“%”开头则不需要左匹配（左匹配即要求子串的第一个字符必须与原串的第一个字符相一致）

b> 不是以“%”开头则需要进行左匹配

２. 进行ＫＭＰ*算法进行模式匹配

ＫＭＰ算法不能完全地实现本文提到的匹配算法，我们必须对此加以修正，主要是模式因子不适合，在这里必须认为“_”的因子值为与前一个任意字符一致，所以“_”越多，匹配时的回退的可能性将越少，当然其匹配速度比教课书上的模式查找要快。

３. 继续下一个子串段的匹配工作。

下面提供算法的源码，分为两个函数，

１. _strat

为ＫＭＰ*模式串查找函数，函数前有其使用说明。

２. _strlike

为Like的实现过程，内部用到_strat模式串匹配函数，实现的关键是对模式串的分段，来降低匹配的难度。

////////////////////////////////////////////////////////////////////////////////

// 函数名称：int _strat(

// char * chText,

// char * chPattern,

// int nbegpos,

// int nlen

// bool bleft )

// 实现功能：模式串搜索

// 对全局变量的影响：无

// 参数说明：

// chText 原串

// chPattern 模式串

// nbegpos 起始位置

// nlen 原串相对长度

// bleft 是否左对齐(即第一个字符必须一致)

// 返回结果说明：实际位置

// 待优化一：回退数不得大于nlen - len(chPattern)，即回退后无法导致完全匹配

// 待优化二：计算模式串与字串搜索代码合并，减少计算量

////////////////////////////////////////////////////////////////////////////////

int _strat(char * chText , char * chPattern , int nbegpos /* = 0 */ , int nlen /* = -1 */ , bool bleft /* = false */)

{

int nPatternLen = _tcslen(chPattern);

int nTextLen = _tcslen(chText);

if(nlen >= 0)

{

if(nbegpos + nlen < nTextLen)

nTextLen = nbegpos + nlen;

}

if(nbegpos + nPatternLen > nTextLen || nPatternLen > MAXLEN_PATTERN)

return -1;

if(nPatternLen == 0)

return nbegpos;

else

{

int nGeneralLen = 0;

short chNext[MAXLEN_PATTERN] = { -1 };

int nPattPos = 0 , nNext = -1;

if(!bleft)

{

//生成模式回退值

while(nPattPos < nPatternLen)

{

if( nNext == -1 || chPattern[nPattPos] == '_' || chPattern[nPattPos] == chPattern[nNext])

{

nPattPos ++;

nNext ++;

chNext[nPattPos] = nNext;

}

else

nNext = chNext[nNext];

}

int nTextPos = nbegpos;

nPattPos = 0;

//进行模式匹配

while(nPattPos < nPatternLen && nTextPos < nTextLen)

{

if(nPattPos == -1 || chPattern[nPattPos] == '_' || chPattern[nPattPos] == chText[nTextPos])

{

nPattPos ++;

nTextPos ++;

}

else

{

//要求左对齐时，不允许回退（回退时肯定不是左对齐的）

if(bleft)

return -1;

else

nPattPos = chNext[nPattPos];

}

//判断模式串是否已经完全被匹配，否则返回-1

if(nPattPos == nPatternLen)

return nTextPos - nPattPos;

else

return -1;

}

////////////////////////////////////////////////////////////////////////////////

// 函数名称：bool _strlike(

// char * chText,

// char * chPattern,

// int nbegpos )

// 实现功能：两个字符串的匹配算法，带通配符

// 对全局变量的影响：无

// 参数说明：

// chText 原字符串

// chPattern 模式串

// nbegpos 起始位置

// 返回结果说明：

// =true 表示相似或一致

// =false 表示不相似或不一致

////////////////////////////////////////////////////////////////////////////////

bool _strlike(char * chText , char * chPattern , int nbegpos /* = 0 */)

{

bool bLeftMatch = true , bLast = false;

int nTextLen = _tcslen(chText);

//作最基础的匹配，即存在模式串的情况下再作比较

if(_tcslen(chPattern) == 0)

if(_tcslen(chText) == 0)

return true;

else

return false;

{

char * chFirstPattern , * chSecondPattern;

if(chPattern[0] == '%')

{

chPattern ++;

}while(chPattern[0] == '%');

if(chPattern == NULL || _tcslen(chPattern) == 0)

return true;

bLeftMatch = false;

}

else

bLeftMatch = true;

//初始化模式串

chSecondPattern = _tcschr(chPattern , '%');

int nPatternLen;

if(chSecondPattern == NULL)

{

bLast = true;

nPatternLen = _tcslen(chPattern);

if(!bLeftMatch)

{

//若以%开头，并且没有剩余模式串时，只要考虑右对齐匹配的方式即可(实际上也是左对齐)

if(nbegpos + nPatternLen <= nTextLen)

{

nbegpos = nTextLen - nPatternLen;

bLeftMatch = true;

}

else

return false;

}

else

if(nbegpos + nPatternLen != nTextLen)

return false;

}

else

{

//模式串不得长于原串

nPatternLen = chSecondPattern - chPattern;

if(nbegpos + nPatternLen > nTextLen)

return false;

}

//初始化模式串与修改剩余串

chFirstPattern = new char[nPatternLen + 1];

memcpy(chFirstPattern , chPattern , nPatternLen);

chFirstPattern[nPatternLen] = 0;

chPattern = chSecondPattern;

int npos = _strat(chText , chFirstPattern , nbegpos , bLeftMatch ? nPatternLen : nTextLen - nbegpos , bLeftMatch);

delete chFirstPattern;

if(npos < 0)

{

return false;

}

else

{

//定下一查找位置的起点

if(bLeftMatch)

{

if(npos != nbegpos)

return false;

}

else

nbegpos = npos;

if(bLast)

{

if(nPatternLen + npos == nTextLen)

return true;

else

return false;

}

else

nbegpos += nPatternLen;

}

}while(true);

}