php natsort内核函数浅析

官方手册(http://us.php.net/manual/en/function.natsort.php)

代码如下:
bool natsort ( array &$array )
This function implements a sort algorithm that orders alphanumeric strings in the way a human being would while maintaining key/value associations. This is described as a "natural ordering". An example of the difference between this algorithm and the regular computer string sorting algorithms (used in sort()) can be seen in the example below.

据官方手册还可以得到这样的结果: img1.png img2.png img10.png img12.png 显然这很适合对类似文件名的排序。从结果看这种自然算法应该是去掉头和尾的非数字部分,然后对留下来的数字部分进行排序,究竟是不是,还是看一下PHP源码吧。
代码如下:
//从ext/standard/array.c抽取的相关代码如下
static int PHP_array_natural_general_compare(const void *a,const void *b,int fold_case) /* {{{ */
{
Bucket *f,*s;
zval *fval,*sval;
zval first,second;
int result;
f = *((Bucket **) a);
s = *((Bucket **) b);
fval = *((zval **) f->pData);
sval = *((zval **) s->pData);
first = *fval;
second = *sval;
if (Z_TYPE_P(fval) != IS_STRING) {
zval_copy_ctor(&first);
convert_to_string(&first);
}
if (Z_TYPE_P(sval) != IS_STRING) {
zval_copy_ctor(&second);
convert_to_string(&second);
}
result = strnatcmp_ex(Z_STRVAL(first),Z_STRLEN(first),Z_STRVAL(second),Z_STRLEN(second),fold_case);
if (Z_TYPE_P(fval) != IS_STRING) {
zval_dtor(&first);
}
if (Z_TYPE_P(sval) != IS_STRING) {
zval_dtor(&second);
}
return result;
}
/* }}} */
static int PHP_array_natural_compare(const void *a,const void *b TSRMLS_DC) /* {{{ */
{
	/* Case-sensitive flavour of the natural comparator: no case folding. */
	const int fold_case = 0;

	return PHP_array_natural_general_compare(a, b, fold_case);
}
/* }}} */
static void PHP_natsort(INTERNAL_FUNCTION_PARAMETERS,int fold_case) /* {{{ */
{
	/*
	 * Shared implementation behind the natural-sort PHP functions.
	 *
	 * Parses a single array argument ("a") and sorts its hash table with
	 * zend_hash_sort(), using the case-folding comparator when fold_case is
	 * non-zero and the case-sensitive one otherwise. Returns without setting
	 * a value when parsing or sorting fails; sets the return value to TRUE
	 * on success.
	 */
	zval *array;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC,"a",&array) == FAILURE) {
		return;
	}

	/*
	 * Both branches must pass the same argument list: hash table, sort
	 * routine (zend_qsort), comparator, flag. The case-sensitive call had
	 * lost its zend_qsort argument.
	 * NOTE(review): the trailing 0 is presumably the "renumber" flag, left
	 * off so key/value associations are kept - confirm against zend_hash.h.
	 */
	if (fold_case) {
		if (zend_hash_sort(Z_ARRVAL_P(array),zend_qsort,PHP_array_natural_case_compare,0 TSRMLS_CC) == FAILURE) {
			return;
		}
	} else {
		if (zend_hash_sort(Z_ARRVAL_P(array),zend_qsort,PHP_array_natural_compare,0 TSRMLS_CC) == FAILURE) {
			return;
		}
	}

	RETURN_TRUE;
}
/* }}} */
/* {{{ proto bool natsort(array &array_arg)
Sort an array using natural sort (case-sensitive). The shared implementation
PHP_natsort sets the return value to TRUE once the sort has succeeded. */
PHP_FUNCTION(natsort)
{
/* The trailing 0 is fold_case: select the case-sensitive comparison. */
PHP_natsort(INTERNAL_FUNCTION_PARAM_PASSTHRU,0);
}
/* }}} */

虽然是第一次查看PHP的内核代码,不过凭借多年看代码的经验,还是很容易找到这个自然排序算法的核心就是函数:strnatcmp_ex(位于ext/standard/strnatcmp.c文件中)。
代码如下:
/* {{{ compare_right
 * Compare two right-aligned runs of digits starting at *a and *b.
 *
 * The longer run of digits wins. When both runs have the same length, the
 * first differing digit decides. Both cursors are advanced past the digits
 * that were examined.
 *
 * Returns -1, 0 or +1.
 */
static int
compare_right(char const **a,char const *aend,char const **b,char const *bend)
{
	/* Verdict of the first differing digit; it only counts if both runs
	   turn out to have the same number of digits. */
	int bias = 0;

	for (;;) {
		int a_has_digit = (*a != aend) && isdigit((int)(unsigned char)**a);
		int b_has_digit = (*b != bend) && isdigit((int)(unsigned char)**b);

		if (!a_has_digit && !b_has_digit) {
			/* Same length: the remembered difference (if any) decides. */
			return bias;
		}
		if (!a_has_digit) {
			return -1;	/* a's run is shorter */
		}
		if (!b_has_digit) {
			return +1;	/* b's run is shorter */
		}

		if (bias == 0 && **a != **b) {
			bias = (**a < **b) ? -1 : +1;
		}

		(*a)++;
		(*b)++;
	}
}
/* }}} */
/* {{{ compare_left
 * Compare two left-aligned runs of digits starting at *a and *b.
 *
 * Digits are compared position by position and the first difference decides
 * immediately. If one run ends before any difference is found, the shorter
 * run sorts first. Both cursors are advanced past the digits that compared
 * equal.
 *
 * a, b       in/out cursors into the two strings
 * aend, bend one-past-the-end pointers of the two strings
 *
 * Returns -1, 0 or +1.
 */
static int
compare_left(char const **a,char const *aend,char const **b,char const *bend)
{
	for (;; (*a)++,(*b)++) {
		int a_has_digit = (*a != aend) && isdigit((int)(unsigned char)**a);
		int b_has_digit = (*b != bend) && isdigit((int)(unsigned char)**b);

		if (!a_has_digit && !b_has_digit) {
			return 0;	/* both runs ended without a difference */
		}
		if (!a_has_digit) {
			return -1;	/* a's run ended first */
		}
		if (!b_has_digit) {
			return +1;	/* b's run ended first */
		}
		if (**a < **b) {
			return -1;
		}
		if (**a > **b) {
			return +1;
		}
	}
}
/* }}} */
/* {{{ strnatcmp_ex
 * Natural-order comparison of the strings (a, a_len) and (b, b_len).
 *
 * On every pass of the main loop, whitespace is skipped, and so is any '0'
 * that is followed by another character which is not '.'. When both cursors
 * then sit on a digit, the two digit runs are compared as numbers:
 * left-aligned (compare_left) if either run starts with '0', right-aligned
 * (compare_right) otherwise. Any other position is compared as a single
 * character, upper-cased first when fold_case is non-zero.
 *
 * Returns a negative value, 0, or a positive value when a sorts before,
 * equal to, or after b. If either string is empty the result is
 * a_len - b_len.
 *
 * NOTE(review): the loop reads *ap / *bp without first checking against
 * aend / bend, so it appears to rely on both buffers being NUL-terminated
 * at a + a_len and b + b_len - confirm with callers.
 *
 * call in array.c: strnatcmp_ex(Z_STRVAL(first),Z_STRLEN(first),Z_STRVAL(second),Z_STRLEN(second),fold_case);
 */
PHPAPI int strnatcmp_ex(char const *a,size_t a_len,char const *b,size_t b_len,int fold_case)
{
	char ca,cb;
	char const *ap,*bp;
	char const *aend = a + a_len,
		*bend = b + b_len;
	int fractional,result;

	if (a_len == 0 || b_len == 0)
		return a_len - b_len;

	ap = a;
	bp = b;
	while (1) {
		ca = *ap; cb = *bp;

		/* skip over leading spaces or zeros */
		while (isspace((int)(unsigned char)ca) || (ca == '0' && (ap+1 < aend) && (*(ap+1)!='.')))
			ca = *++ap;
		while (isspace((int)(unsigned char)cb) || (cb == '0' && (bp+1 < bend) && (*(bp+1)!='.')))
			cb = *++bp;

		/* process run of digits */
		if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned char)cb)) {
			/* A remaining leading '0' marks a run to compare left-aligned. */
			fractional = (ca == '0' || cb == '0');
			if (fractional)
				result = compare_left(&ap,aend,&bp,bend);
			else
				/* Same four-argument form as compare_left: both cursors
				   and both end pointers are required. */
				result = compare_right(&ap,aend,&bp,bend);

			if (result != 0)
				return result;
			else if (ap == aend && bp == bend)
				/* End of the strings. Let caller sort them out. */
				return 0;
			else {
				/* Keep on comparing from the current point. */
				ca = *ap; cb = *bp;
			}
		}

		if (fold_case) {
			ca = toupper((int)(unsigned char)ca);
			cb = toupper((int)(unsigned char)cb);
		}

		if (ca < cb)
			return -1;
		else if (ca > cb)
			return +1;

		++ap; ++bp;
		if (ap >= aend && bp >= bend)
			/* The strings compare the same. Perhaps the caller
			   will want to call strcmp to break the tie. */
			return 0;
		else if (ap >= aend)
			return -1;
		else if (bp >= bend)
			return 1;
	}
}
/* }}} */

从strnatcmp_ex函数中的:
代码如下:
/* Skip whitespace, and skip a '0' only when another character follows it
   inside the string and that character is not '.'; the same rule is applied
   to each of the two strings. */
while (isspace((int)(unsigned char)ca) || (ca == '0' && (ap+1 < aend) && (*(ap+1)!='.')))
ca = *++ap;
while (isspace((int)(unsigned char)cb) || (cb == '0' && (bp+1 < bend) && (*(bp+1)!='.')))
cb = *++bp;

所以,我觉得字符串(从当前位置开始)中前面的空白字符和数字前面的'0'不会参与比较,比较的结果应该和官方手册所说的一样。但是在我的PHP 5.2.9中,对"0"的处理结果却不一样(例如"img002.png"与"img1.png",我的理解应该是前者大于后者,不过在我的5.2.9中却是前者小于后者)。原因还没想清楚,可能是5.2.9的一个bug,也可能是自己还没有理解清楚源码的意思。下次配置好环境再好好测试,好好消化~~

在array.c中有两个重要的数据结构很值得我们关注

相关文章

Hessian开源的远程通讯,采用二进制 RPC的协议,基于 HTTP 传输。可以实现PHP调用Java,Python,C#等多语...
初识Mongodb的一些总结,在Mac Os X下真实搭建mongodb环境,以及分享个Mongodb管理工具,学习期间一些总结...
边看边操作,这样才能记得牢,实践是检验真理的唯一标准.光看不练假把式,光练不看傻把式,边看边练真把式....
在php中,结果输出一共有两种方式:echo和print,下面将对两种方式做一个比较。 echo与print的区别: (...
在安装好wampServer后,一直没有使用phpMyAdmin,今天用了一下,phpMyAdmin显示错误:The mbstring exte...
变量是用于存储数据的容器,与代数相似,可以给变量赋予某个确定的值(例如:$x=3)或者是赋予其它的变...