build_needs_escape函数的目的是建立一个位图(bitmap),用来表示哪些字符需要转义。此函数位于escape.c中,先到escape.h中看看。
#include "config.h"

/* Highest character number that can possibly be passed through un-escaped */
/* A 128-bit bitmap is sufficient. */
#define NEEDS_ESCAPE_BITS 128

#ifndef NEEDS_ESCAPE_SHIFT
#define NEEDS_ESCAPE_SHIFT 5    /* 1 << 5 is 32 bits */
#endif

/* Number of bitmap bits stored in each word of _needs_escape. */
#define NEEDS_ESCAPE_WORD_LENGTH (1<<NEEDS_ESCAPE_SHIFT)

/* Index of the _needs_escape word that holds the bit for character c. */
#define NEEDS_ESCAPE_INDEX(c) ((c)>>NEEDS_ESCAPE_SHIFT)

/* Assume variable shift is fast, otherwise this could be a table lookup */
/*
 * (NEEDS_ESCAPE_WORD_LENGTH - 1) masks off the low NEEDS_ESCAPE_SHIFT bits
 * of c; a single 1 bit is then shifted left by that amount to select the
 * bit for c inside its word.  The constant is 1UL, not 1, so the mask has
 * the same type as the bitmap words: shifting a signed int into its sign
 * bit (c & 31 == 31, e.g. '?' or '_') is undefined, and an int mask cannot
 * reach bits >= 32 if NEEDS_ESCAPE_SHIFT is raised.
 */
#define NEEDS_ESCAPE_MASK(c) (1UL<<((c)&(NEEDS_ESCAPE_WORD_LENGTH - 1)))

/* Newer compilers could use an inline function.
 * This macro works great, as long as you pass unsigned int or unsigned char.
 * Note that c is evaluated more than once.
 */
#define needs_escape(c) ((c)>=NEEDS_ESCAPE_BITS || (_needs_escape[NEEDS_ESCAPE_INDEX(c)]&NEEDS_ESCAPE_MASK(c)) != 0)

/* The bitmap itself; a set bit means the character needs escaping. */
extern unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];

/* Fills in _needs_escape; call before using needs_escape(). */
void build_needs_escape(void);
escape.h中的东西让我看了很久才看懂。之前看programming pearls时也实现了个bitmap,在这里。
然后来看看escape.c
/*
 * Bitmap with one bit per character code below NEEDS_ESCAPE_BITS.
 * A set bit means the character must be escaped.  Populated by
 * build_needs_escape() and queried through the needs_escape() macro.
 */
unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];

/*
 * build_needs_escape - populate the _needs_escape bitmap.
 *
 * Every bit is first set ("needs escape"), then the bit of each character
 * listed in special[] is cleared.  Before touching the bitmap the function
 * verifies that an unsigned long can hold NEEDS_ESCAPE_WORD_LENGTH bits;
 * if it cannot, a diagnostic is written to stderr and the process exits
 * with status 1.
 */
void build_needs_escape(void)
{
    /* Characters that do not need to be escaped. */
    const unsigned char special[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "-_.!~*'():@&=+$,/?";
    /* 21 Mar 2002 - jnelson - confirm with Apache 1.3.23 that '?'
     * is safe to leave unescaped.
     */
    unsigned long probe;    /* same type as the bitmap words */
    unsigned int bits;
    size_t i;
    unsigned int ch;

    /*
     * Count the bit positions available in an unsigned long by shifting
     * a single 1 bit left until it falls off the top.  The probe must be
     * unsigned long (not unsigned int) because that is the type the
     * bitmap words are stored in.
     */
    probe = 1;
    for (bits = 0; probe != 0; bits++) {
        probe <<= 1;
    }

    if (bits < NEEDS_ESCAPE_WORD_LENGTH) {
        /* A word cannot hold NEEDS_ESCAPE_WORD_LENGTH bits: fatal. */
        fprintf(stderr,
                "NEEDS_ESCAPE_SHIFT configuration error -- "
                "%d should be <= log2(%d)\n",
                NEEDS_ESCAPE_SHIFT, (int) bits);
        exit(1);
    }
    /*
     * bits >= 2*NEEDS_ESCAPE_WORD_LENGTH would mean NEEDS_ESCAPE_SHIFT is
     * suboptimal (it could be incremented); that is harmless, so nothing
     * is done about it.  Otherwise the configuration is just right.
     */

    /* Default every character to "needs escape" (all bits set). */
    memset(_needs_escape, ~0, sizeof(_needs_escape));

    /* sizeof(special) - 1 skips the terminating NUL of the literal. */
    for (i = 0; i < sizeof(special) - 1; ++i) {
        ch = special[i];
        if (ch >= NEEDS_ESCAPE_BITS) {
            /* warning: character $ch will be needlessly escaped. */
            continue;
        }
        /* Clear the bit: this character does not need escaping. */
        _needs_escape[NEEDS_ESCAPE_INDEX(ch)] &= ~NEEDS_ESCAPE_MASK(ch);
    }
}
/*
 * Self-check driver: compile with -DTEST to build it.  It fills in the
 * bitmap and prints every character code from 0 through NEEDS_ESCAPE_BITS
 * (inclusive) for which needs_escape() is true, so the table can be
 * inspected by eye.
 */
#ifdef TEST
int main(void)
{
    int code = 0;

    build_needs_escape();

    while (code <= NEEDS_ESCAPE_BITS) {
        if (needs_escape(code))
            printf("%3d needs escape.\n", code);
        code++;
    }

    return 0;
}
#endif
好吧,就这么多了。感觉这一部分用C++的bitset会比较好……
更多内容请访问:IT源点
注意:本文归作者所有,未经作者允许,不得转载