Boa源码分析 – 4 – 转义处理

build_needs_escape函数的目的是建立一个位图(bitmap),用来表示哪些字符需要转义。此函数定义在escape.c中,我们先到escape.h中看看。

#include "config.h"

/* Highest character number that can possibly be passed through un-escaped */
#define NEEDS_ESCAPE_BITS 128
/* In other words the bitmap needs 128 bits: one for each code 0..127. */
#ifndef NEEDS_ESCAPE_SHIFT
#define NEEDS_ESCAPE_SHIFT 5     /* 1 << 5 is 32 bits */
#endif

/* Number of bitmap bits kept in each element of _needs_escape. */
#define NEEDS_ESCAPE_WORD_LENGTH (1<<NEEDS_ESCAPE_SHIFT)

/* Index of the _needs_escape element that holds the bit for character c. */
#define NEEDS_ESCAPE_INDEX(c) ((c)>>NEEDS_ESCAPE_SHIFT)

/* Assume variable shift is fast, otherwise this could be a table lookup */
/*
 * Single-bit mask for character c inside its bitmap element:
 * (NEEDS_ESCAPE_WORD_LENGTH - 1) keeps the low NEEDS_ESCAPE_SHIFT bits of c,
 * and a one is shifted left by that amount.
 * The constant is 1UL so that the shift is carried out in unsigned long, the
 * element type of _needs_escape.  With a plain int `1`, bit position 31
 * (hit by '?' and '_') would overflow a 32-bit signed int.
 */
#define NEEDS_ESCAPE_MASK(c)  (1UL<<((c)&(NEEDS_ESCAPE_WORD_LENGTH - 1)))

/* Newer compilers could use an inline function.
* This macro works great, as long as you pass unsigned int or unsigned char.
* Note that the argument is evaluated more than once, so it must not have
* side effects.  Any value >= NEEDS_ESCAPE_BITS is reported as needing an
* escape without touching the bitmap.
*/
#define needs_escape(c) ((c)>=NEEDS_ESCAPE_BITS || (_needs_escape[NEEDS_ESCAPE_INDEX(c)]&NEEDS_ESCAPE_MASK(c)))

/* The bitmap itself; a set bit means "this character must be escaped". */
extern unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];

/* Populates _needs_escape; call it before using needs_escape(). */
void build_needs_escape(void);


escape.h中的内容我看了很久才看懂。之前读《Programming Pearls》时我也实现过一个bitmap(原文此处为指向那篇文章的链接)。

然后来看看escape.c

 


/* Bitmap storage: a set bit means the corresponding character must be escaped. */
unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];

/*
 * build_needs_escape - initialise the _needs_escape bitmap.
 *
 * First verifies that an unsigned long really has at least
 * NEEDS_ESCAPE_WORD_LENGTH bits; if not, a configuration error is printed
 * to stderr and the process exits with status 1.  Then every bit is set
 * ("needs escaping") and the bits of the characters listed in `special`
 * are cleared again.
 */
void build_needs_escape(void)
{
    unsigned int a;
    /*
     * The probe must have the bitmap's element type (unsigned long).  An
     * unsigned int probe would measure the wrong type and wrongly reject
     * NEEDS_ESCAPE_SHIFT=6 on platforms where unsigned long is wider than
     * unsigned int.
     */
    unsigned long b;
    /* Characters that may be passed through without escaping. */
    const unsigned char special[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "-_.!~*'():@&=+$,/?";
    /* 21 Mar 2002 - jnelson - confirm with Apache 1.3.23 that '?'
     * is safe to leave unescaped.
     */
    size_t i;
    unsigned int j;

    /* Count the value bits: shift a one left until it falls off the top. */
    b = 1;
    for (a = 0; b != 0; a++) {
        b = b << 1;
    }
    /* I found $a bit positions available in an unsigned long. */
    if (a < NEEDS_ESCAPE_WORD_LENGTH) {
        /* A bitmap word cannot hold NEEDS_ESCAPE_WORD_LENGTH bits. */
        fprintf(stderr,
                "NEEDS_ESCAPE_SHIFT configuration error -- "
                "%d should be <= log2(%u)\n",
                NEEDS_ESCAPE_SHIFT, a);
        exit(1);
    }
    /*
     * a >= 2*NEEDS_ESCAPE_WORD_LENGTH would mean the configuration is merely
     * suboptimal (NEEDS_ESCAPE_SHIFT could be larger); anything in between
     * is just right.  Neither case needs any action.
     */

    /* Default: every character needs escaping (all bits set). */
    memset(_needs_escape, ~0, sizeof(_needs_escape));

    /* sizeof(special) - 1 skips the string's terminating NUL. */
    for (i = 0; i < sizeof(special) - 1; ++i) {
        j = special[i];
        if (j >= NEEDS_ESCAPE_BITS) {
            /* warning: character $j will be needlessly escaped. */
            continue;
        }
        /* Clear the bit: this character is safe as-is. */
        _needs_escape[NEEDS_ESCAPE_INDEX(j)] &= ~NEEDS_ESCAPE_MASK(j);
    }
}
/*
 * Self-check driver: compile with -DTEST to build it.  It fills the bitmap
 * and prints every code from 0 through NEEDS_ESCAPE_BITS (inclusive) that
 * needs_escape() reports as requiring an escape.
 */
#ifdef TEST
int main(void)
{
    int code = 0;

    build_needs_escape();
    while (code <= NEEDS_ESCAPE_BITS) {
        if (needs_escape(code)) {
            fprintf(stdout, "%3d needs escape.\n", code);
        }
        ++code;
    }
    return 0;
}
#endif

好吧,就这么多了。感觉这一部分用C++的bitset来实现会更好……唉。

发表评论

电子邮件地址不会被公开。