build_needs_escape函数的目的是建立一个位图(bitmap),用来表示哪些字符需要转义。此函数在escape.c中,我们先到escape.h中看看。
#ifndef ESCAPE_H
#define ESCAPE_H

#include "config.h"

/* Highest character number that can possibly be passed through un-escaped.
 * 128 bits are enough: needs_escape() reports every value at or above this
 * limit as "needs escape" without consulting the bitmap. */
#define NEEDS_ESCAPE_BITS 128

#ifndef NEEDS_ESCAPE_SHIFT
#define NEEDS_ESCAPE_SHIFT 5    /* 1 << 5 is 32 bits */
#endif

/* Number of bitmap bits kept in each element of _needs_escape. */
#define NEEDS_ESCAPE_WORD_LENGTH (1 << NEEDS_ESCAPE_SHIFT)

/* Index of the _needs_escape element that holds the bit for character c. */
#define NEEDS_ESCAPE_INDEX(c) ((c) >> NEEDS_ESCAPE_SHIFT)

/* Assume variable shift is fast, otherwise this could be a table lookup.
 *
 * NEEDS_ESCAPE_WORD_LENGTH - 1 acts as a mask that keeps the low
 * NEEDS_ESCAPE_SHIFT bits of c; shifting 1 left by that amount selects the
 * bit for c inside its bitmap word.
 *
 * The shifted constant is unsigned long, matching the element type of
 * _needs_escape.  A plain int 1 would be shifted into the sign bit for
 * bit 31 (undefined behaviour), and the resulting negative mask would
 * sign-extend when combined with an unsigned long word.
 */
#define NEEDS_ESCAPE_MASK(c) (1UL << ((c) & (NEEDS_ESCAPE_WORD_LENGTH - 1)))

/* Newer compilers could use an inline function.
 * This macro works great, as long as you pass unsigned int or unsigned char.
 * Note that c is evaluated more than once, so it must not have side effects.
 *
 * Evaluates to non-zero when c must be escaped: either c is outside the
 * range covered by the bitmap, or its bit in _needs_escape is set.
 */
#define needs_escape(c) \
    ((c) >= NEEDS_ESCAPE_BITS || \
     (_needs_escape[NEEDS_ESCAPE_INDEX(c)] & NEEDS_ESCAPE_MASK(c)))

/* Bitmap with one bit per character code below NEEDS_ESCAPE_BITS,
 * rounded up to a whole number of words. */
extern unsigned long _needs_escape[(NEEDS_ESCAPE_BITS + NEEDS_ESCAPE_WORD_LENGTH - 1) / NEEDS_ESCAPE_WORD_LENGTH];

/* Fills in _needs_escape; must run before needs_escape() is used. */
void build_needs_escape(void);

#endif /* ESCAPE_H */
escape.h中的东西让我看了很久才看懂。之前看programming pearls时也实现了个bitmap,在这里。
然后来看看escape.c
unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH]; void build_needs_escape(void) { unsigned int a, b;//理论上这里不应该是unsigned long么。。 const unsigned char special[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "-_.!~*'():@&=+$,/?"; //以上不需要转意 /* 21 Mar 2002 - jnelson - confirm with Apache 1.3.23 that '?' * is safe to leave unescaped. */ unsigned short i, j; b = 1; for (a=0; b!=0; a++) b=b<<1; /* I found $a bit positions available in an unsigned long. */ if (a < NEEDS_ESCAPE_WORD_LENGTH) {//大于a的时候表示unsigned long就不能保存WORD_LENGTH的东东了 fprintf(stderr, "NEEDS_ESCAPE_SHIFT configuration error -- "\ "%d should be <= log2(%d)\n", NEEDS_ESCAPE_SHIFT, a); exit(1); } else if (a >= 2*NEEDS_ESCAPE_WORD_LENGTH) {//这时可能需要优化,NEED_ESCAPE_SHIFT++ /* needs_escape_shift configuration suboptimal */ } else { /* Ahh, just right! */; } memset(_needs_escape, ~0, sizeof(_needs_escape));//默认是1,需要escape for(i = 0; i < sizeof(special) - 1; ++i) { j=special[i]; if (j>=NEEDS_ESCAPE_BITS) { /* warning: character $j will be needlessly escaped. */ } else { _needs_escape[NEEDS_ESCAPE_INDEX(j)]&=~NEEDS_ESCAPE_MASK(j);//不需要时将其置0 } } } //在编译事加-DTEST 参数可以运行以下,测试看对不对。 #ifdef TEST int main(void) { int i; build_needs_escape(); for(i = 0; i <= NEEDS_ESCAPE_BITS; ++i) { if (needs_escape(i)) { fprintf(stdout, "%3d needs escape.\n", i); } } return(0); } #endif
好吧,就这么多了。感觉这一部分用C++的bitset来实现会比较好……唉。