考察以下代码:
#include <stdlib.h>

/*
 * Combining operation and its identity element.
 * IDENT must be the identity of OP: 1 for multiplication (used here),
 * or 0 together with `#define OP +` to compute a sum instead.
 */
#define IDENT 1
#define OP *

typedef int data_t;

/* Vector header: element count plus a heap-allocated array of `len` elements. */
typedef struct {
    long int len;
    data_t *data;
} vec_rec, *vec_ptr;

/*
 * Create a vector of the given length.
 *
 * Returns a pointer to a newly allocated vector, or NULL if either
 * allocation fails. When len > 0 the elements are zero-initialized by
 * calloc(); otherwise the data pointer is NULL. Both the header and the
 * data array come from the heap, so the caller releases them with free().
 */
vec_ptr NewVec(long int len)
{
    vec_ptr result = malloc(sizeof *result);
    if (!result) {
        return NULL;
    }

    result->len = len;
    result->data = NULL;
    if (len > 0) {
        data_t *data = calloc((size_t)len, sizeof *data);
        if (!data) {
            /* Do not leak the header when the array cannot be allocated. */
            free(result);
            return NULL;
        }
        result->data = data;
    }
    return result;
}

/*
 * Retrieve a vector element and store it at dest.
 *
 * Returns 1 on success, or 0 (leaving *dest untouched) when index is
 * outside the range [0, v->len).
 */
int GetVecElement(vec_ptr v, long int index, data_t *dest)
{
    if (index < 0 || index >= v->len) {
        return 0;
    }
    *dest = v->data[index];
    return 1;
}

/* Return the number of elements in the vector. */
long int VecLength(vec_ptr v)
{
    return v->len;
}

/*
 * Baseline implementation: combine all elements of v with OP, starting
 * from IDENT, and store the result in *dest.
 *
 * Deliberately unoptimized: VecLength() is re-evaluated on every loop
 * test and each element is fetched through the bounds-checked accessor.
 */
void Combine1(vec_ptr v, data_t *dest)
{
    *dest = IDENT;
    for (long int i = 0; i < VecLength(v); ++i) {
        data_t val;
        GetVecElement(v, i, &val);
        *dest = *dest OP val;
    }
}
其中对于 Combine1(), 每次在 for 循环中进行条件检查时, 都要调用 VecLength() 获取向量长度, 可是在此例中向量的长度是不变的, 因此在首次获取向量长度之后, 接下来对 VecLength() 的调用就成了累赘, 优化的方法就是消除循环的低效率 (把该调用移到循环之外):
//消除循环低效率后的代码void Combine2(vec_ptr v, data_t *dest){ long int length = VecLength(v); *dest = IDENT; for(long int i = 0; i < length; ++i){ data_t val; GetVecElemet(v, i, &val); *dest = *dest OP val; }}
对于 Combine2(), 每次循环迭代都会调用 GetVecElement() 来获取下一个向量元素, 而每一次向量引用都要对索引做边界检查, 这明显会造成低效率, 所以需要增加一个函数 GetVecStart() 来返回数组的起始地址, 从而直接访问数组, 避免通过函数调用来获取每个元素.
//减少过程调用后的代码data_t* GetVecStart(vec_ptr v){ return v->data;}void Combine3(vec_ptr v, deta_t *dest){ long int length = vec_length(v); data_t *data = GetVecStart(v); *dest = IDENT; for(long int i = 0; i < length; ++i){ *dest = *dest OP data[i]; }}
//消除不必要的储存器引用后的代码void Conbine4(vec_ptr v, data_t *dest){ long int length = VecLength(v); data_t *data = GetVecStart(v); data acc = IDENT; //use local variable for(long int i = 0; i < length; ++i){ acc = acc OP data[i]; } *dest = acc;}