本文共 4163 字,大约阅读时间需要 13 分钟。
/*
 * inner4 - inner product of vectors u and v, 6x6 loop unrolling.
 *
 * Six elements are consumed per iteration of the main loop, and each of the
 * six products is added to its own accumulator, so the additions of one
 * iteration do not depend on each other.  The accumulators are combined once
 * at the end.
 *
 * u, v  - input vectors; the element count is taken from u only, and v is
 *         indexed over the same range without any check.
 * dest  - receives the resulting inner product.
 */
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
    const long n = vec_length(u);
    data_t *const ua = get_vec_start(u);
    data_t *const va = get_vec_start(v);

    data_t acc0 = (data_t) 0;
    data_t acc1 = (data_t) 0;
    data_t acc2 = (data_t) 0;
    data_t acc3 = (data_t) 0;
    data_t acc4 = (data_t) 0;
    data_t acc5 = (data_t) 0;

    /* Main loop: runs while more than six elements remain. */
    const long unrolled_end = n - 6;
    long i = 0;
    while (i < unrolled_end) {
        acc0 += ua[i]     * va[i];
        acc1 += ua[i + 1] * va[i + 1];
        acc2 += ua[i + 2] * va[i + 2];
        acc3 += ua[i + 3] * va[i + 3];
        acc4 += ua[i + 4] * va[i + 4];
        acc5 += ua[i + 5] * va[i + 5];
        i += 6;
    }

    /* Leftover elements are folded into the first accumulator one by one. */
    while (i < n) {
        acc0 += ua[i] * va[i];
        i++;
    }

    *dest = acc0 + acc1 + acc2 + acc3 + acc4 + acc5;
}
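限制性能的因素：原文作者未能确定。一个合理的解释是吞吐量界限——每个元素需要两次加载（udata[i] 与 vdata[i]）和一次乘法，若处理器每个周期最多完成两次加载、发射一次乘法，那么无论展开多少路，CPE 都无法低于 1.00。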
/*
 * inner4 - inner product of vectors u and v, 6x1a loop unrolling.
 *
 * Six elements are consumed per iteration of the main loop.  Their six
 * products are first added together and only that partial result is added
 * to the single running sum, so the running sum is updated once per
 * iteration rather than once per element.
 *
 * u, v  - input vectors; the element count is taken from u only, and v is
 *         indexed over the same range without any check.
 * dest  - receives the resulting inner product.
 */
void inner4(vec_ptr u, vec_ptr v, data_t *dest)
{
    const long n = vec_length(u);
    data_t *const ua = get_vec_start(u);
    data_t *const va = get_vec_start(v);

    data_t total = (data_t) 0;

    /* Main loop: runs while more than six elements remain. */
    const long unrolled_end = n - 6;
    long i = 0;
    while (i < unrolled_end) {
        const data_t *up = ua + i;
        const data_t *vp = va + i;

        total += (up[0] * vp[0] + up[1] * vp[1] + up[2] * vp[2] +
                  up[3] * vp[3] + up[4] * vp[4] + up[5] * vp[5]);
        i += 6;
    }

    /* Leftover elements, one at a time. */
    while (i < n) {
        total += ua[i] * va[i];
        i++;
    }

    *dest = total;
}
/*
 * NOTE: the header names of the original five #include directives were lost
 * when this listing was extracted.  The headers below are the ones the code
 * in this listing actually needs.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * basic_memset - fill the first n bytes at s with the byte value of c,
 * one byte per iteration.
 *
 * s - destination buffer, at least n bytes long
 * c - fill value; converted to unsigned char before being stored
 * n - number of bytes to fill
 *
 * Returns s.
 */
void *basic_memset(void *s, int c, size_t n)
{
    unsigned char *out = s;
    const unsigned char fill = (unsigned char)c;

    for (size_t i = 0; i < n; i++) {
        out[i] = fill;
    }
    return s;
}

/*
 * effective_memset - fill the first n bytes at s with the low byte of cs,
 * storing one unsigned long worth of bytes at a time where possible.
 *
 * The fill happens in three phases:
 *   1. single bytes until the write pointer is aligned to
 *      sizeof(unsigned long) (or n bytes have been written),
 *   2. whole words,
 *   3. the remaining (fewer than one word) bytes.
 *
 * s  - destination buffer, at least n bytes long
 * cs - fill value; only its low byte is used.  The word pattern is built
 *      here by replicating that byte, so a caller that already passes the
 *      byte replicated across the whole word gets the same result, and a
 *      caller that passes a plain byte value gets a uniform fill as well
 *      (previously the word phase stored cs verbatim).
 * n  - number of bytes to fill
 *
 * Returns s.
 */
void *effective_memset(void *s, unsigned long cs, size_t n)
{
    const size_t word = sizeof(unsigned long);
    const unsigned char fill = (unsigned char)cs;
    /* ~0UL / 0xFF has the value 1 in every byte; scaling it by fill puts
     * fill in every byte of the word. */
    const unsigned long pattern = (~0UL / 0xFFUL) * fill;

    unsigned char *out = s;
    size_t done = 0;

    /* Phase 1: advance byte-wise to a word-aligned address. */
    while (done < n && (uintptr_t)out % word != 0) {
        *out++ = fill;
        done++;
    }

    /* Phase 2: whole words.  memcpy of a fixed word size keeps the store
     * free of aliasing problems whatever type the caller's buffer has. */
    const size_t rest = n - done;
    for (size_t words = rest / word; words > 0; words--) {
        memcpy(out, &pattern, word);
        out += word;
    }

    /* Phase 3: trailing bytes that do not fill a whole word. */
    for (size_t tail = rest % word; tail > 0; tail--) {
        *out++ = fill;
    }
    return s;
}
/*
 * NOTE: the original listing began with an #include directive whose header
 * name was lost during extraction (it had fused with the first function
 * definition).  None of the functions below uses a library facility, so no
 * header is required for them.
 */

/*
 * poly - evaluate a[0] + a[1]*x + ... + a[degree]*x^degree directly,
 * keeping a running power of x.
 *
 * a      - coefficients, indices 0..degree are read
 * x      - evaluation point
 * degree - index of the highest coefficient
 */
double poly(double a[], double x, long degree)
{
    double result = a[0];
    double xpwr = x;    /* x^i for the term about to be added */

    for (long i = 1; i <= degree; i++) {
        result += a[i] * xpwr;
        xpwr = x * xpwr;
    }
    return result;
}

/*
 * poly_6_3a - same polynomial as poly(), evaluated with the loop unrolled
 * six times and the terms spread over three accumulators.
 *
 * Each iteration of the main loop handles six consecutive terms: every
 * accumulator takes a pair of neighbouring terms, using its own running
 * power of x, and all three powers are advanced by x^6.  Terms that are left
 * over are added one at a time.
 *
 * a      - coefficients, indices 0..degree are read
 * x      - evaluation point
 * degree - index of the highest coefficient
 */
double poly_6_3a(double a[], double x, long degree)
{
    double result = a[0];
    double result1 = 0;
    double result2 = 0;

    double xpwr = x;                      /* x^i       */
    double xpwr1 = x * x * x;             /* x^(i + 2) */
    double xpwr2 = x * x * x * x * x;     /* x^(i + 4) */
    const double xpwr_step = x * x * x * x * x * x;

    long i = 1;
    for (; i <= degree - 6; i += 6) {
        result  = result  + (a[i]     * xpwr  + a[i + 1] * xpwr  * x);
        result1 = result1 + (a[i + 2] * xpwr1 + a[i + 3] * xpwr1 * x);
        result2 = result2 + (a[i + 4] * xpwr2 + a[i + 5] * xpwr2 * x);

        xpwr  *= xpwr_step;
        xpwr1 *= xpwr_step;
        xpwr2 *= xpwr_step;
    }

    /* Remaining terms; xpwr still holds x^i at this point. */
    for (; i <= degree; i++) {
        result = result + a[i] * xpwr;
        xpwr *= x;
    }

    return result + result1 + result2;
}

/*
 * polyh - evaluate the same polynomial with Horner's method, working from
 * the highest coefficient down to a[0].
 *
 * a      - coefficients, indices 0..degree are read
 * x      - evaluation point
 * degree - index of the highest coefficient
 */
double polyh(double a[], double x, long degree)
{
    double result = a[degree];

    for (long i = degree - 1; i >= 0; i--) {
        result = a[i] + x * result;
    }
    return result;
}
/*
 * NOTE: the original listing began with two #include directives whose header
 * names were lost during extraction (they had fused with the first function
 * definition).  Neither function below uses a library facility, so no header
 * is required for them.
 */

/*
 * psum1a - prefix sum: p[i] = a[0] + a[1] + ... + a[i] for 0 <= i < n.
 *
 * a - input values, n elements
 * p - output, n elements
 * n - element count; nothing is read or written when n <= 0
 */
void psum1a(float a[], float p[], long n)
{
    if (n <= 0) {
        return;
    }

    float last_val = p[0] = a[0];

    for (long i = 1; i < n; i++) {
        float val = last_val + a[i];
        p[i] = val;
        last_val = val;
    }
}

/*
 * psum_4_1a - prefix sum with the loop unrolled four times.
 *
 * Each iteration of the main loop produces four consecutive outputs.  The
 * running total carried into the next iteration is formed by first adding
 * the four inputs together and then adding that to the previous total, so
 * only one addition per iteration depends on the previous iteration.
 *
 * a - input values, n elements
 * p - output, n elements
 * n - element count; nothing is read or written when n <= 0
 */
void psum_4_1a(float a[], float p[], long n)
{
    if (n <= 0) {
        return;
    }

    float last_val = p[0] = a[0];
    long i;

    /* The index must advance by four: every iteration already fills
     * p[i]..p[i+3], and stepping by one would overwrite them with sums that
     * count earlier elements more than once. */
    for (i = 1; i + 3 < n; i += 4) {
        /* Carried total, computed before the stores below so the inputs are
         * read before any output element is written. */
        float next_val = last_val + (a[i] + a[i + 1] + a[i + 2] + a[i + 3]);

        float s0 = last_val + a[i];
        float s1 = s0 + a[i + 1];
        float s2 = s1 + a[i + 2];
        float s3 = s2 + a[i + 3];

        p[i]     = s0;
        p[i + 1] = s1;
        p[i + 2] = s2;
        p[i + 3] = s3;

        last_val = next_val;
    }

    /* Fewer than four elements left. */
    for (; i < n; i++) {
        last_val += a[i];
        p[i] = last_val;
    }
}
转载地址:http://tywai.baihongyu.com/