Doxygen Source Code Documentation
RegAna.c
Go to the documentation of this file.00001 /*---------------------------------------------------------------------------*/ 00002 /* 00003 This file contains routines for performing multiple linear regression. 00004 00005 File: RegAna.c 00006 Author: B. Douglas Ward 00007 Date: 02 September 1998 00008 00009 Mod: Restructured matrix calculations to improve execution speed. 00010 Date: 16 December 1998 00011 00012 Mod: Added routines for matrix calculation of general linear tests. 00013 Date: 02 July 1999 00014 00015 Mod: Additional statistical output (partial R^2 statistics). 00016 Date: 07 September 1999 00017 00018 Mod: Modifications for compatibility with 3dDeconvolve options for 00019 writing the fitted full model time series (-fitts) and the 00020 residual error time series (-errts) to 3d+time datasets. 00021 Date: 22 November 1999 00022 00023 Mod: Added function calc_sse_fit. 00024 Date: 21 April 2000 00025 00026 Mod: Additional output with -nodata option (norm.std.dev.'s for 00027 GLT linear constraints). 00028 Date: 11 August 2000 00029 00030 Mod: Modified use of vector_multiply() followed by vector_subtract() 00031 to use new function vector_multiply_subtract(), and to use 00032 new function vector_dotself() when possible -- RWCox. 00033 Date: 28 Dec 2001 00034 00035 Mod: Modification to calc_tcoef to calculate t-statistics for individual 00036 GLT linear constraints. 00037 Date: 29 January 2002 00038 00039 Mod: If FLOATIZE is defined, uses floats instead of doubles -- RWCox. 00040 Date: 03 Mar 2003 00041 00042 Mod: If use_psinv is 1, use matrix_psinv() instead of inversion. 00043 Date 19 Jul 2004 -- RWCox 00044 00045 */ 00046 00047 static int use_psinv = 1 ; /* 19 Jul 2004 */ 00048 00049 /*---------------------------------------------------------------------------*/ 00050 00051 #ifndef FLOATIZE 00052 # include "matrix.c" 00053 #endif 00054 00055 void RA_error (char * message); 00056 00057 00058 /*---------------------------------------------------------------------------*/ 00059 00060 /** macro to test a malloc-ed pointer for validity **/ 00061 00062 #define MTEST(ptr) \ 00063 if((ptr)==NULL) \ 00064 ( RA_error ("Cannot allocate memory") ) 00065 00066 00067 /*---------------------------------------------------------------------------*/ 00068 /* 00069 Calculate constant matrices to be used for all voxels. 00070 */ 00071 00072 int calc_matrices 00073 ( 00074 matrix xdata, /* experimental design matrix */ 00075 int p, /* number of parameters */ 00076 int * plist, /* list of parameters */ 00077 matrix * x, /* extracted X matrix */ 00078 matrix * xtxinv, /* matrix: 1/(X'X) */ 00079 matrix * xtxinvxt /* matrix: (1/(X'X))X' */ 00080 ) 00081 00082 { 00083 matrix xt, xtx; /* temporary matrix calculation results */ 00084 int ok; /* flag for successful matrix inversion */ 00085 00086 ENTRY("calc_matrices") ; 00087 00088 /*----- extract the independent variable matrix X -----*/ 00089 matrix_extract (xdata, p, plist, x); 00090 00091 00092 /*----- calculate various matrices which will be needed later -----*/ 00093 if( p <= 1 || !use_psinv ){ 00094 matrix_initialize (&xt); 00095 matrix_initialize (&xtx); 00096 matrix_transpose (*x, &xt); 00097 matrix_multiply (xt, *x, &xtx); 00098 ok = matrix_inverse_dsc (xtx, xtxinv); 00099 if (ok) 00100 matrix_multiply (*xtxinv, xt, xtxinvxt); 00101 else 00102 RA_error ("Improper X matrix (cannot invert X'X) "); 00103 matrix_destroy (&xtx); 00104 matrix_destroy (&xt); 00105 } else { 00106 matrix_psinv (*x, xtxinv , xtxinvxt ); ok = 1 ; /* 19 Jul 2004 */ 00107 } 00108 00109 RETURN (ok); 00110 } 00111 00112 00113 /*---------------------------------------------------------------------------*/ 00114 /* 00115 Calculate constant matrix to be used for general linear test (GLT). 00116 */ 00117 00118 int calc_glt_matrix 00119 ( 00120 matrix xtxinv, /* matrix: 1/(X'X) */ 00121 matrix c, /* matrix representing GLT linear constraints */ 00122 matrix * a, /* constant matrix for use later */ 00123 matrix * cxtxinvct /* matrix: C(1/(X'X))C' for GLT */ 00124 ) 00125 00126 { 00127 matrix ct, xtxinvct, t1, t2; /* temporary matrix calculation results */ 00128 int ok; /* flag for successful matrix inversion */ 00129 00130 00131 ENTRY("calc_glt_matrix") ; 00132 00133 /*----- initialize matrices -----*/ 00134 matrix_initialize (&ct); 00135 matrix_initialize (&xtxinvct); 00136 matrix_initialize (&t1); 00137 matrix_initialize (&t2); 00138 00139 00140 /*----- calculate the constant matrix which will be needed later -----*/ 00141 matrix_transpose (c, &ct); /* [c]' */ 00142 matrix_multiply (xtxinv, ct, &xtxinvct); /* inv{[x]'[x]} [c]' */ 00143 matrix_multiply (c, xtxinvct, cxtxinvct); /* [c] inv{[x]'[x]} [c]' */ 00144 ok = matrix_inverse_dsc (*cxtxinvct, &t2); /* inv{ [c] inv{[x]'[x]} [c]' } */ 00145 if (ok) 00146 { 00147 matrix_multiply (xtxinvct, t2, &t1); 00148 matrix_multiply (t1, c, &t2); 00149 matrix_identity (xtxinv.rows, &t1); 00150 matrix_subtract (t1, t2, a); 00151 } 00152 else 00153 RA_error ("Improper C matrix ( cannot invert C(1/(X'X))C' ) "); 00154 00155 00156 /*----- dispose of matrices -----*/ 00157 matrix_destroy (&ct); 00158 matrix_destroy (&xtxinvct); 00159 matrix_destroy (&t1); 00160 matrix_destroy (&t2); 00161 00162 RETURN (ok); 00163 } 00164 00165 00166 /*---------------------------------------------------------------------------*/ 00167 /* 00168 Calculate the error sum of squares. 00169 */ 00170 00171 float calc_sse 00172 ( 00173 matrix x, /* independent variable matrix */ 00174 vector b, /* vector of estimated regression parameters */ 00175 vector y /* vector of measured data */ 00176 ) 00177 00178 { 00179 #if 0 00180 vector yhat; /* product Xb */ 00181 #endif 00182 vector e; /* vector of residuals */ 00183 float sse; /* error sum of squares */ 00184 00185 00186 /*----- initialize vectors -----*/ 00187 #if 0 00188 vector_initialize (&yhat); 00189 #endif 00190 vector_initialize (&e); 00191 00192 00193 /*----- calculate the error sum of squares -----*/ 00194 #if 0 00195 vector_multiply (x, b, &yhat); 00196 vector_subtract (y, yhat, &e); 00197 sse = vector_dot (e, e); 00198 #else 00199 sse = vector_multiply_subtract( x , b , y , &e ) ; 00200 #endif 00201 00202 00203 /*----- dispose of vectors -----*/ 00204 vector_destroy (&e); 00205 #if 0 00206 vector_destroy (&yhat); 00207 #endif 00208 00209 00210 /*----- return SSE -----*/ 00211 return (sse); 00212 00213 } 00214 00215 00216 /*---------------------------------------------------------------------------*/ 00217 /* 00218 Calculate the error sum of squares and the residuals. 00219 */ 00220 00221 float calc_resids 00222 ( 00223 matrix x, /* independent variable matrix */ 00224 vector b, /* vector of estimated regression parameters */ 00225 vector y, /* vector of measured data */ 00226 vector * e /* vector of residuals */ 00227 ) 00228 00229 { 00230 #if 0 00231 vector yhat; /* product Xb */ 00232 #endif 00233 float sse; /* error sum of squares */ 00234 00235 00236 /*----- initialize vectors -----*/ 00237 #if 0 00238 vector_initialize (&yhat); 00239 #endif 00240 00241 00242 /*----- calculate the error sum of squares -----*/ 00243 #if 0 00244 vector_multiply (x, b, &yhat); 00245 vector_subtract (y, yhat, e); 00246 sse = vector_dot (*e, *e); 00247 #else 00248 sse = vector_multiply_subtract( x , b , y , e ) ; 00249 #endif 00250 00251 00252 /*----- dispose of vectors -----*/ 00253 #if 0 00254 vector_destroy (&yhat); 00255 #endif 00256 00257 00258 /*----- return SSE -----*/ 00259 return (sse); 00260 00261 } 00262 00263 00264 /*---------------------------------------------------------------------------*/ 00265 /* 00266 Calculate the error sum of squares. Also, return the fitted time series, 00267 and residual errors time series. 00268 */ 00269 00270 float calc_sse_fit 00271 ( 00272 matrix x, /* independent variable matrix */ 00273 vector b, /* vector of estimated regression parameters */ 00274 vector y, /* vector of measured data */ 00275 float * fitts, /* full model fitted time series */ 00276 float * errts /* full model residual error time series */ 00277 ) 00278 00279 { 00280 vector yhat; /* product Xb */ 00281 vector e; /* vector of residuals */ 00282 float sse; /* error sum of squares */ 00283 int it; /* time point index */ 00284 00285 00286 /*----- initialize vectors -----*/ 00287 vector_initialize (&yhat); 00288 vector_initialize (&e); 00289 00290 00291 /*----- calculate the error sum of squares -----*/ 00292 vector_multiply (x, b, &yhat); 00293 vector_subtract (y, yhat, &e); 00294 sse = vector_dotself( e ); 00295 00296 /*----- save the fitted time series and residual errors -----*/ 00297 for (it = 0; it < x.rows; it++) 00298 { 00299 fitts[it] = yhat.elts[it]; 00300 errts[it] = e.elts[it]; 00301 } 00302 00303 00304 /*----- dispose of vectors -----*/ 00305 vector_destroy (&e); 00306 vector_destroy (&yhat); 00307 00308 00309 /*----- return SSE -----*/ 00310 return (sse); 00311 00312 } 00313 00314 00315 /*---------------------------------------------------------------------------*/ 00316 /* 00317 Calculate the pure error sum of squares. 00318 */ 00319 00320 float calc_sspe 00321 ( 00322 vector y, /* vector of measured data */ 00323 int * levels, /* indices for repeat observations */ 00324 int * counts, /* number of observations at each level */ 00325 int c /* number of unique rows in ind. var. matrix */ 00326 ) 00327 00328 { 00329 register int i, j; /* indices */ 00330 register float * sum = NULL; /* sum of observations at each level */ 00331 register float diff; /* difference between observation and average */ 00332 register float sspe; /* pure error sum of squares */ 00333 00334 00335 /*----- initialize sum -----*/ 00336 sum = (float *) malloc (sizeof(float) * c); 00337 MTEST (sum); 00338 00339 for (j = 0; j < c; j++) 00340 sum[j] = 0.0; 00341 00342 00343 /*----- accumulate sum for each level -----*/ 00344 for (i = 0; i < y.dim; i++) 00345 { 00346 j = levels[i]; 00347 sum[j] += y.elts[i]; 00348 } 00349 00350 00351 /*----- calculate SSPE -----*/ 00352 sspe = 0.0; 00353 for (i = 0; i < y.dim; i++) 00354 { 00355 j = levels[i]; 00356 diff = y.elts[i] - (sum[j]/counts[j]); 00357 sspe += diff * diff; 00358 } 00359 00360 00361 free (sum); sum = NULL; 00362 00363 00364 /*----- return SSPE -----*/ 00365 return (sspe); 00366 00367 } 00368 00369 00370 /*---------------------------------------------------------------------------*/ 00371 /* 00372 Calculate the F-statistic for lack of fit. 00373 */ 00374 00375 float calc_flof 00376 ( 00377 int n, /* number of data points */ 00378 int p, /* number of parameters in the full model */ 00379 int c, /* number of unique rows in ind. var. matrix */ 00380 float sse, /* error sum of squares from full model */ 00381 float sspe /* error sum of squares due to pure error */ 00382 ) 00383 00384 { 00385 const float EPSILON = 1.0e-5; /* protection against divide by zero */ 00386 float mspe; /* mean square error due to pure error */ 00387 float sslf; /* error sum of squares due to lack of fit */ 00388 float mslf; /* mean square error due to lack of fit */ 00389 float flof; /* F-statistic for lack of fit */ 00390 00391 00392 /*----- calculate mean sum of squares due to pure error -----*/ 00393 mspe = sspe / (n - c); 00394 00395 00396 /*----- calculate mean sum of squares due to lack of fit -----*/ 00397 sslf = sse - sspe; 00398 mslf = sslf / (c - p); 00399 00400 00401 /*----- calculate F-statistic for lack of fit -----*/ 00402 if (mspe < EPSILON) 00403 flof = 0.0; 00404 else 00405 flof = mslf / mspe; 00406 00407 00408 /*----- return F-statistic for lack of fit -----*/ 00409 return (flof); 00410 00411 } 00412 00413 00414 /*---------------------------------------------------------------------------*/ 00415 /* 00416 Calculate the regression coefficients. 00417 */ 00418 00419 void calc_coef 00420 ( 00421 matrix xtxinvxt, /* matrix: (1/(X'X))X' */ 00422 vector y, /* vector of measured data */ 00423 vector * coef /* vector of regression parameters */ 00424 ) 00425 00426 { 00427 00428 /*----- calculate regression coefficients -----*/ 00429 vector_multiply (xtxinvxt, y, coef); 00430 00431 } 00432 00433 00434 /*---------------------------------------------------------------------------*/ 00435 /* 00436 Calculate least squares estimators under the reduced model. 00437 */ 00438 00439 void calc_rcoef 00440 ( 00441 matrix a, /* constant matrix for least squares calculation */ 00442 vector coef, /* vector of regression parameters */ 00443 vector * rcoef /* reduced model regression coefficients */ 00444 ) 00445 00446 { 00447 00448 /*----- calculate reduced model regression coefficients -----*/ 00449 vector_multiply (a, coef, rcoef); 00450 00451 } 00452 00453 00454 /*---------------------------------------------------------------------------*/ 00455 /* 00456 Calculate linear combinations of regression coefficients. 00457 */ 00458 00459 void calc_lcoef 00460 ( 00461 matrix c, /* matrix representing GLT linear constraints */ 00462 vector coef, /* vector of regression parameters */ 00463 vector * lcoef /* linear combinations of regression parameters */ 00464 ) 00465 00466 { 00467 00468 /*----- calculate linear combinations -----*/ 00469 vector_multiply (c, coef, lcoef); 00470 00471 } 00472 00473 00474 /*---------------------------------------------------------------------------*/ 00475 /* 00476 Calculate standard deviations and t-statistics for the regression 00477 coefficients. 00478 */ 00479 00480 void calc_tcoef 00481 ( 00482 int n, /* number of data points */ 00483 int p, /* number of parameters in the full model */ 00484 float sse, /* error sum of squares */ 00485 matrix xtxinv, /* matrix: 1/(Xf'Xf) */ 00486 vector coef, /* vector of regression parameters */ 00487 vector * scoef, /* std. devs. for regression parameters */ 00488 vector * tcoef /* t-statistics for regression parameters */ 00489 ) 00490 00491 { 00492 const float MAXT = 1000.0; /* maximum value for t-statistic */ 00493 const float EPSILON = 1.0e-5; /* protection against divide by zero */ 00494 int df; /* error degrees of freedom */ 00495 float mse; /* mean square error */ 00496 register int i; /* parameter index */ 00497 float stddev; /* standard deviation for parameter estimate */ 00498 float tstat; /* t-statistic for parameter estimate */ 00499 float num; /* numerator of t-statistic */ 00500 float var; /* variance for parameter estimate */ 00501 00502 00503 /*----- Create vectors -----*/ 00504 vector_create (p, scoef); 00505 vector_create (p, tcoef); 00506 00507 00508 /*----- Calculate mean square error -----*/ 00509 df = n - p; 00510 mse = sse / df; 00511 00512 00513 for (i = 0; i < xtxinv.rows; i++) 00514 { 00515 /*----- Calculate standard deviation for regression parameters -----*/ 00516 var = mse * xtxinv.elts[i][i]; 00517 if (var <= 0.0) stddev = 0.0; 00518 else stddev = sqrt (var); 00519 scoef->elts[i] = stddev; 00520 00521 00522 /*----- Calculate t-statistic for regression parameters -----*/ 00523 num = coef.elts[i]; 00524 if (num > MAXT*stddev) tstat = MAXT; 00525 else if (num < -MAXT*stddev) tstat = -MAXT; 00526 else if (stddev < EPSILON) tstat = 0.0; 00527 else tstat = num / stddev; 00528 00529 00530 /*----- Limit range of values for t-statistic -----*/ 00531 if (tstat < -MAXT) tstat = -MAXT; 00532 if (tstat > MAXT) tstat = MAXT; 00533 00534 tcoef->elts[i] = tstat; 00535 } 00536 } 00537 00538 00539 /*---------------------------------------------------------------------------*/ 00540 /* 00541 Calculate the F-statistic for significance of the regression. 00542 */ 00543 00544 float calc_freg 00545 ( 00546 int n, /* number of data points */ 00547 int p, /* number of parameters in the full model */ 00548 int q, /* number of parameters in the rdcd model */ 00549 float ssef, /* error sum of squares from full model */ 00550 float sser /* error sum of squares from reduced model */ 00551 ) 00552 00553 { 00554 const float MAXF = 1000.0; /* maximum value for F-statistic */ 00555 const float EPSILON = 1.0e-5; /* protection against divide by zero */ 00556 float msef; /* mean square error for the full model */ 00557 float msreg; /* mean square due to the regression */ 00558 float freg; /* F-statistic for the full regression model */ 00559 00560 00561 /*----- Order of reduced model = order of full model ??? -----*/ 00562 if (p <= q) return (0.0); 00563 00564 00565 /*----- Calculate numerator and denominator of F-statistic -----*/ 00566 msreg = (sser - ssef) / (p - q); if (msreg < 0.0) msreg = 0.0; 00567 msef = ssef / (n - p); if (msef < 0.0) msef = 0.0; 00568 00569 if (msreg > MAXF*msef) freg = MAXF; 00570 else 00571 if (msef < EPSILON) 00572 freg = 0.0; 00573 else 00574 freg = msreg / msef; 00575 00576 00577 /*----- Limit range of values for F-statistic -----*/ 00578 if (freg < 0.0) freg = 0.0; 00579 else if (freg > MAXF) freg = MAXF; 00580 00581 00582 /*----- Return F-statistic for significance of the regression -----*/ 00583 return (freg); 00584 00585 } 00586 00587 00588 /*---------------------------------------------------------------------------*/ 00589 /* 00590 Calculate the coefficient of multiple determination R^2. 00591 */ 00592 00593 float calc_rsqr 00594 ( 00595 float ssef, /* error sum of squares from full model */ 00596 float sser /* error sum of squares from reduced model */ 00597 ) 00598 00599 { 00600 const float EPSILON = 1.0e-5; /* protection against divide by zero */ 00601 float rsqr; /* coeff. of multiple determination R^2 */ 00602 00603 00604 /*----- coefficient of multiple determination R^2 -----*/ 00605 if (sser < EPSILON) 00606 rsqr = 0.0; 00607 else 00608 rsqr = (sser - ssef) / sser; 00609 00610 00611 /*----- Limit range of values for R^2 -----*/ 00612 if (rsqr < 0.0) rsqr = 0.0; 00613 else if (rsqr > 1.0) rsqr = 1.0; 00614 00615 00616 /*----- Return coefficient of multiple determination R^2 -----*/ 00617 return (rsqr); 00618 } 00619 00620 00621 /*---------------------------------------------------------------------------*/ 00622 00623 00624 00625 00626