00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #undef SETUP_BLAS1
00072 #undef SETUP_BLAS2
00073 #undef DOTP
00074 #undef VSUB
00075 #undef MATVEC
00076 #undef SUBMATVEC
00077
00078 #if defined(USE_SCSLBLAS)
00079 # include <scsl_blas.h>
00080 # define SETUP_BLAS1
00081 # undef SETUP_BLAS2
00082 # define TRANSA "T"
00083 #elif defined(USE_SUNPERF)
00084 # include <sunperf.h>
00085 # define SETUP_BLAS1
00086 # undef SETUP_BLAS2
00087 # define TRANSA 'T'
00088 #endif
00089
00090
00091
00092 #ifdef SETUP_BLAS1
00093 # define DOTP(n,x,y,z) *(z)=ddot(n,x,1,y,1)
00094 # define VSUB(n,x,y,z) (memcpy(z,x,sizeof(double)*n),daxpy(n,-1.0,y,1,z,1))
00095 #endif
00096
00097
00098
00099
00100
00101
00102
00103
00104 #ifdef DONT_USE_MATRIX_MAT
00105 # undef SETUP_BLAS2
00106 #endif
00107
00108 #ifdef SETUP_BLAS2
00109
00110 # define MATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows , \
00111 1.0 , (m).mat , (m).cols , \
00112 (v).elts , 1 , 0.0 , (z).elts , 1 )
00113
00114 # define SUBMATVEC(m,v,z) dgemv( TRANSA , (m).cols , (m).rows , \
00115 -1.0 , (m).mat , (m).cols , \
00116 (v).elts , 1 , 1.0 , (z).elts , 1 )
00117 #endif
00118
00119
00120 static double flops=0.0 ;
00121 double get_matrix_flops(void){ return flops; }
00122
00123 static double dotnum=0.0 , dotsum=0.0 ;
00124 double get_matrix_dotlen(void){ return (dotnum > 0.0) ? dotsum/dotnum : 0.0 ; }
00125
00126 #define ENABLE_FLOPS
00127
00128
00129
00130
00131
00132 void matrix_error (char * message)
00133 {
00134 printf ("Matrix error: %s \n", message);
00135 exit (1);
00136 }
00137
00138
00139
00140
00141
00142
00143
00144 void matrix_initialize (matrix * m)
00145 {
00146 m->rows = 0;
00147 m->cols = 0;
00148 m->elts = NULL;
00149 #ifndef DONT_USE_MATRIX_MAT
00150 m->mat = NULL ;
00151 #endif
00152 }
00153
00154
00155
00156
00157
00158
00159
00160 void matrix_destroy (matrix * m)
00161 {
00162 if (m->elts != NULL){
00163 #ifdef DONT_USE_MATRIX_MAT
00164 int i ;
00165 for( i=0 ; i < m->rows ; i++ )
00166 if( m->elts[i] != NULL ) free(m->elts[i]) ;
00167 #endif
00168 free(m->elts) ;
00169 }
00170 #ifndef DONT_USE_MATRIX_MAT
00171 if( m->mat != NULL) free (m->mat ) ;
00172 #endif
00173 matrix_initialize (m);
00174 }
00175
00176
00177
00178
00179
00180
00181 void matrix_create (int rows, int cols, matrix * m)
00182 {
00183 register int i ;
00184
00185 matrix_destroy (m);
00186
00187 if ((rows < 0) || (cols < 0))
00188 matrix_error ("Illegal dimensions for new matrix");
00189
00190 m->rows = rows;
00191 m->cols = cols;
00192 if ((rows < 1) || (cols < 1)) return;
00193
00194 m->elts = (double **) malloc (sizeof(double *) * rows);
00195 if (m->elts == NULL)
00196 matrix_error ("Memory allocation error");
00197
00198 #ifdef DONT_USE_MATRIX_MAT
00199 for (i = 0; i < rows; i++){
00200 m->elts[i] = (double *) calloc (sizeof(double) , cols);
00201 if (m->elts[i] == NULL) matrix_error ("Memory allocation error");
00202 }
00203 #else
00204 m->mat = (double *) calloc( sizeof(double) , rows*cols ) ;
00205 if( m->mat == NULL )
00206 matrix_error ("Memory allocation error");
00207 for (i = 0; i < rows; i++)
00208 m->elts[i] = m->mat + (i*cols) ;
00209 #endif
00210 }
00211
00212
00213
00214
00215
00216
00217
00218 void matrix_print (matrix m)
00219 {
00220 int i, j;
00221 int rows, cols;
00222 double val ;
00223 int ipr ;
00224
00225 rows = m.rows;
00226 cols = m.cols;
00227
00228 for( i=0 ; i < rows ; i++ ){
00229 for( j=0 ; j < cols ; j++ ){
00230 val = (int)m.elts[i][j] ;
00231 if( val != m.elts[i][j] || fabs(val) > 9.0 ) goto zork ;
00232 }
00233 }
00234 zork:
00235 ipr = (i==rows && j==cols) ;
00236
00237 for (i = 0; i < rows; i++)
00238 {
00239 for (j = 0; j < cols; j++)
00240 if( ipr ) printf (" %2d" , (int)m.elts[i][j]);
00241 else printf (" %10.4g", m.elts[i][j]);
00242 printf (" \n");
00243 }
00244 printf (" \n"); fflush(stdout) ;
00245 }
00246
00247
00248
00249
00250
00251
00252
00253 void matrix_sprint (char * s, matrix m)
00254 {
00255 printf ("%s \n", s);
00256 matrix_print (m);
00257 }
00258
00259
00260
00261
00262
00263
00264
00265 void matrix_file_write (char * filename, matrix m)
00266 {
00267 int i, j;
00268 int rows, cols;
00269 FILE * outfile = NULL;
00270
00271
00272
00273 if (filename == NULL) matrix_error ("Missing matrix file name");
00274
00275
00276 outfile = fopen (filename, "w");
00277
00278 rows = m.rows;
00279 cols = m.cols;
00280
00281 for (i = 0; i < rows; i++)
00282 {
00283 for (j = 0; j < cols; j++)
00284 fprintf (outfile, " %g", m.elts[i][j]);
00285 fprintf (outfile, " \n");
00286 }
00287 fprintf (outfile, " \n");
00288
00289 fclose (outfile);
00290 }
00291
00292
00293
00294
00295
00296
00297 void matrix_enter (matrix * m)
00298 {
00299 int rows, cols;
00300 int i, j;
00301 float fval;
00302
00303 printf ("Enter number of rows: "); fflush(stdout) ;
00304 scanf ("%d", &rows);
00305 printf ("Enter number of cols: "); fflush(stdout) ;
00306 scanf ("%d", &cols);
00307
00308 matrix_create (rows, cols, m);
00309
00310 for (i = 0; i < rows; i++)
00311 for (j = 0; j < cols; j++)
00312 {
00313 printf ("elts[%d][%d] = ", i, j); fflush(stdout);
00314 scanf ("%f", &fval);
00315 m->elts[i][j] = fval;
00316 }
00317 }
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 void matrix_file_read (char * filename, int rows, int cols, matrix * m,
00329 int error_exit)
00330 {
00331 int i, j;
00332
00333 MRI_IMAGE * im, * flim;
00334
00335 float * far;
00336 char message [80];
00337
00338
00339
00340 if (filename == NULL) matrix_error ("Missing matrix file name");
00341
00342
00343
00344 flim = mri_read_1D(filename);
00345 if (flim == NULL)
00346 if (error_exit)
00347 {
00348 sprintf (message, "Unable to read matrix from file: %s", filename);
00349 matrix_error (message);
00350 }
00351 else
00352 {
00353 matrix_destroy (m);
00354 return;
00355 }
00356
00357
00358
00359 far = MRI_FLOAT_PTR(flim);
00360
00361
00362
00363 if ( (rows != flim->nx) || (cols != flim->ny) )
00364 if (error_exit)
00365 {
00366 sprintf (message,
00367 "In matrix file: %s Expected: %d x %d Actual: %d x %d",
00368 filename, rows, cols, flim->nx, flim->ny);
00369 matrix_error (message);
00370 }
00371 else
00372 {
00373 matrix_destroy (m);
00374 return;
00375 }
00376
00377
00378 matrix_create (rows, cols, m);
00379
00380
00381
00382 for (i = 0; i < rows; i++)
00383 for (j = 0; j < cols; j++)
00384 m->elts[i][j] = far[i + j*rows];
00385
00386
00387 mri_free (flim); flim = NULL;
00388
00389 }
00390
00391
00392
00393
00394
00395
00396
00397 void array_to_matrix (int rows, int cols, float ** f, matrix * m)
00398 {
00399 register int i, j;
00400
00401 matrix_create (rows, cols, m);
00402
00403 for (i = 0; i < rows; i++)
00404 for (j = 0; j < cols; j++)
00405 m->elts[i][j] = f[i][j];
00406 }
00407
00408
00409
00410
00411
00412
00413
00414 void matrix_equate (matrix a, matrix * b)
00415 {
00416 register int i, j;
00417 register int rows, cols;
00418
00419 rows = a.rows;
00420 cols = a.cols;
00421
00422 matrix_create (rows, cols, b);
00423
00424 for (i = 0; i < rows; i++){
00425 #if 0
00426 for (j = 0; j < cols; j++)
00427 b->elts[i][j] = a.elts[i][j];
00428 #else
00429 if( cols > 0 )
00430 memcpy( b->elts[i] , a.elts[i] , sizeof(double)*cols ) ;
00431 #endif
00432 }
00433 }
00434
00435
00436
00437
00438
00439
00440
00441 void matrix_extract (matrix a, int p, int * list, matrix * b)
00442 {
00443 register int i, j;
00444 register int rows, cols;
00445
00446 rows = a.rows;
00447 cols = p;
00448
00449 matrix_create (rows, cols, b);
00450
00451 for (i = 0; i < rows; i++)
00452 for (j = 0; j < cols; j++)
00453 b->elts[i][j] = a.elts[i][list[j]];
00454 }
00455
00456
00457
00458
00459
00460
00461
00462 void matrix_extract_rows (matrix a, int p, int * list, matrix * b)
00463 {
00464 register int i, j;
00465 register int rows, cols;
00466
00467 rows = p;
00468 cols = a.cols;
00469
00470 matrix_create (rows, cols, b);
00471
00472 for (i = 0; i < rows; i++)
00473 for (j = 0; j < cols; j++)
00474 b->elts[i][j] = a.elts[list[i]][j];
00475 }
00476
00477
00478
00479
00480
00481
00482
00483 void matrix_identity (int n, matrix * m)
00484 {
00485 register int i, j;
00486
00487 if (n < 0)
00488 matrix_error ("Illegal dimensions for identity matrix");
00489
00490 matrix_create (n, n, m);
00491
00492 for (i = 0; i < n; i++)
00493 for (j = 0; j < n; j++)
00494 if (i == j)
00495 m->elts[i][j] = 1.0;
00496 else
00497 m->elts[i][j] = 0.0;
00498 }
00499
00500
00501
00502
00503
00504
00505
00506 void matrix_add (matrix a, matrix b, matrix * c)
00507 {
00508 register int rows, cols;
00509 register int i, j;
00510
00511 if ((a.rows != b.rows) || (a.cols != b.cols))
00512 matrix_error ("Incompatible dimensions for matrix addition");
00513
00514 rows = a.rows;
00515 cols = a.cols;
00516
00517 matrix_create (rows, cols, c);
00518
00519 for (i = 0; i < rows; i++)
00520 for (j = 0; j < cols; j++)
00521 c->elts[i][j] = a.elts[i][j] + b.elts[i][j];
00522
00523 #ifdef ENABLE_FLOPS
00524 flops += rows*cols ;
00525 #endif
00526 }
00527
00528
00529
00530
00531
00532
00533
00534 void matrix_subtract (matrix a, matrix b, matrix * c)
00535 {
00536 register int rows, cols;
00537 register int i, j;
00538
00539 if ((a.rows != b.rows) || (a.cols != b.cols))
00540 matrix_error ("Incompatible dimensions for matrix subtraction");
00541
00542 rows = a.rows;
00543 cols = a.cols;
00544
00545 matrix_create (rows, cols, c);
00546
00547 for (i = 0; i < rows; i++)
00548 for (j = 0; j < cols; j++)
00549 c->elts[i][j] = a.elts[i][j] - b.elts[i][j];
00550
00551 #ifdef ENABLE_FLOPS
00552 flops += rows*cols ;
00553 #endif
00554 }
00555
00556
00557
00558
00559
00560
00561
00562 void matrix_multiply (matrix a, matrix b, matrix * c)
00563 {
00564 int rows, cols;
00565 register int i, j, k;
00566 register double sum ;
00567
00568 if (a.cols != b.rows)
00569 matrix_error ("Incompatible dimensions for matrix multiplication");
00570
00571 rows = a.rows;
00572 cols = b.cols;
00573
00574 matrix_create (rows, cols, c);
00575
00576 for (i = 0; i < rows; i++)
00577 for (j = 0; j < cols; j++)
00578 {
00579 sum = 0.0 ;
00580 for (k = 0; k < a.cols; k++)
00581 sum += a.elts[i][k] * b.elts[k][j];
00582 c->elts[i][j] = sum;
00583 }
00584
00585 #ifdef ENABLE_FLOPS
00586 flops += 2.0*rows*cols*cols ;
00587 #endif
00588 }
00589
00590
00591
00592
00593
00594
00595
00596 void matrix_scale (double k, matrix a, matrix * c)
00597 {
00598 register int rows, cols;
00599 register int i, j;
00600
00601 rows = a.rows;
00602 cols = a.cols;
00603
00604 matrix_create (rows, cols, c);
00605
00606 for (i = 0; i < rows; i++)
00607 for (j = 0; j < cols; j++)
00608 c->elts[i][j] = k * a.elts[i][j];
00609
00610 #ifdef ENABLE_FLOPS
00611 flops += rows*cols ;
00612 #endif
00613 }
00614
00615
00616
00617
00618
00619
00620
00621 void matrix_transpose (matrix a, matrix * t)
00622 {
00623 register int rows, cols;
00624 register int i, j;
00625
00626 rows = a.cols;
00627 cols = a.rows;
00628
00629 matrix_create (rows, cols, t);
00630 for (i = 0; i < rows; i++)
00631 for (j = 0; j < cols; j++)
00632 t->elts[i][j] = a.elts[j][i];
00633 }
00634
00635
00636
00637
00638
00639
00640
00641 int matrix_inverse (matrix a, matrix * ainv)
00642 {
00643 const double epsilon = 1.0e-10;
00644 matrix tmp;
00645 register int i, j, ii, n;
00646 register double fval;
00647 register double fmax;
00648 register double * p;
00649
00650 matrix_initialize (&tmp);
00651
00652
00653 if (a.rows != a.cols)
00654 matrix_error ("Illegal dimensions for matrix inversion");
00655
00656 #if 0
00657 matrix_sprint("matrix_inverse:",a) ;
00658 #endif
00659
00660 n = a.rows;
00661 matrix_identity (n, ainv);
00662 matrix_equate (a, &tmp);
00663
00664 for (i = 0; i < n; i++)
00665 {
00666 fmax = fabs(tmp.elts[i][i]);
00667 for (j = i+1; j < n; j++)
00668 if (fabs(tmp.elts[j][i]) > fmax)
00669 {
00670 fmax = fabs(tmp.elts[j][i]);
00671 p = tmp.elts[i];
00672 tmp.elts[i] = tmp.elts[j];
00673 tmp.elts[j] = p;
00674 p = ainv->elts[i];
00675 ainv->elts[i] = ainv->elts[j];
00676 ainv->elts[j] = p;
00677 }
00678
00679 if (fmax < epsilon)
00680 {
00681 matrix_destroy (&tmp);
00682 return (0);
00683 }
00684
00685
00686 fval = 1.0 / tmp.elts[i][i];
00687 for (j = 0; j < n; j++)
00688 {
00689 tmp.elts[i][j] *= fval;
00690 ainv->elts[i][j] *= fval;
00691 }
00692 for (ii = 0; ii < n; ii++)
00693 if (ii != i)
00694 {
00695 fval = tmp.elts[ii][i];
00696 for (j = 0; j < n; j++)
00697 {
00698 tmp.elts[ii][j] -= fval*tmp.elts[i][j];
00699 ainv->elts[ii][j] -= fval*ainv->elts[i][j];
00700 }
00701 }
00702
00703 }
00704 matrix_destroy (&tmp);
00705 #ifdef ENABLE_FLOPS
00706 flops += 3.0*n*n*n ;
00707 #endif
00708 return (1);
00709 }
00710
00711
00712
00713
00714
00715
00716
00717
00718 int matrix_inverse_dsc (matrix a, matrix * ainv)
00719 {
00720 matrix atmp;
00721 register int i, j, n;
00722 register double *diag ;
00723 int mir ;
00724
00725 if (a.rows != a.cols)
00726 matrix_error ("Illegal dimensions for matrix inversion");
00727
00728 matrix_initialize (&atmp);
00729
00730 n = a.rows;
00731 matrix_equate (a, &atmp);
00732 diag = (double *)malloc( sizeof(double)*n ) ;
00733 for( i=0 ; i < n ; i++ ){
00734 diag[i] = fabs(atmp.elts[i][i]) ;
00735 if( diag[i] == 0.0 ) diag[i] = 1.0 ;
00736 diag[i] = 1.0 / sqrt(diag[i]) ;
00737 }
00738
00739 for( i=0 ; i < n ; i++ )
00740 for( j=0 ; j < n ; j++ )
00741 atmp.elts[i][j] *= diag[i]*diag[j] ;
00742
00743 mir = matrix_inverse( atmp , ainv ) ;
00744
00745 for( i=0 ; i < n ; i++ )
00746 for( j=0 ; j < n ; j++ )
00747 ainv->elts[i][j] *= diag[i]*diag[j] ;
00748
00749 matrix_destroy (&atmp); free((void *)diag) ;
00750 #ifdef ENABLE_FLOPS
00751 flops += 4.0*n*n + 4.0*n ;
00752 #endif
00753 return (mir);
00754 }
00755
00756
00757
00758
00759
00760
00761
00762
00763 int matrix_sqrt (matrix a, matrix * s)
00764 {
00765 const int MAX_ITER = 100;
00766 int n;
00767 int ok;
00768 int iter;
00769 register float sse, psse;
00770 register int i, j;
00771 matrix x, xinv, axinv, xtemp, error;
00772
00773 matrix_initialize (&x);
00774 matrix_initialize (&xinv);
00775 matrix_initialize (&axinv);
00776 matrix_initialize (&xtemp);
00777 matrix_initialize (&error);
00778
00779
00780 if (a.rows != a.cols)
00781 matrix_error ("Illegal dimensions for matrix square root");
00782
00783
00784 n = a.rows;
00785 matrix_identity (n, &x);
00786
00787
00788 psse = 1.0e+30;
00789 for (iter = 0; iter < MAX_ITER; iter++)
00790 {
00791 ok = matrix_inverse (x, &xinv);
00792 if (! ok) return (0);
00793 matrix_multiply (a, xinv, &axinv);
00794 matrix_add (x, axinv, &xtemp);
00795 matrix_scale (0.5, xtemp, &x);
00796
00797 matrix_multiply (x, x, &xtemp);
00798 matrix_subtract (a, xtemp, &error);
00799 sse = 0.0;
00800 for (i = 0; i < n; i++)
00801 for (j = 0; j < n; j++)
00802 sse += error.elts[i][j] * error.elts[i][j] ;
00803
00804 if (sse >= psse) break;
00805
00806 psse = sse;
00807 }
00808
00809 if (iter == MAX_ITER) return (0);
00810
00811 matrix_equate (x, s);
00812
00813 matrix_destroy (&x);
00814 matrix_destroy (&xinv);
00815 matrix_destroy (&axinv);
00816 matrix_destroy (&xtemp);
00817
00818 return (1);
00819
00820 }
00821
00822
00823
00824
00825
00826
00827
00828 void vector_initialize (vector * v)
00829 {
00830 v->dim = 0;
00831 v->elts = NULL;
00832 }
00833
00834
00835
00836
00837
00838
00839
00840 void vector_destroy (vector * v)
00841 {
00842 if (v->elts != NULL) free (v->elts);
00843 vector_initialize (v);
00844 }
00845
00846
00847
00848
00849
00850
00851
00852 void vector_create (int dim, vector * v)
00853 {
00854 register int i;
00855
00856 vector_destroy (v);
00857
00858 if (dim < 0) matrix_error ("Illegal dimensions for new vector");
00859
00860 v->dim = dim;
00861 if (dim < 1) return;
00862
00863 v->elts = (double *) calloc (sizeof(double) , dim);
00864 if (v->elts == NULL)
00865 matrix_error ("Memory allocation error");
00866
00867 }
00868
00869
00870 static void vector_create_noinit(int dim, vector * v)
00871 {
00872 register int i;
00873
00874 vector_destroy (v);
00875
00876 if (dim < 0) matrix_error ("Illegal dimensions for new vector");
00877
00878 v->dim = dim;
00879 if (dim < 1) return;
00880
00881 v->elts = (double *) malloc (sizeof(double) * dim);
00882 if (v->elts == NULL)
00883 matrix_error ("Memory allocation error");
00884 }
00885
00886
00887
00888
00889
00890
00891 void vector_print (vector v)
00892 {
00893 int i;
00894
00895 for (i = 0; i < v.dim; i++)
00896 printf (" %10.4g \n", v.elts[i]);
00897 printf (" \n"); fflush(stdout);
00898
00899 }
00900
00901
00902
00903
00904
00905
00906
00907 void vector_sprint (char * s, vector v)
00908 {
00909 printf ("%s \n", s);
00910
00911 vector_print (v);
00912 }
00913
00914
00915
00916
00917
00918
00919
00920 void vector_equate (vector a, vector * b)
00921 {
00922 register int i, dim;
00923
00924 dim = a.dim;
00925
00926 vector_create_noinit (dim, b);
00927
00928 #if 0
00929 for (i = 0; i < dim; i++)
00930 b->elts[i] = a.elts[i];
00931 #else
00932 if( dim > 0 )
00933 memcpy( b->elts , a.elts , sizeof(double)*dim ) ;
00934 #endif
00935 }
00936
00937
00938
00939
00940
00941
00942
00943 void array_to_vector (int dim, float * f, vector * v)
00944 {
00945 register int i;
00946
00947 vector_create_noinit (dim, v);
00948
00949 for (i = 0; i < dim; i++)
00950 v->elts[i] = f[i];
00951 }
00952
00953
00954
00955
00956
00957
00958
00959
00960 void column_to_vector (matrix m, int c, vector * v)
00961 {
00962 register int i;
00963 register int dim;
00964
00965 dim = m.rows;
00966 vector_create_noinit (dim, v);
00967
00968 for (i = 0; i < dim; i++)
00969 v->elts[i] = m.elts[i][c];
00970 }
00971
00972
00973
00974
00975
00976
00977
00978
00979 void vector_to_array (vector v, float * f)
00980 {
00981 register int i;
00982
00983 for (i = 0; i < v.dim; i++)
00984 f[i] = v.elts[i];
00985 }
00986
00987
00988
00989
00990
00991
00992
00993 void vector_add (vector a, vector b, vector * c)
00994 {
00995 register int i, dim;
00996
00997 if (a.dim != b.dim)
00998 matrix_error ("Incompatible dimensions for vector addition");
00999
01000 dim = a.dim;
01001
01002 vector_create_noinit (dim, c);
01003
01004 for (i = 0; i < dim; i++)
01005 c->elts[i] = a.elts[i] + b.elts[i];
01006
01007 #ifdef ENABLE_FLOPS
01008 flops += dim ;
01009 #endif
01010 }
01011
01012
01013
01014
01015
01016
01017
01018 void vector_subtract (vector a, vector b, vector * c)
01019 {
01020 register int i, dim;
01021 register double *aa,*bb,*cc ;
01022
01023 if (a.dim != b.dim)
01024 matrix_error ("Incompatible dimensions for vector subtraction");
01025
01026 dim = a.dim;
01027
01028 vector_create_noinit (dim, c);
01029
01030 aa = a.elts ; bb = b.elts ; cc = c->elts ;
01031 for (i = 0; i < dim; i++)
01032 #if 0
01033 c->elts[i] = a.elts[i] - b.elts[i];
01034 #else
01035 cc[i] = aa[i] - bb[i] ;
01036 #endif
01037
01038 #ifdef ENABLE_FLOPS
01039 flops += dim ;
01040 #endif
01041 }
01042
01043
01044
01045 #define UNROLL_VECMUL
01046
01047
01048
01049
01050
01051 void vector_multiply (matrix a, vector b, vector * c)
01052 {
01053 register int rows, cols;
01054 register int i, j;
01055 register double *bb ;
01056 register double sum ;
01057 #ifdef DOTP
01058 register double **aa , *cc ;
01059 #else
01060 register double *aa ;
01061 #endif
01062
01063
01064 if (a.cols != b.dim){
01065 char str[444] ;
01066 sprintf(str,
01067 "Incompatible dimensions for vector multiplication: %dx%d X %d",
01068 a.rows,a.cols,b.dim ) ;
01069 matrix_error(str) ;
01070 }
01071
01072 rows = a.rows;
01073 cols = a.cols;
01074
01075 vector_create_noinit (rows, c);
01076
01077 if( cols <= 0 ){
01078 for( i=0 ; i < rows ; i++ ) c->elts[i] = 0.0 ;
01079 return ;
01080 }
01081
01082 bb = b.elts ;
01083
01084 #ifdef MATVEC
01085 MATVEC( a , b , *c ) ;
01086
01087 #elif defined(DOTP)
01088 aa = a.elts ; cc = c->elts ;
01089 i = rows%2 ;
01090 if( i == 1 ) DOTP(cols,aa[0],bb,cc) ;
01091 for( ; i < rows ; i+=2 ){
01092 DOTP(cols,aa[i] ,bb,cc+i ) ;
01093 DOTP(cols,aa[i+1],bb,cc+(i+1)) ;
01094 }
01095 #else
01096
01097 #ifdef UNROLL_VECMUL
01098 if( cols%2 == 0 ){
01099 for (i = 0; i < rows; i++){
01100 sum = 0.0 ; aa = a.elts[i] ;
01101 for (j = 0; j < cols; j+=2 )
01102 sum += aa[j]*bb[j] + aa[j+1]*bb[j+1];
01103 c->elts[i] = sum ;
01104 }
01105 } else {
01106 for (i = 0; i < rows; i++){
01107 aa = a.elts[i] ; sum = aa[0]*bb[0] ;
01108 for (j = 1; j < cols; j+=2 )
01109 sum += aa[j]*bb[j] + aa[j+1]*bb[j+1];
01110 c->elts[i] = sum ;
01111 }
01112 }
01113 #else
01114 for (i = 0; i < rows; i++){
01115 sum = 0.0 ; aa = a.elts[i] ;
01116 for (j = 0; j < cols; j++ ) sum += aa[j]*bb[j] ;
01117 c->elts[i] = sum ;
01118 }
01119 #endif
01120
01121 #endif
01122
01123 #ifdef ENABLE_FLOPS
01124 flops += 2.0*rows*cols ;
01125 dotsum += rows*cols ; dotnum += rows ;
01126 #endif
01127 return ;
01128 }
01129
01130
01131
01132
01133
01134
01135
01136 double vector_multiply_subtract (matrix a, vector b, vector c, vector * d)
01137 {
01138 register int rows, cols;
01139 register int i, j;
01140 register double *bb ;
01141 #ifdef DOTP
01142 double qsum,sum , **aa , *dd,*cc,*ee ;
01143 #else
01144 register double qsum,sum, *aa ;
01145 #endif
01146
01147
01148 if (a.cols != b.dim || a.rows != c.dim )
01149 matrix_error ("Incompatible dimensions for vector multiplication-subtraction");
01150
01151 rows = a.rows;
01152 cols = a.cols;
01153
01154 vector_create_noinit (rows, d);
01155
01156 if( cols <= 0 ){
01157 qsum = 0.0 ;
01158 for( i=0 ; i < rows ; i++ ){
01159 d->elts[i] = c.elts[i] ;
01160 qsum += d->elts[i] * d->elts[i] ;
01161 }
01162 return qsum ;
01163 }
01164
01165 qsum = 0.0 ; bb = b.elts ;
01166
01167 #ifdef DOTP
01168 aa = a.elts ; dd = d->elts ; cc = c.elts ;
01169 ee = (double *)malloc(sizeof(double)*rows) ;
01170 i = rows%2 ;
01171 if( i == 1 ) DOTP(cols,aa[0],bb,ee) ;
01172 for( ; i < rows ; i+=2 ){
01173 DOTP(cols,aa[i] ,bb,ee+i ) ;
01174 DOTP(cols,aa[i+1],bb,ee+(i+1)) ;
01175 }
01176 VSUB(rows,cc,ee,dd) ;
01177 DOTP(rows,dd,dd,&qsum) ;
01178 free((void *)ee) ;
01179 #else
01180
01181 #ifdef UNROLL_VECMUL
01182 if( cols%2 == 0 ){
01183 for (i = 0; i < rows; i++){
01184 aa = a.elts[i] ; sum = c.elts[i] ;
01185 for (j = 0; j < cols; j+=2)
01186 sum -= aa[j]*bb[j] + aa[j+1]*bb[j+1];
01187 d->elts[i] = sum ; qsum += sum*sum ;
01188 }
01189 } else {
01190 for (i = 0; i < rows; i++){
01191 aa = a.elts[i] ; sum = c.elts[i] - aa[0]*bb[0] ;
01192 for (j = 1; j < cols; j+=2)
01193 sum -= aa[j]*bb[j] + aa[j+1]*bb[j+1];
01194 d->elts[i] = sum ; qsum += sum*sum ;
01195 }
01196 }
01197 #else
01198 for (i = 0; i < rows; i++){
01199 aa = a.elts[i] ; sum = c.elts[i] ;
01200 for (j = 0; j < cols; j++) sum -= aa[j] * bb[j] ;
01201 d->elts[i] = sum ; qsum += sum*sum ;
01202 }
01203 #endif
01204
01205 #endif
01206
01207 #ifdef ENABLE_FLOPS
01208 flops += 2.0*rows*(cols+1) ;
01209 dotsum += rows*cols ; dotnum += rows ;
01210 #endif
01211
01212 return qsum ;
01213 }
01214
01215
01216
01217
01218
01219
01220 double vector_dot (vector a, vector b)
01221 {
01222 register int i, dim;
01223 register double sum;
01224 register double *aa , *bb ;
01225
01226 if (a.dim != b.dim)
01227 matrix_error ("Incompatible dimensions for vector dot product");
01228
01229 dim = a.dim;
01230
01231 sum = 0.0;
01232 aa = a.elts ; bb = b.elts ;
01233 for (i = 0; i < dim; i++)
01234 #if 0
01235 sum += a.elts[i] * b.elts[i];
01236 #else
01237 sum += aa[i] * bb[i] ;
01238 #endif
01239
01240 #ifdef ENABLE_FLOPS
01241 flops += 2.0*dim ;
01242 #endif
01243 return (sum);
01244 }
01245
01246
01247
01248
01249
01250
01251 double vector_dotself( vector a )
01252 {
01253 register int i, dim;
01254 register double sum;
01255 register double *aa ;
01256
01257 dim = a.dim;
01258 sum = 0.0;
01259 aa = a.elts ;
01260 for (i = 0; i < dim; i++)
01261 #if 0
01262 sum += a.elts[i] * a.elts[i];
01263 #else
01264 sum += aa[i] * aa[i] ;
01265 #endif
01266
01267 #ifdef ENABLE_FLOPS
01268 flops += 2.0*dim ;
01269 #endif
01270 return (sum);
01271 }
01272
01273
01274
01275
01276
01277
01278 double matrix_norm( matrix a )
01279 {
01280 int i,j , rows=a.rows, cols=a.cols ;
01281 double sum , smax=0.0 ;
01282
01283 for (i = 0; i < rows; i++){
01284 sum = 0.0 ;
01285 for (j = 0; j < cols; j++) sum += fabs(a.elts[i][j]) ;
01286 if( sum > smax ) smax = sum ;
01287 }
01288 #ifdef ENABLE_FLOPS
01289 flops += 2.0*rows*cols ;
01290 #endif
01291 return smax ;
01292 }
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309 int * matrix_check_columns( matrix a , double eps )
01310 {
01311 int i,j,k , rows=a.rows , cols=a.cols ;
01312 int *iar=NULL , nar=0 ;
01313 double sumi,sumj,sumd ;
01314
01315 if( eps <= 0.0 ) eps = 1.e-5 ;
01316
01317 for( i=0 ; i < cols ; i++ ){
01318 sumi = 0.0 ;
01319 for( k=0 ; k < rows ; k++ ) sumi += a.elts[k][i] * a.elts[k][i] ;
01320 if( sumi <= 0.0 ){
01321 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
01322 iar[2*nar] = i ; iar[2*nar+1] = -1 ; nar++ ;
01323 continue ;
01324 }
01325 for( j=i+1 ; j < cols ; j++ ){
01326 sumj = sumd = 0.0 ;
01327 for( k=0 ; k < rows ; k++ ){
01328 sumj += a.elts[k][j] * a.elts[k][j] ;
01329 sumd += a.elts[k][j] * a.elts[k][i] ;
01330 }
01331 if( sumj > 0.0 ){
01332 sumd = fabs(sumd) / sqrt(sumi*sumj) ;
01333 if( sumd >= 1.0-eps ){
01334 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
01335 iar[2*nar] = i ; iar[2*nar+1] = j ; nar++ ;
01336 }
01337 }
01338 }
01339 }
01340
01341 if( iar != NULL ){
01342 iar = (int *)realloc( (void *)iar , sizeof(int)*2*(nar+1) ) ;
01343 iar[2*nar] = iar[2*nar+1] = -1 ;
01344 }
01345
01346 return iar ;
01347 }
01348
01349
01350
01351
01352
01353
01354
01355 double * matrix_singvals( matrix X )
01356 {
01357 int i,j,k , M=X.rows , N=X.cols ;
01358 double *a , *e , sum ;
01359
01360 a = (double *) malloc( sizeof(double)*N*N ) ;
01361 e = (double *) malloc( sizeof(double)*N ) ;
01362
01363 for( i=0 ; i < N ; i++ ){
01364 for( j=0 ; j <= i ; j++ ){
01365 sum = 0.0 ;
01366 for( k=0 ; k < M ; k++ ) sum += X.elts[k][i] * X.elts[k][j] ;
01367 a[j+N*i] = sum ;
01368 if( j < i ) a[i+N*j] = sum ;
01369 }
01370 }
01371
01372 for( i=0 ; i < N ; i++ ){
01373 if( a[i+N*i] > 0.0 ) e[i] = 1.0 / sqrt(a[i+N*i]) ;
01374 else e[i] = 1.0 ;
01375 }
01376 for( i=0 ; i < N ; i++ ){
01377 for( j=0 ; j < N ; j++ ) a[j+N*i] *= e[i]*e[j] ;
01378 }
01379
01380 symeigval_double( N , a , e ) ;
01381 free( (void *)a ) ;
01382 #ifdef ENABLE_FLOPS
01383 flops += (M+N+2.0)*N*N ;
01384 #endif
01385 return e ;
01386 }
01387
01388
01389
01390 extern void svd_double( int, int, double *, double *, double *, double * ) ;
01391
01392
01393
01394
01395
01396
01397
01398
01399 void matrix_psinv( matrix X , matrix *XtXinv , matrix *XtXinvXt )
01400 {
01401 int m = X.rows , n = X.cols , ii,jj,kk ;
01402 double *amat , *umat , *vmat , *sval , *xfac , smax,del,sum ;
01403
01404 if( m < 1 || n < 1 || m < n || (XtXinv == NULL && XtXinvXt == NULL) ) return;
01405
01406 amat = (double *)calloc( sizeof(double),m*n ) ;
01407 umat = (double *)calloc( sizeof(double),m*n ) ;
01408 vmat = (double *)calloc( sizeof(double),n*n ) ;
01409 sval = (double *)calloc( sizeof(double),n ) ;
01410 xfac = (double *)calloc( sizeof(double),n ) ;
01411
01412 #define A(i,j) amat[(i)+(j)*m]
01413 #define U(i,j) umat[(i)+(j)*m]
01414 #define V(i,j) vmat[(i)+(j)*n]
01415
01416
01417
01418 for( ii=0 ; ii < m ; ii++ )
01419 for( jj=0 ; jj < n ; jj++ ) A(ii,jj) = X.elts[ii][jj] ;
01420
01421
01422
01423 for( jj=0 ; jj < n ; jj++ ){
01424 sum = 0.0 ;
01425 for( ii=0 ; ii < m ; ii++ ) sum += A(ii,jj)*A(ii,jj) ;
01426 if( sum > 0.0 ) sum = 1.0/sqrt(sum) ;
01427 xfac[jj] = sum ;
01428 for( ii=0 ; ii < m ; ii++ ) A(ii,jj) *= sum ;
01429 }
01430
01431
01432
01433 svd_double( m , n , amat , sval , umat , vmat ) ;
01434
01435 free((void *)amat) ;
01436
01437
01438
01439 smax = sval[0] ;
01440 for( ii=1 ; ii < n ; ii++ )
01441 if( sval[ii] > smax ) smax = sval[ii] ;
01442
01443 if( smax <= 0.0 ){
01444 free((void *)xfac); free((void *)sval);
01445 free((void *)vmat); free((void *)umat); return;
01446 }
01447
01448 for( ii=0 ; ii < n ; ii++ )
01449 if( sval[ii] < 0.0 ) sval[ii] = 0.0 ;
01450
01451 #ifdef FLOATIZE
01452 #define PSINV_EPS 1.e-8
01453 #else
01454 #define PSINV_EPS 1.e-16
01455 #endif
01456
01457
01458
01459 del = PSINV_EPS * smax*smax ;
01460 for( ii=0 ; ii < n ; ii++ )
01461 sval[ii] = sval[ii] / ( sval[ii]*sval[ii] + del ) ;
01462
01463
01464
01465 if( XtXinvXt != NULL ){
01466 matrix_create( n , m , XtXinvXt ) ;
01467 for( ii=0 ; ii < n ; ii++ ){
01468 for( jj=0 ; jj < m ; jj++ ){
01469 sum = 0.0 ;
01470 for( kk=0 ; kk < n ; kk++ )
01471 sum += sval[kk] * V(ii,kk) * U(jj,kk) ;
01472 XtXinvXt->elts[ii][jj] = sum * xfac[ii] ;
01473 }
01474 }
01475 }
01476
01477 if( XtXinv != NULL ){
01478 matrix_create( n , n , XtXinv ) ;
01479 for( ii=0 ; ii < n ; ii++ ) sval[ii] = sval[ii] * sval[ii] ;
01480 matrix_create( n , n , XtXinv ) ;
01481 for( ii=0 ; ii < n ; ii++ ){
01482 for( jj=0 ; jj < n ; jj++ ){
01483 sum = 0.0 ;
01484 for( kk=0 ; kk < n ; kk++ )
01485 sum += sval[kk] * V(ii,kk) * V(jj,kk) ;
01486 XtXinv->elts[ii][jj] = sum * xfac[ii] * xfac[jj] ;
01487 }
01488 }
01489 }
01490
01491 #ifdef ENABLE_FLOPS
01492 flops += n*n*(n+2.0*m+2.0) ;
01493 #endif
01494 free((void *)xfac); free((void *)sval);
01495 free((void *)vmat); free((void *)umat); return;
01496 }