Doxygen Source Code Documentation

Main Page Alphabetical List Data Structures File List Data Fields Globals Search
mfwddct.c

00001 
00002 /*
00003  * mfwddct.c (derived from jfwddct.c, which carries the following info)
00004  *
00005  * Copyright (C) 1991, 1992, Thomas G. Lane. This file is part of the
00006  * Independent JPEG Group's software. For conditions of distribution and use,
00007  * see the accompanying README file.
00008  *
00009  * This file contains the basic DCT (Discrete Cosine Transform) transformation
00010  * subroutine.
00011  *
00012  * This implementation is based on Appendix A.2 of the book "Discrete Cosine
00013  * Transform---Algorithms, Advantages, Applications" by K.R. Rao and P. Yip
00014  * (Academic Press, Inc, London, 1990). It uses scaled fixed-point arithmetic
00015  * instead of floating point.
00016  */
00017 
00018 #include "all.h"
00019 
00020 #include "dct.h"
00021 #include "mtypes.h"
00022 #include "opts.h"
00023 
00024 /*
00025  * The poop on this scaling stuff is as follows:
00026  *
00027  * We have to do addition and subtraction of the integer inputs, which is no
00028  * problem, and multiplication by fractional constants, which is a problem to
00029  * do in integer arithmetic.  We multiply all the constants by DCT_SCALE and
00030  * convert them to integer constants (thus retaining LG2_DCT_SCALE bits of
00031  * precision in the constants).  After doing a multiplication we have to
00032  * divide the product by DCT_SCALE, with proper rounding, to produce the
00033  * correct output.  The division can be implemented cheaply as a right shift
00034  * of LG2_DCT_SCALE bits.  The DCT equations also specify an additional
00035  * division by 2 on the final outputs; this can be folded into the
00036  * right-shift by shifting one more bit (see UNFIXH).
00037  *
00038  * If you are planning to recode this in assembler, you might want to set
00039  * LG2_DCT_SCALE to 15.  This loses a bit of precision, but then all the
00040  * multiplications are between 16-bit quantities (given 8-bit JSAMPLEs!) so
00041  * you could use a signed 16x16=>32 bit multiply instruction instead of full
00042  * 32x32 multiply.  Unfortunately there's no way to describe such a multiply
00043  * portably in C, so we've gone for the extra bit of accuracy here.
00044  */
00045 
00046 #define EIGHT_BIT_SAMPLES
00047 #ifdef EIGHT_BIT_SAMPLES
00048 #define LG2_DCT_SCALE 16
00049 #else
00050 #define LG2_DCT_SCALE 15        /* lose a little precision to avoid overflow */
00051 #endif
00052 
00053 #define ONE     ((int32) 1)
00054 
00055 #define DCT_SCALE (ONE << LG2_DCT_SCALE)
00056 
00057 /* In some places we shift the inputs left by a couple more bits, */
00058 /* so that they can be added to fractional results without too much */
00059 /* loss of precision. */
00060 #define LG2_OVERSCALE 2
00061 #define OVERSCALE  (ONE << LG2_OVERSCALE)
00062 #define OVERSHIFT(x)  ((x) <<= LG2_OVERSCALE)
00063 
00064 /* Scale a fractional constant by DCT_SCALE */
00065 #define FIX(x)  ((int32) ((x) * DCT_SCALE + 0.5))
00066 
00067 /* Scale a fractional constant by DCT_SCALE/OVERSCALE */
00068 /* Such a constant can be multiplied with an overscaled input */
00069 /* to produce something that's scaled by DCT_SCALE */
00070 #define FIXO(x)  ((int32) ((x) * DCT_SCALE / OVERSCALE + 0.5))
00071 
00072 /* Descale and correctly round a value that's scaled by DCT_SCALE */
00073 #define UNFIX(x)   RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1)), LG2_DCT_SCALE)
00074 
00075 /* Same with an additional division by 2, ie, correctly rounded UNFIX(x/2) */
00076 #define UNFIXH(x)  RIGHT_SHIFT((x) + (ONE << LG2_DCT_SCALE), LG2_DCT_SCALE+1)
00077 
00078 /* Take a value scaled by DCT_SCALE and round to integer scaled by OVERSCALE */
00079 #define UNFIXO(x)  RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1-LG2_OVERSCALE)),\
00080                                LG2_DCT_SCALE-LG2_OVERSCALE)
00081 
00082 /* Here are the constants we need */
00083 /* SIN_i_j is sine of i*pi/j, scaled by DCT_SCALE */
00084 /* COS_i_j is cosine of i*pi/j, scaled by DCT_SCALE */
00085 
00086 #define SIN_1_4 FIX(0.707106781)
00087 #define COS_1_4 SIN_1_4
00088 
00089 #define SIN_1_8 FIX(0.382683432)
00090 #define COS_1_8 FIX(0.923879533)
00091 #define SIN_3_8 COS_1_8
00092 #define COS_3_8 SIN_1_8
00093 
00094 #define SIN_1_16 FIX(0.195090322)
00095 #define COS_1_16 FIX(0.980785280)
00096 #define SIN_7_16 COS_1_16
00097 #define COS_7_16 SIN_1_16
00098 
00099 #define SIN_3_16 FIX(0.555570233)
00100 #define COS_3_16 FIX(0.831469612)
00101 #define SIN_5_16 COS_3_16
00102 #define COS_5_16 SIN_3_16
00103 
00104 /* OSIN_i_j is sine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
00105 /* OCOS_i_j is cosine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
00106 
00107 #define OSIN_1_4 FIXO(0.707106781)
00108 #define OCOS_1_4 OSIN_1_4
00109 
00110 #define OSIN_1_8 FIXO(0.382683432)
00111 #define OCOS_1_8 FIXO(0.923879533)
00112 #define OSIN_3_8 OCOS_1_8
00113 #define OCOS_3_8 OSIN_1_8
00114 
00115 #define OSIN_1_16 FIXO(0.195090322)
00116 #define OCOS_1_16 FIXO(0.980785280)
00117 #define OSIN_7_16 OCOS_1_16
00118 #define OCOS_7_16 OSIN_1_16
00119 
00120 #define OSIN_3_16 FIXO(0.555570233)
00121 #define OCOS_3_16 FIXO(0.831469612)
00122 #define OSIN_5_16 OCOS_3_16
00123 #define OCOS_5_16 OSIN_3_16
00124 
00125 /* Prototypes */
00126 void reference_fwd_dct _ANSI_ARGS_((Block block, Block dest));
00127 void mp_fwd_dct_fast _ANSI_ARGS_((Block data2d, Block dest2d));
00128 void init_fdct _ANSI_ARGS_((void));
00129 
00130 /*
00131  * --------------------------------------------------------------
00132  *
00133  * mp_fwd_dct_block2 --
00134  *
00135  * Select the appropriate mp_fwd_dct routine
00136  *
00137  * Results: None
00138  *
00139  * Side effects: None
00140  *
00141  * --------------------------------------------------------------
00142  */
00143 extern boolean pureDCT;
00144 void
00145 mp_fwd_dct_block2(data, dest)
00146     Block data, dest;
00147 {
00148   if (pureDCT) reference_fwd_dct(data, dest);
00149   else mp_fwd_dct_fast(data, dest);
00150 }
00151 
00152 /*
00153  * --------------------------------------------------------------
00154  *
00155  * mp_fwd_dct_fast --
00156  *
00157  * Perform the forward DCT on one block of samples.
00158  *
00159  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT on each
00160  * column.
00161  *
00162  * Results: None
00163  *
00164  * Side effects: Overwrites the input data
00165  *
00166  * --------------------------------------------------------------
00167  */
00168 
00169 void
00170 mp_fwd_dct_fast(data2d, dest2d)
00171     Block data2d, dest2d;
00172 {
00173     int16 *data = (int16 *) data2d;     /* this algorithm wants
00174                                          * a 1-d array */
00175     int16 *dest = (int16 *) dest2d;
00176     int pass, rowctr;
00177     register int16 *inptr, *outptr;
00178     int16 workspace[DCTSIZE_SQ];
00179     SHIFT_TEMPS
00180 
00181 #ifdef ndef
00182     {
00183         int y;
00184 
00185         printf("fwd_dct (beforehand):\n");
00186         for (y = 0; y < 8; y++)
00187             printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
00188                    data2d[y][0], data2d[y][1],
00189                    data2d[y][2], data2d[y][3],
00190                    data2d[y][4], data2d[y][5],
00191                    data2d[y][6], data2d[y][7]);
00192     }
00193 #endif
00194 
00195     /*
00196      * Each iteration of the inner loop performs one 8-point 1-D DCT. It
00197      * reads from a *row* of the input matrix and stores into a *column*
00198      * of the output matrix.  In the first pass, we read from the data[]
00199      * array and store into the local workspace[].  In the second pass,
00200      * we read from the workspace[] array and store into data[], thus
00201      * performing the equivalent of a columnar DCT pass with no variable
00202      * array indexing.
00203      */
00204 
00205     inptr = data;               /* initialize pointers for first pass */
00206     outptr = workspace;
00207     for (pass = 1; pass >= 0; pass--) {
00208         for (rowctr = DCTSIZE - 1; rowctr >= 0; rowctr--) {
00209             /*
00210              * many tmps have nonoverlapping lifetime -- flashy
00211              * register colourers should be able to do this lot
00212              * very well
00213              */
00214             int32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
00215             int32 tmp10, tmp11, tmp12, tmp13;
00216             int32 tmp14, tmp15, tmp16, tmp17;
00217             int32 tmp25, tmp26;
00218             /* SHIFT_TEMPS */
00219 
00220             /* temp0 through tmp7:  -512 to +512 */
00221             /* if I-block, then -256 to +256 */
00222             tmp0 = inptr[7] + inptr[0];
00223             tmp1 = inptr[6] + inptr[1];
00224             tmp2 = inptr[5] + inptr[2];
00225             tmp3 = inptr[4] + inptr[3];
00226             tmp4 = inptr[3] - inptr[4];
00227             tmp5 = inptr[2] - inptr[5];
00228             tmp6 = inptr[1] - inptr[6];
00229             tmp7 = inptr[0] - inptr[7];
00230 
00231             /* tmp10 through tmp13:  -1024 to +1024 */
00232             /* if I-block, then -512 to +512 */
00233             tmp10 = tmp3 + tmp0;
00234             tmp11 = tmp2 + tmp1;
00235             tmp12 = tmp1 - tmp2;
00236             tmp13 = tmp0 - tmp3;
00237 
00238             outptr[0] = (int16) UNFIXH((tmp10 + tmp11) * SIN_1_4);
00239             outptr[DCTSIZE * 4] = (int16) UNFIXH((tmp10 - tmp11) * COS_1_4);
00240 
00241             outptr[DCTSIZE * 2] = (int16) UNFIXH(tmp13 * COS_1_8 + tmp12 * SIN_1_8);
00242             outptr[DCTSIZE * 6] = (int16) UNFIXH(tmp13 * SIN_1_8 - tmp12 * COS_1_8);
00243 
00244             tmp16 = UNFIXO((tmp6 + tmp5) * SIN_1_4);
00245             tmp15 = UNFIXO((tmp6 - tmp5) * COS_1_4);
00246 
00247             OVERSHIFT(tmp4);
00248             OVERSHIFT(tmp7);
00249 
00250             /*
00251              * tmp4, tmp7, tmp15, tmp16 are overscaled by
00252              * OVERSCALE
00253              */
00254 
00255             tmp14 = tmp4 + tmp15;
00256             tmp25 = tmp4 - tmp15;
00257             tmp26 = tmp7 - tmp16;
00258             tmp17 = tmp7 + tmp16;
00259 
00260             outptr[DCTSIZE] = (int16) UNFIXH(tmp17 * OCOS_1_16 + tmp14 * OSIN_1_16);
00261             outptr[DCTSIZE * 7] = (int16) UNFIXH(tmp17 * OCOS_7_16 - tmp14 * OSIN_7_16);
00262             outptr[DCTSIZE * 5] = (int16) UNFIXH(tmp26 * OCOS_5_16 + tmp25 * OSIN_5_16);
00263             outptr[DCTSIZE * 3] = (int16) UNFIXH(tmp26 * OCOS_3_16 - tmp25 * OSIN_3_16);
00264 
00265             inptr += DCTSIZE;   /* advance inptr to next row */
00266             outptr++;           /* advance outptr to next column */
00267         }
00268         /* end of pass; in case it was pass 1, set up for pass 2 */
00269         inptr = workspace;
00270         outptr = dest;
00271     }
00272 #ifdef ndef
00273     {
00274         int y;
00275 
00276         printf("fwd_dct (afterward):\n");
00277         for (y = 0; y < 8; y++)
00278             printf("%4d %4d %4d %4d %4d %4d %4d %4d\n",
00279                    dest2d[y][0], dest2d[y][1],
00280                    dest2d[y][2], dest2d[y][3],
00281                    dest2d[y][4], dest2d[y][5],
00282                    dest2d[y][6], dest2d[y][7]);
00283     }
00284 #endif
00285 }
00286 
00287 
00288 /* Modifies from the MPEG2 verification coder */
00289 /* fdctref.c, forward discrete cosine transform, double precision           */
00290 
00291 /* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */
00292 
00293 /*
00294  * Disclaimer of Warranty
00295  *
00296  * These software programs are available to the user without any license fee or
00297  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
00298  * any and all warranties, whether express, implied, or statuary, including any
00299  * implied warranties or merchantability or of fitness for a particular
00300  * purpose.  In no event shall the copyright-holder be liable for any
00301  * incidental, punitive, or consequential damages of any kind whatsoever
00302  * arising from the use of these programs.
00303  *
00304  * This disclaimer of warranty extends to the user of these programs and user's
00305  * customers, employees, agents, transferees, successors, and assigns.
00306  *
00307  * The MPEG Software Simulation Group does not represent or warrant that the
00308  * programs furnished hereunder are free of infringement of any third-party
00309  * patents.
00310  *
00311  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
00312  * are subject to royalty fees to patent holders.  Many of these patents are
00313  * general enough such that they are unavoidable regardless of implementation
00314  * design.
00315  *
00316  */
00317 
00318 #ifndef PI
00319 #ifdef M_PI
00320 #define PI M_PI
00321 #else
00322 #define PI 3.14159265358979323846
00323 #endif
00324 #endif
00325 
00326 /* private data */
00327 static double trans_coef[8][8]; /* transform coefficients */
00328 
00329 void init_fdct()
00330 {
00331   int i, j;
00332   double s;
00333 
00334   for (i=0; i<8; i++)
00335   {
00336     s = (i==0) ? sqrt(0.125) : 0.5;
00337 
00338     for (j=0; j<8; j++)
00339       trans_coef[i][j] = s * cos((PI/8.0)*i*(j+0.5));
00340   }
00341 }
00342 
00343 void reference_fwd_dct(block, dest)
00344 Block block, dest;
00345 {
00346   int i, j, k;
00347   double s;
00348   double tmp[64];
00349 
00350   if (DoLaplace) {
00351     LaplaceNum++;
00352   }
00353 
00354   for (i=0; i<8; i++)
00355     for (j=0; j<8; j++)
00356     {
00357       s = 0.0;
00358 
00359       for (k=0; k<8; k++)
00360         s += trans_coef[j][k] * block[i][k];
00361 
00362       tmp[8*i+j] = s;
00363     }
00364 
00365   for (i=0; i<8; i++)
00366     for (j=0; j<8; j++)
00367     {
00368       s = 0.0;
00369 
00370       for (k=0; k<8; k++)
00371         s += trans_coef[i][k] * tmp[8*k+j];
00372 
00373       if (collect_quant) {
00374         fprintf(collect_quant_fp, "%d %f\n", 8*i+j, s);
00375       } 
00376       if (DoLaplace) {
00377         L1[LaplaceCnum][i*8+j] += s*s;
00378         L2[LaplaceCnum][i*8+j] += s;
00379       }
00380 
00381 
00382       dest[i][j] = (int)floor(s+0.499999);
00383       /*
00384        * reason for adding 0.499999 instead of 0.5:
00385        * s is quite often x.5 (at least for i and/or j = 0 or 4)
00386        * and setting the rounding threshold exactly to 0.5 leads to an
00387        * extremely high arithmetic implementation dependency of the result;
00388        * s being between x.5 and x.500001 (which is now incorrectly rounded
00389        * downwards instead of upwards) is assumed to occur less often
00390        * (if at all)
00391        */
00392     }
00393 }
AFNI/NIfTI Server

Sections

Personal tools

Navigation

Doxygen Source Code Documentation

mfwddct.c