motion_comp_altivec.c

/*
 * motion_comp_altivec.c
 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef __ALTIVEC__

#include "config.h"

#ifdef ARCH_PPC

#include <inttypes.h>

#include "mpeg2.h"
#include "mpeg2_internal.h"

/*
 * The asm code is generated with:
 *
 * gcc-2.95 -fvec -D__ALTIVEC__ -O9 -fomit-frame-pointer -mregnames -S
 *      motion_comp_altivec.c
 *
 * sed 's/.L/._L/g' motion_comp_altivec.s |
 * awk '{args=""; len=split ($2, arg, ",");
 *      for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
 *                               args = args sprintf ("%-6s", a) }
 *      printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
 * unexpand -a
 */

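/*
 * Naming convention used below: MC_{put,avg}_{o,x,y,xy}_{16,8}.
 * "put" overwrites dest while "avg" averages with the bytes already in
 * dest; o/x/y/xy select no, horizontal, vertical or diagonal half-pel
 * interpolation; 16/8 is the block width in pixels.  As a reading aid,
 * here is a minimal scalar sketch (illustration only, not part of the
 * build) of the simplest case, put with no interpolation; the asm
 * version below computes the same thing one 16-byte row at a time:
 */
#if 0
static void MC_put_o_16_scalar (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    int i;
    do {
        for (i = 0; i < 16; i++)
            dest[i] = ref[i];       /* plain copy of one 16-pixel row */
        ref += stride;
        dest += stride;
    } while (--height);
}
#endif
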
static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       li              %r9,  15                \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       lvsl            %v12, 0,    %r4         \n"
        "       mtctr           %r6                     \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       add             %r0,  %r5,  %r5         \n"
        "       vperm           %v13, %v1,  %v0,  %v12  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "._L6:                                          \n"
        "       li              %r9,  15                \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       stvx            %v13, 0,    %r3         \n"
        "       vperm           %v13, %v1,  %v0,  %v12  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       stvx            %v13, %r5,  %r3         \n"
        "       vperm           %v13, %v1,  %v0,  %v12  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r0         \n"
        "       bdnz            ._L6                    \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       stvx            %v13, 0,    %r3         \n"
        "       vperm           %v13, %v1,  %v0,  %v12  \n"
        "       stvx            %v13, %r5,  %r3         \n"
         );
}

static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v12, 0,    %r4         \n"
        "       lvsl            %v1,  %r5,  %r4         \n"
        "       vmrghb          %v12, %v12, %v12        \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       li              %r9,  7                 \n"
        "       vmrghb          %v1,  %v1,  %v1         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vpkuhum         %v10, %v12, %v12        \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       mtctr           %r6                     \n"
        "       vpkuhum         %v11, %v1,  %v1         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v12, %v13, %v0,  %v10  \n"
        "._L11:                                         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       stvewx          %v12, 0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v1,  %v13, %v0,  %v11  \n"
        "       stvewx          %v12, %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       vperm           %v12, %v13, %v0,  %v10  \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       bdnz            ._L11                   \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       stvewx          %v12, 0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v1,  %v13, %v0,  %v11  \n"
        "       stvewx          %v12, %r9,  %r3         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
         );
}
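
/*
 * Note on the 8-pixel-wide variants: AltiVec has no 8-byte store, so
 * each row is written as two 4-byte stvewx word stores at offsets 0
 * and 4.  The vmrghb/vpkuhum pair above appears to rework the lvsl
 * alignment vector so the wanted 8 source bytes are replicated into
 * both halves of the register, letting stvewx pick the correct word
 * whatever the destination alignment.
 */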

static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v11, 0,    %r4         \n"
        "       vspltisb        %v0,  1                 \n"
        "       li              %r9,  16                \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       vaddubm         %v10, %v11, %v0         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v1,  %v12, %v13, %v10  \n"
        "       vperm           %v0,  %v12, %v13, %v11  \n"
        "       mtctr           %r6                     \n"
        "       add             %r0,  %r5,  %r5         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "._L16:                                         \n"
        "       li              %r9,  16                \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       vperm           %v1,  %v12, %v13, %v10  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v0,  %v12, %v13, %v11  \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       stvx            %v0,  %r5,  %r3         \n"
        "       vperm           %v1,  %v12, %v13, %v10  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v0,  %v12, %v13, %v11  \n"
        "       add             %r3,  %r3,  %r0         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       bdnz            ._L16                   \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       vperm           %v1,  %v12, %v13, %v10  \n"
        "       vperm           %v0,  %v12, %v13, %v11  \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       stvx            %v0,  %r5,  %r3         \n"
         );
}
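
/*
 * Horizontal (x) half-pel interpolation: each output pixel is the
 * rounding average of two horizontally adjacent reference pixels,
 * which is exactly what vavgub computes per byte.  A scalar sketch
 * (illustration only):
 */
#if 0
static void MC_put_x_16_scalar (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    int i;
    do {
        for (i = 0; i < 16; i++)
            dest[i] = (ref[i] + ref[i + 1] + 1) >> 1;   /* rounds up on ties */
        ref += stride;
        dest += stride;
    } while (--height);
}
#endif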

static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v0,  0,    %r4         \n"
        "       vspltisb        %v13, 1                 \n"
        "       lvsl            %v10, %r5,  %r4         \n"
        "       vmrghb          %v0,  %v0,  %v0         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       vmrghb          %v10, %v10, %v10        \n"
        "       vpkuhum         %v8,  %v0,  %v0         \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vpkuhum         %v9,  %v10, %v10        \n"
        "       vaddubm         %v7,  %v8,  %v13        \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v1,  %v11, %v12, %v8   \n"
        "       mtctr           %r6                     \n"
        "       vaddubm         %v13, %v9,  %v13        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v0,  %v11, %v12, %v7   \n"
        "       vavgub          %v0,  %v1,  %v0         \n"
        "._L21:                                         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v1,  %v11, %v12, %v13  \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v9   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       vavgub          %v10, %v0,  %v1         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       vperm           %v1,  %v11, %v12, %v7   \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v10, %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       bdnz            ._L21                   \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v1,  %v11, %v12, %v13  \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v9   \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vavgub          %v10, %v0,  %v1         \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       stvewx          %v10, %r9,  %r3         \n"
         );
}

static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       li              %r9,  15                \n"
        "       lvsl            %v10, 0,    %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v12, %v13, %v1,  %v10  \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v11, %v13, %v1,  %v10  \n"
        "       mtctr           %r6                     \n"
        "       add             %r0,  %r5,  %r5         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v12, %v11        \n"
        "._L26:                                         \n"
        "       li              %r9,  15                \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       vperm           %v12, %v13, %v1,  %v10  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       vavgub          %v0,  %v12, %v11        \n"
        "       stvx            %v0,  %r5,  %r3         \n"
        "       vperm           %v11, %v13, %v1,  %v10  \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r0         \n"
        "       vavgub          %v0,  %v12, %v11        \n"
        "       bdnz            ._L26                   \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       vperm           %v12, %v13, %v1,  %v10  \n"
        "       vavgub          %v0,  %v12, %v11        \n"
        "       stvx            %v0,  %r5,  %r3         \n"
         );
}
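
/*
 * Vertical (y) half-pel interpolation: the same rounding average,
 * taken between a pixel and its neighbour one row down; the loop
 * carries the previous row in a register so each row is loaded only
 * once.  Scalar sketch (illustration only):
 */
#if 0
static void MC_put_y_16_scalar (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    int i;
    do {
        for (i = 0; i < 16; i++)
            dest[i] = (ref[i] + ref[i + stride] + 1) >> 1;
        ref += stride;
        dest += stride;
    } while (--height);
}
#endif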

static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v13, 0,    %r4         \n"
        "       lvsl            %v11, %r5,  %r4         \n"
        "       vmrghb          %v13, %v13, %v13        \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       vmrghb          %v11, %v11, %v11        \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       vpkuhum         %v9,  %v13, %v13        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vpkuhum         %v10, %v11, %v11        \n"
        "       vperm           %v13, %v12, %v1,  %v9   \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v11, %v12, %v1,  %v10  \n"
        "       mtctr           %r6                     \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v13, %v11        \n"
        "._L31:                                         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v13, %v12, %v1,  %v9   \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v13, %v11        \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       vperm           %v11, %v12, %v1,  %v10  \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
        "       vavgub          %v0,  %v13, %v11        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       bdnz            ._L31                   \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       lvx             %v12, 0,    %r4         \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v13, %v12, %v1,  %v9   \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vavgub          %v0,  %v13, %v11        \n"
        "       stvewx          %v0,  0,    %r3         \n"
        "       stvewx          %v0,  %r9,  %r3         \n"
         );
}

static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
                                  int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v5,  0,    %r4         \n"
        "       vspltisb        %v3,  1                 \n"
        "       li              %r9,  16                \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       vaddubm         %v4,  %v5,  %v3         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v10, %v1,  %v0,  %v4   \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       mtctr           %r6                     \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       add             %r0,  %r5,  %r5         \n"
        "       vperm           %v10, %v1,  %v0,  %v4   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       vxor            %v6,  %v11, %v10        \n"
        "       vavgub          %v7,  %v11, %v10        \n"
        "       vor             %v0,  %v8,  %v6         \n"
        "       vxor            %v13, %v9,  %v7         \n"
        "       vand            %v0,  %v3,  %v0         \n"
        "       vavgub          %v1,  %v9,  %v7         \n"
        "       vand            %v0,  %v0,  %v13        \n"
        "       vsububm         %v13, %v1,  %v0         \n"
        "._L36:                                         \n"
        "       li              %r9,  16                \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       stvx            %v13, 0,    %r3         \n"
        "       vperm           %v10, %v1,  %v0,  %v4   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v10, %v1,  %v0,  %v4   \n"
        "       vavgub          %v12, %v9,  %v7         \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       vor             %v13, %v8,  %v6         \n"
        "       vxor            %v0,  %v9,  %v7         \n"
        "       vxor            %v6,  %v11, %v10        \n"
        "       vand            %v13, %v3,  %v13        \n"
        "       vavgub          %v7,  %v11, %v10        \n"
        "       vor             %v1,  %v8,  %v6         \n"
        "       vand            %v13, %v13, %v0         \n"
        "       vxor            %v0,  %v9,  %v7         \n"
        "       vand            %v1,  %v3,  %v1         \n"
        "       vsububm         %v13, %v12, %v13        \n"
        "       vand            %v1,  %v1,  %v0         \n"
        "       stvx            %v13, %r5,  %r3         \n"
        "       vavgub          %v0,  %v9,  %v7         \n"
        "       add             %r3,  %r3,  %r0         \n"
        "       vsububm         %v13, %v0,  %v1         \n"
        "       bdnz            ._L36                   \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       stvx            %v13, 0,    %r3         \n"
        "       vperm           %v10, %v1,  %v0,  %v4   \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       vor             %v0,  %v8,  %v6         \n"
        "       vxor            %v13, %v9,  %v7         \n"
        "       vand            %v0,  %v3,  %v0         \n"
        "       vavgub          %v1,  %v9,  %v7         \n"
        "       vand            %v0,  %v0,  %v13        \n"
        "       vsububm         %v13, %v1,  %v0         \n"
        "       stvx            %v13, %r5,  %r3         \n"
         );
}
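
/*
 * Diagonal (xy) half-pel interpolation wants the four-pixel average
 * (a + b + c + d + 2) >> 2.  Computing it as avg2(avg2(a,b), avg2(c,d))
 * with vavgub can come out 1 too high, because each avg2 rounds up;
 * the vxor/vor/vand sequence above computes exactly that error bit
 * and vsububm subtracts it.  Scalar sketch of the identity
 * (illustration only):
 */
#if 0
static inline int avg4 (int a, int b, int c, int d)
{
    int t1 = (a + b + 1) >> 1;                       /* avg2, rounds up */
    int t2 = (c + d + 1) >> 1;
    int err = ((a ^ b) | (c ^ d)) & (t1 ^ t2) & 1;   /* double-rounding error */
    return ((t1 + t2 + 1) >> 1) - err;               /* == (a+b+c+d+2) >> 2 */
}
#endif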

static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v4,  0,    %r4         \n"
        "       vspltisb        %v3,  1                 \n"
        "       lvsl            %v5,  %r5,  %r4         \n"
        "       vmrghb          %v4,  %v4,  %v4         \n"
        "       li              %r9,  8                 \n"
        "       vmrghb          %v5,  %v5,  %v5         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       vpkuhum         %v4,  %v4,  %v4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       vpkuhum         %v5,  %v5,  %v5         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vaddubm         %v2,  %v4,  %v3         \n"
        "       vperm           %v11, %v1,  %v0,  %v4   \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vaddubm         %v19, %v5,  %v3         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v10, %v1,  %v0,  %v2   \n"
        "       mtctr           %r6                     \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v10, %v1,  %v0,  %v19  \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       vxor            %v6,  %v11, %v10        \n"
        "       vavgub          %v7,  %v11, %v10        \n"
        "       vor             %v0,  %v8,  %v6         \n"
        "       vxor            %v13, %v9,  %v7         \n"
        "       vand            %v0,  %v3,  %v0         \n"
        "       vavgub          %v1,  %v9,  %v7         \n"
        "       vand            %v0,  %v0,  %v13        \n"
        "       vsububm         %v13, %v1,  %v0         \n"
        "._L41:                                         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       stvewx          %v13, 0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v10, %v1,  %v0,  %v2   \n"
        "       stvewx          %v13, %r9,  %r3         \n"
        "       vperm           %v11, %v1,  %v0,  %v4   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       li              %r9,  8                 \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       vavgub          %v12, %v9,  %v7         \n"
        "       vor             %v13, %v8,  %v6         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vperm           %v10, %v1,  %v0,  %v19  \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v11, %v1,  %v0,  %v5   \n"
        "       vand            %v13, %v3,  %v13        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vxor            %v0,  %v9,  %v7         \n"
        "       vxor            %v6,  %v11, %v10        \n"
        "       vavgub          %v7,  %v11, %v10        \n"
        "       vor             %v1,  %v8,  %v6         \n"
        "       vand            %v13, %v13, %v0         \n"
        "       vxor            %v0,  %v9,  %v7         \n"
        "       vand            %v1,  %v3,  %v1         \n"
        "       vsububm         %v13, %v12, %v13        \n"
        "       vand            %v1,  %v1,  %v0         \n"
        "       stvewx          %v13, 0,    %r3         \n"
        "       vavgub          %v0,  %v9,  %v7         \n"
        "       stvewx          %v13, %r9,  %r3         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vsububm         %v13, %v0,  %v1         \n"
        "       bdnz            ._L41                   \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       stvewx          %v13, 0,    %r3         \n"
        "       vperm           %v10, %v1,  %v0,  %v2   \n"
        "       li              %r9,  4                 \n"
        "       vperm           %v11, %v1,  %v0,  %v4   \n"
        "       stvewx          %v13, %r9,  %r3         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vxor            %v8,  %v11, %v10        \n"
        "       vavgub          %v9,  %v11, %v10        \n"
        "       vor             %v0,  %v8,  %v6         \n"
        "       vxor            %v13, %v9,  %v7         \n"
        "       vand            %v0,  %v3,  %v0         \n"
        "       vavgub          %v1,  %v9,  %v7         \n"
        "       vand            %v0,  %v0,  %v13        \n"
        "       vsububm         %v13, %v1,  %v0         \n"
        "       stvewx          %v13, 0,    %r3         \n"
        "       stvewx          %v13, %r9,  %r3         \n"
         );
}

static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       li              %r9,  15                \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvsl            %v11, 0,    %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v0,  %v1,  %v0,  %v11  \n"
        "       lvx             %v13, 0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       add             %r9,  %r5,  %r5         \n"
        "       vavgub          %v12, %v13, %v0         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "._L46:                                         \n"
        "       li              %r11, 15                \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r11, %r4         \n"
        "       lvx             %v13, %r5,  %r3         \n"
        "       vperm           %v0,  %v1,  %v0,  %v11  \n"
        "       stvx            %v12, 0,    %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v12, %v13, %v0         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v0,  %r11, %r4         \n"
        "       lvx             %v13, %r9,  %r3         \n"
        "       vperm           %v0,  %v1,  %v0,  %v11  \n"
        "       stvx            %v12, %r5,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v12, %v13, %v0         \n"
        "       add             %r3,  %r3,  %r9         \n"
        "       bdnz            ._L46                   \n"
        "       lvx             %v0,  %r11, %r4         \n"
        "       lvx             %v1,  0,    %r4         \n"
        "       lvx             %v13, %r5,  %r3         \n"
        "       vperm           %v0,  %v1,  %v0,  %v11  \n"
        "       stvx            %v12, 0,    %r3         \n"
        "       vavgub          %v12, %v13, %v0         \n"
        "       stvx            %v12, %r5,  %r3         \n"
         );
}
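
/*
 * The MC_avg_* variants combine the motion-compensated prediction with
 * the pixels already in dest (as used for bidirectional prediction),
 * again via a rounding average.  Scalar sketch of the simplest case
 * (illustration only):
 */
#if 0
static void MC_avg_o_16_scalar (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    int i;
    do {
        for (i = 0; i < 16; i++)
            dest[i] = (dest[i] + ref[i] + 1) >> 1;
        ref += stride;
        dest += stride;
    } while (--height);
}
#endif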

static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v12, 0,    %r4         \n"
        "       li              %r9,  7                 \n"
        "       vmrghb          %v12, %v12, %v12        \n"
        "       lvsl            %v1,  %r5,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       vpkuhum         %v9,  %v12, %v12        \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vmrghb          %v1,  %v1,  %v1         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v0,  %v13, %v0,  %v9   \n"
        "       lvx             %v11, 0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       vpkuhum         %v10, %v1,  %v1         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v12, %v11, %v0         \n"
        "._L51:                                         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v11, %r5,  %r3         \n"
        "       stvewx          %v12, 0,    %r3         \n"
        "       vperm           %v0,  %v13, %v0,  %v10  \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v12, %r9,  %r3         \n"
        "       vavgub          %v1,  %v11, %v0         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v11, %r5,  %r3         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       vperm           %v0,  %v13, %v0,  %v9   \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
        "       vavgub          %v12, %v11, %v0         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       bdnz            ._L51                   \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v0,  %r9,  %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v11, %r5,  %r3         \n"
        "       stvewx          %v12, 0,    %r3         \n"
        "       vperm           %v0,  %v13, %v0,  %v10  \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v12, %r9,  %r3         \n"
        "       vavgub          %v1,  %v11, %v0         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
         );
}

static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v8,  0,    %r4         \n"
        "       vspltisb        %v0,  1                 \n"
        "       li              %r9,  16                \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       vaddubm         %v7,  %v8,  %v0         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vperm           %v1,  %v11, %v12, %v7   \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       lvx             %v9,  0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       add             %r9,  %r5,  %r5         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "._L56:                                         \n"
        "       li              %r11, 16                \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v12, %r11, %r4         \n"
        "       lvx             %v9,  %r5,  %r3         \n"
        "       stvx            %v10, 0,    %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v7   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v1,  %v11, %v12, %v8   \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v12, %r11, %r4         \n"
        "       vavgub          %v1,  %v1,  %v0         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v13, %v11, %v12, %v7   \n"
        "       vavgub          %v10, %v9,  %v1         \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       lvx             %v9,  %r9,  %r3         \n"
        "       stvx            %v10, %r5,  %r3         \n"
        "       vavgub          %v0,  %v0,  %v13        \n"
        "       add             %r3,  %r3,  %r9         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "       bdnz            ._L56                   \n"
        "       lvx             %v12, %r11, %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v9,  %r5,  %r3         \n"
        "       vperm           %v1,  %v11, %v12, %v7   \n"
        "       stvx            %v10, 0,    %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "       stvx            %v10, %r5,  %r3         \n"
         );
}
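
/*
 * MC_avg_x composes the two ideas above: a first vavgub forms the
 * horizontal half-pel average of the reference, and a second vavgub
 * folds in dest -- roughly dest[i] = avg2(dest[i], avg2(ref[i], ref[i+1])).
 */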

static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v10, 0,    %r4         \n"
        "       vspltisb        %v13, 1                 \n"
        "       li              %r9,  8                 \n"
        "       vmrghb          %v10, %v10, %v10        \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       vpkuhum         %v7,  %v10, %v10        \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       lvsl            %v10, %r5,  %r4         \n"
        "       vaddubm         %v6,  %v7,  %v13        \n"
        "       vperm           %v0,  %v11, %v12, %v7   \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vmrghb          %v10, %v10, %v10        \n"
        "       lvx             %v9,  0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       vperm           %v1,  %v11, %v12, %v6   \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vpkuhum         %v8,  %v10, %v10        \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       vaddubm         %v13, %v8,  %v13        \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "._L61:                                         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v9,  %r5,  %r3         \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       vperm           %v1,  %v11, %v12, %v13  \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v10, %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vperm           %v1,  %v11, %v12, %v6   \n"
        "       lvx             %v9,  %r5,  %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v7   \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       stvewx          %v10, %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "       bdnz            ._L61                   \n"
        "       li              %r9,  8                 \n"
        "       lvx             %v12, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v9,  %r5,  %r3         \n"
        "       vperm           %v1,  %v11, %v12, %v13  \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       vperm           %v0,  %v11, %v12, %v8   \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v10, %r9,  %r3         \n"
        "       vavgub          %v0,  %v0,  %v1         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vavgub          %v10, %v9,  %v0         \n"
        "       stvewx          %v10, 0,    %r3         \n"
        "       stvewx          %v10, %r9,  %r3         \n"
         );
}

static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
                                 int stride, int height)
{
    asm ("                                              \n"
        "       li              %r9,  15                \n"
        "       lvx             %v1,  %r9,  %r4         \n"
        "       lvsl            %v9,  0,    %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vperm           %v11, %v13, %v1,  %v9   \n"
        "       li              %r11, 15                \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r11, %r4         \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       vperm           %v10, %v13, %v1,  %v9   \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       lvx             %v12, 0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       vavgub          %v0,  %v11, %v10        \n"
        "       add             %r9,  %r5,  %r5         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v12, %v0         \n"
        "._L66:                                         \n"
        "       li              %r11, 15                \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r11, %r4         \n"
        "       lvx             %v12, %r5,  %r3         \n"
        "       vperm           %v11, %v13, %v1,  %v9   \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v11, %v10        \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v1,  %r11, %r4         \n"
        "       vavgub          %v0,  %v12, %v0         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       lvx             %v12, %r9,  %r3         \n"
        "       vperm           %v10, %v13, %v1,  %v9   \n"
        "       stvx            %v0,  %r5,  %r3         \n"
        "       vavgub          %v0,  %v11, %v10        \n"
        "       add             %r3,  %r3,  %r9         \n"
        "       vavgub          %v0,  %v12, %v0         \n"
        "       bdnz            ._L66                   \n"
        "       lvx             %v1,  %r11, %r4         \n"
        "       lvx             %v13, 0,    %r4         \n"
        "       lvx             %v12, %r5,  %r3         \n"
        "       vperm           %v11, %v13, %v1,  %v9   \n"
        "       stvx            %v0,  0,    %r3         \n"
        "       vavgub          %v0,  %v11, %v10        \n"
        "       vavgub          %v0,  %v12, %v0         \n"
        "       stvx            %v0,  %r5,  %r3         \n"
         );
}

static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    asm ("                                              \n"
        "       lvsl            %v12, 0,    %r4         \n"
        "       lvsl            %v9,  %r5,  %r4         \n"
        "       vmrghb          %v12, %v12, %v12        \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       vmrghb          %v9,  %v9,  %v9         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       vpkuhum         %v7,  %v12, %v12        \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vpkuhum         %v8,  %v9,  %v9         \n"
        "       vperm           %v12, %v11, %v13, %v7   \n"
        "       srawi           %r6,  %r6,  1           \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       addi            %r6,  %r6,  -1          \n"
        "       vperm           %v9,  %v11, %v13, %v8   \n"
        "       lvx             %v10, 0,    %r3         \n"
        "       mtctr           %r6                     \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v0,  %v12, %v9         \n"
        "       vavgub          %v1,  %v10, %v0         \n"
        "._L71:                                         \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v10, %r5,  %r3         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       vperm           %v12, %v11, %v13, %v7   \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
        "       vavgub          %v0,  %v12, %v9         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       li              %r9,  7                 \n"
        "       vavgub          %v1,  %v10, %v0         \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vperm           %v9,  %v11, %v13, %v8   \n"
        "       lvx             %v10, %r5,  %r3         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       vavgub          %v0,  %v12, %v9         \n"
        "       li              %r9,  4                 \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
        "       add             %r4,  %r4,  %r5         \n"
        "       vavgub          %v1,  %v10, %v0         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       bdnz            ._L71                   \n"
        "       li              %r9,  7                 \n"
        "       lvx             %v13, %r9,  %r4         \n"
        "       lvx             %v11, 0,    %r4         \n"
        "       lvx             %v10, %r5,  %r3         \n"
        "       vperm           %v12, %v11, %v13, %v7   \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       li              %r9,  4                 \n"
        "       vavgub          %v0,  %v12, %v9         \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
        "       add             %r3,  %r3,  %r5         \n"
        "       vavgub          %v1,  %v10, %v0         \n"
        "       stvewx          %v1,  0,    %r3         \n"
        "       stvewx          %v1,  %r9,  %r3         \n"
         );
}
00871 
00872 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
00873                                   int stride, int height)
00874 {
00875     asm ("                                              \n"
00876         "       lvsl            %v4,  0,    %r4         \n"
00877         "       vspltisb        %v2,  1                 \n"
00878         "       li              %r9,  16                \n"
00879         "       lvx             %v1,  %r9,  %r4         \n"
00880         "       vaddubm         %v3,  %v4,  %v2         \n"
00881         "       lvx             %v13, 0,    %r4         \n"
00882         "       add             %r4,  %r4,  %r5         \n"
00883         "       vperm           %v10, %v13, %v1,  %v3   \n"
00884         "       li              %r11, 16                \n"
00885         "       vperm           %v11, %v13, %v1,  %v4   \n"
00886         "       srawi           %r6,  %r6,  1           \n"
00887         "       lvx             %v13, 0,    %r4         \n"
00888         "       lvx             %v1,  %r11, %r4         \n"
00889         "       vavgub          %v9,  %v11, %v10        \n"
00890         "       vxor            %v8,  %v11, %v10        \n"
00891         "       addi            %r6,  %r6,  -1          \n"
00892         "       vperm           %v10, %v13, %v1,  %v3   \n"
00893         "       lvx             %v6,  0,    %r3         \n"
00894         "       mtctr           %r6                     \n"
00895         "       vperm           %v11, %v13, %v1,  %v4   \n"
00896         "       add             %r9,  %r5,  %r5         \n"
00897         "       add             %r4,  %r4,  %r5         \n"
00898         "       vxor            %v5,  %v11, %v10        \n"
00899         "       vavgub          %v7,  %v11, %v10        \n"
00900         "       vor             %v1,  %v8,  %v5         \n"
00901         "       vxor            %v13, %v9,  %v7         \n"
00902         "       vand            %v1,  %v2,  %v1         \n"
00903         "       vavgub          %v0,  %v9,  %v7         \n"
00904         "       vand            %v1,  %v1,  %v13        \n"
00905         "       vsububm         %v0,  %v0,  %v1         \n"
00906         "       vavgub          %v12, %v6,  %v0         \n"
00907         "._L76:                                         \n"
00908         "       li              %r11, 16                \n"
00909         "       lvx             %v13, 0,    %r4         \n"
00910         "       lvx             %v1,  %r11, %r4         \n"
00911         "       lvx             %v6,  %r5,  %r3         \n"
00912         "       stvx            %v12, 0,    %r3         \n"
00913         "       vperm           %v10, %v13, %v1,  %v3   \n"
00914         "       vperm           %v11, %v13, %v1,  %v4   \n"
00915         "       add             %r4,  %r4,  %r5         \n"
00916         "       lvx             %v13, 0,    %r4         \n"
00917         "       lvx             %v1,  %r11, %r4         \n"
00918         "       vavgub          %v9,  %v11, %v10        \n"
00919         "       vxor            %v8,  %v11, %v10        \n"
00920         "       add             %r4,  %r4,  %r5         \n"
00921         "       vperm           %v10, %v13, %v1,  %v3   \n"
00922         "       vavgub          %v12, %v9,  %v7         \n"
00923         "       vperm           %v11, %v13, %v1,  %v4   \n"
00924         "       vor             %v0,  %v8,  %v5         \n"
00925         "       vxor            %v13, %v9,  %v7         \n"
00926         "       vxor            %v5,  %v11, %v10        \n"
00927         "       vand            %v0,  %v2,  %v0         \n"
00928         "       vavgub          %v7,  %v11, %v10        \n"
00929         "       vor             %v1,  %v8,  %v5         \n"
00930         "       vand            %v0,  %v0,  %v13        \n"
00931         "       vand            %v1,  %v2,  %v1         \n"
00932         "       vxor            %v13, %v9,  %v7         \n"
00933         "       vsububm         %v12, %v12, %v0         \n"
00934         "       vand            %v1,  %v1,  %v13        \n"
00935         "       vavgub          %v0,  %v9,  %v7         \n"
00936         "       vavgub          %v12, %v6,  %v12        \n"
00937         "       lvx             %v6,  %r9,  %r3         \n"
00938         "       vsububm         %v0,  %v0,  %v1         \n"
00939         "       stvx            %v12, %r5,  %r3         \n"
00940         "       vavgub          %v12, %v6,  %v0         \n"
00941         "       add             %r3,  %r3,  %r9         \n"
00942         "       bdnz            ._L76                   \n"
00943         "       lvx             %v1,  %r11, %r4         \n"
00944         "       lvx             %v13, 0,    %r4         \n"
00945         "       lvx             %v6,  %r5,  %r3         \n"
00946         "       vperm           %v10, %v13, %v1,  %v3   \n"
00947         "       stvx            %v12, 0,    %r3         \n"
00948         "       vperm           %v11, %v13, %v1,  %v4   \n"
00949         "       vxor            %v8,  %v11, %v10        \n"
00950         "       vavgub          %v9,  %v11, %v10        \n"
00951         "       vor             %v0,  %v8,  %v5         \n"
00952         "       vxor            %v13, %v9,  %v7         \n"
00953         "       vand            %v0,  %v2,  %v0         \n"
00954         "       vavgub          %v1,  %v9,  %v7         \n"
00955         "       vand            %v0,  %v0,  %v13        \n"
00956         "       vsububm         %v1,  %v1,  %v0         \n"
00957         "       vavgub          %v12, %v6,  %v1         \n"
00958         "       stvx            %v12, %r5,  %r3         \n"
00959          );
00960 }
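
/* Editor's note: MC_avg_xy_16_altivec above interpolates half-pel in both
 * directions and then averages the result with the data already in dest.
 * The vor/vand/vxor/vsububm sequence around each vavgub pair is the
 * rounding correction discussed (with a scalar sketch) after
 * MC_put_xy_16_altivec in the intrinsics section below. */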
00961 
00962 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
00963                                  int stride, int height)
00964 {
00965     asm ("                                              \n"
00966         "       lvsl            %v2,  0,    %r4         \n"
00967         "       vspltisb        %v19, 1                 \n"
00968         "       lvsl            %v3,  %r5,  %r4         \n"
00969         "       vmrghb          %v2,  %v2,  %v2         \n"
00970         "       li              %r9,  8                 \n"
00971         "       vmrghb          %v3,  %v3,  %v3         \n"
00972         "       lvx             %v9,  0,    %r4         \n"
00973         "       vpkuhum         %v2,  %v2,  %v2         \n"
00974         "       lvx             %v1,  %r9,  %r4         \n"
00975         "       vpkuhum         %v3,  %v3,  %v3         \n"
00976         "       add             %r4,  %r4,  %r5         \n"
00977         "       vaddubm         %v18, %v2,  %v19        \n"
00978         "       vperm           %v11, %v9,  %v1,  %v2   \n"
00979         "       srawi           %r6,  %r6,  1           \n"
00980         "       vaddubm         %v17, %v3,  %v19        \n"
00981         "       addi            %r6,  %r6,  -1          \n"
00982         "       vperm           %v10, %v9,  %v1,  %v18  \n"
00983         "       lvx             %v4,  0,    %r3         \n"
00984         "       mtctr           %r6                     \n"
00985         "       lvx             %v1,  %r9,  %r4         \n"
00986         "       lvx             %v9,  0,    %r4         \n"
00987         "       vavgub          %v8,  %v11, %v10        \n"
00988         "       vxor            %v7,  %v11, %v10        \n"
00989         "       add             %r4,  %r4,  %r5         \n"
00990         "       vperm           %v10, %v9,  %v1,  %v17  \n"
00991         "       vperm           %v11, %v9,  %v1,  %v3   \n"
00992         "       vxor            %v5,  %v11, %v10        \n"
00993         "       vavgub          %v6,  %v11, %v10        \n"
00994         "       vor             %v1,  %v7,  %v5         \n"
00995         "       vxor            %v13, %v8,  %v6         \n"
00996         "       vand            %v1,  %v19, %v1         \n"
00997         "       vavgub          %v0,  %v8,  %v6         \n"
00998         "       vand            %v1,  %v1,  %v13        \n"
00999         "       vsububm         %v0,  %v0,  %v1         \n"
01000         "       vavgub          %v13, %v4,  %v0         \n"
01001         "._L81:                                         \n"
01002         "       li              %r9,  8                 \n"
01003         "       lvx             %v1,  %r9,  %r4         \n"
01004         "       lvx             %v9,  0,    %r4         \n"
01005         "       lvx             %v4,  %r5,  %r3         \n"
01006         "       stvewx          %v13, 0,    %r3         \n"
01007         "       vperm           %v10, %v9,  %v1,  %v18  \n"
01008         "       vperm           %v11, %v9,  %v1,  %v2   \n"
01009         "       li              %r9,  4                 \n"
01010         "       stvewx          %v13, %r9,  %r3         \n"
01011         "       vxor            %v7,  %v11, %v10        \n"
01012         "       add             %r4,  %r4,  %r5         \n"
01013         "       li              %r9,  8                 \n"
01014         "       vavgub          %v8,  %v11, %v10        \n"
01015         "       lvx             %v1,  %r9,  %r4         \n"
01016         "       vor             %v0,  %v7,  %v5         \n"
01017         "       lvx             %v9,  0,    %r4         \n"
01018         "       vxor            %v12, %v8,  %v6         \n"
01019         "       vand            %v0,  %v19, %v0         \n"
01020         "       add             %r3,  %r3,  %r5         \n"
01021         "       vperm           %v10, %v9,  %v1,  %v17  \n"
01022         "       vavgub          %v13, %v8,  %v6         \n"
01023         "       li              %r9,  4                 \n"
01024         "       vperm           %v11, %v9,  %v1,  %v3   \n"
01025         "       vand            %v0,  %v0,  %v12        \n"
01026         "       add             %r4,  %r4,  %r5         \n"
01027         "       vxor            %v5,  %v11, %v10        \n"
01028         "       vavgub          %v6,  %v11, %v10        \n"
01029         "       vor             %v1,  %v7,  %v5         \n"
01030         "       vsububm         %v13, %v13, %v0         \n"
01031         "       vxor            %v0,  %v8,  %v6         \n"
01032         "       vand            %v1,  %v19, %v1         \n"
01033         "       vavgub          %v13, %v4,  %v13        \n"
01034         "       vand            %v1,  %v1,  %v0         \n"
01035         "       lvx             %v4,  %r5,  %r3         \n"
01036         "       vavgub          %v0,  %v8,  %v6         \n"
01037         "       stvewx          %v13, 0,    %r3         \n"
01038         "       stvewx          %v13, %r9,  %r3         \n"
01039         "       vsububm         %v0,  %v0,  %v1         \n"
01040         "       add             %r3,  %r3,  %r5         \n"
01041         "       vavgub          %v13, %v4,  %v0         \n"
01042         "       bdnz            ._L81                   \n"
01043         "       li              %r9,  8                 \n"
01044         "       lvx             %v1,  %r9,  %r4         \n"
01045         "       lvx             %v9,  0,    %r4         \n"
01046         "       lvx             %v4,  %r5,  %r3         \n"
01047         "       vperm           %v10, %v9,  %v1,  %v18  \n"
01048         "       stvewx          %v13, 0,    %r3         \n"
01049         "       vperm           %v11, %v9,  %v1,  %v2   \n"
01050         "       li              %r9,  4                 \n"
01051         "       stvewx          %v13, %r9,  %r3         \n"
01052         "       vxor            %v7,  %v11, %v10        \n"
01053         "       add             %r3,  %r3,  %r5         \n"
01054         "       vavgub          %v8,  %v11, %v10        \n"
01055         "       vor             %v0,  %v7,  %v5         \n"
01056         "       vxor            %v13, %v8,  %v6         \n"
01057         "       vand            %v0,  %v19, %v0         \n"
01058         "       vavgub          %v1,  %v8,  %v6         \n"
01059         "       vand            %v0,  %v0,  %v13        \n"
01060         "       vsububm         %v1,  %v1,  %v0         \n"
01061         "       vavgub          %v13, %v4,  %v1         \n"
01062         "       stvewx          %v13, 0,    %r3         \n"
01063         "       stvewx          %v13, %r9,  %r3         \n"
01064          );
01065 }
01066 
01067 MPEG2_MC_EXTERN (altivec)
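
/* Editor's note: MPEG2_MC_EXTERN comes from mpeg2_internal.h; it
 * presumably expands to the mpeg2_mc_altivec table that collects the
 * sixteen MC_{put,avg}_{o,x,y,xy}_{16,8}_altivec entry points for the
 * decoder's motion-compensation dispatch. */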
01068 
01069 #endif  /* ARCH_PPC */
01070 
01071 #else   /* __ALTIVEC__ */
01072 
01073 #define vector_s16_t vector signed short
01074 #define vector_u16_t vector unsigned short
01075 #define vector_s8_t vector signed char
01076 #define vector_u8_t vector unsigned char
01077 #define vector_s32_t vector signed int
01078 #define vector_u32_t vector unsigned int
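
/* Editor's note: the vector_*_t macros above are shorthands for the
 * AltiVec "vector <type>" keyword types used below.  In the MC_* names,
 * "put" copies the prediction into dest while "avg" averages it with the
 * bytes already there; o/x/y/xy select integer-pel, horizontal half-pel,
 * vertical half-pel and diagonal half-pel interpolation; 16/8 is the
 * block width in pixels. */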
01079 
01080 void MC_put_o_16_altivec (unsigned char * dest, const unsigned char * ref,
01081                           const int stride, int height)
01082 {
01083     vector_u8_t perm, ref0, ref1, tmp;
01084 
01085     perm = vec_lvsl (0, ref);
01086 
01087     height = (height >> 1) - 1;
01088 
01089     ref0 = vec_ld (0, ref);
01090     ref1 = vec_ld (15, ref);
01091     ref += stride;
01092     tmp = vec_perm (ref0, ref1, perm);
01093 
01094     do {
01095         ref0 = vec_ld (0, ref);
01096         ref1 = vec_ld (15, ref);
01097         ref += stride;
01098         vec_st (tmp, 0, dest);
01099         tmp = vec_perm (ref0, ref1, perm);
01100 
01101         ref0 = vec_ld (0, ref);
01102         ref1 = vec_ld (15, ref);
01103         ref += stride;
01104         vec_st (tmp, stride, dest);
01105         dest += 2*stride;
01106         tmp = vec_perm (ref0, ref1, perm);
01107     } while (--height);
01108 
01109     ref0 = vec_ld (0, ref);
01110     ref1 = vec_ld (15, ref);
01111     vec_st (tmp, 0, dest);
01112     tmp = vec_perm (ref0, ref1, perm);
01113     vec_st (tmp, stride, dest);
01114 }
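
/* Editor's sketch (not part of the original source): the unaligned-load
 * idiom used by MC_put_o_16_altivec above.  vec_lvsl builds a permute
 * vector from the low four address bits, and permuting the two aligned
 * quadwords that cover ref[0..15] yields the sixteen bytes starting at
 * ref.  The second load uses offset 15 rather than 16 so that an
 * already-aligned ref never touches the following quadword.  The helper
 * name is hypothetical. */
static vector_u8_t load_unaligned_16 (const unsigned char * p)
{
    vector_u8_t lo = vec_ld (0, p);     /* quadword containing p[0] */
    vector_u8_t hi = vec_ld (15, p);    /* quadword containing p[15] */
    return vec_perm (lo, hi, vec_lvsl (0, p));
}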
01115 
01116 void MC_put_o_8_altivec (unsigned char * dest, const unsigned char * ref,
01117                          const int stride, int height)
01118 {
01119     vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
01120 
01121     tmp0 = vec_lvsl (0, ref);
01122     tmp0 = vec_mergeh (tmp0, tmp0);
01123     perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01124     tmp1 = vec_lvsl (stride, ref);
01125     tmp1 = vec_mergeh (tmp1, tmp1);
01126     perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01127 
01128     height = (height >> 1) - 1;
01129 
01130     ref0 = vec_ld (0, ref);
01131     ref1 = vec_ld (7, ref);
01132     ref += stride;
01133     tmp0 = vec_perm (ref0, ref1, perm0);
01134 
01135     do {
01136         ref0 = vec_ld (0, ref);
01137         ref1 = vec_ld (7, ref);
01138         ref += stride;
01139         vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01140         vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01141         dest += stride;
01142         tmp1 = vec_perm (ref0, ref1, perm1);
01143 
01144         ref0 = vec_ld (0, ref);
01145         ref1 = vec_ld (7, ref);
01146         ref += stride;
01147         vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01148         vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01149         dest += stride;
01150         tmp0 = vec_perm (ref0, ref1, perm0);
01151     } while (--height);
01152 
01153     ref0 = vec_ld (0, ref);
01154     ref1 = vec_ld (7, ref);
01155     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01156     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01157     dest += stride;
01158     tmp1 = vec_perm (ref0, ref1, perm1);
01159     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01160     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01161 }
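
/* Editor's sketch (not part of the original source): how the 8-wide
 * functions build their permute vectors.  vec_lvsl yields the bytes
 * [s, s+1, ..., s+15] where s is the source alignment; merging the
 * vector with itself and packing the halfwords back down (modulo)
 * yields [s .. s+7, s .. s+7], so the permuted row appears in both
 * halves of the register.  vec_ste then stores only the 32-bit element
 * selected by the effective address, which is why the paired stores at
 * offsets 0 and 4 work whether dest falls in the low or the high half
 * of its aligned quadword.  The helper name is hypothetical. */
static vector_u8_t make_perm_8 (int offset, const unsigned char * p)
{
    vector_u8_t t = vec_lvsl (offset, p);
    t = vec_mergeh (t, t);              /* s,s,s+1,s+1,...,s+7,s+7 */
    return vec_pack ((vector_u16_t)t, (vector_u16_t)t);
}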
01162 
01163 void MC_put_x_16_altivec (unsigned char * dest, const unsigned char * ref,
01164                           const int stride, int height)
01165 {
01166     vector_u8_t permA, permB, ref0, ref1, tmp;
01167 
01168     permA = vec_lvsl (0, ref);
01169     permB = vec_add (permA, vec_splat_u8 (1));
01170 
01171     height = (height >> 1) - 1;
01172 
01173     ref0 = vec_ld (0, ref);
01174     ref1 = vec_ld (16, ref);
01175     ref += stride;
01176     tmp = vec_avg (vec_perm (ref0, ref1, permA),
01177                    vec_perm (ref0, ref1, permB));
01178 
01179     do {
01180         ref0 = vec_ld (0, ref);
01181         ref1 = vec_ld (16, ref);
01182         ref += stride;
01183         vec_st (tmp, 0, dest);
01184         tmp = vec_avg (vec_perm (ref0, ref1, permA),
01185                        vec_perm (ref0, ref1, permB));
01186 
01187         ref0 = vec_ld (0, ref);
01188         ref1 = vec_ld (16, ref);
01189         ref += stride;
01190         vec_st (tmp, stride, dest);
01191         dest += 2*stride;
01192         tmp = vec_avg (vec_perm (ref0, ref1, permA),
01193                        vec_perm (ref0, ref1, permB));
01194     } while (--height);
01195 
01196     ref0 = vec_ld (0, ref);
01197     ref1 = vec_ld (16, ref);
01198     vec_st (tmp, 0, dest);
01199     tmp = vec_avg (vec_perm (ref0, ref1, permA),
01200                    vec_perm (ref0, ref1, permB));
01201     vec_st (tmp, stride, dest);
01202 }
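
/* Editor's sketch (not part of the original source): horizontal half-pel
 * interpolation is one vec_avg of the row with itself shifted by one
 * byte, obtained via two permutes whose indices differ by 1.  Since
 * vavgub computes (a + b + 1) >> 1, this is exactly the MPEG-2 rounding.
 * Note that 17 source bytes are needed, so the second load uses offset
 * 16, unlike the offset-15 load in the non-interpolating functions.
 * The helper name is hypothetical. */
static vector_u8_t half_pel_x_16 (const unsigned char * p)
{
    vector_u8_t permA = vec_lvsl (0, p);
    vector_u8_t permB = vec_add (permA, vec_splat_u8 (1));
    vector_u8_t lo = vec_ld (0, p);
    vector_u8_t hi = vec_ld (16, p);    /* byte p[16] is required */
    return vec_avg (vec_perm (lo, hi, permA),   /* p[i]   */
                    vec_perm (lo, hi, permB));  /* p[i+1] */
}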
01203 
01204 void MC_put_x_8_altivec (unsigned char * dest, const unsigned char * ref,
01205                          const int stride, int height)
01206 {
01207     vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
01208 
01209     ones = vec_splat_u8 (1);
01210     tmp0 = vec_lvsl (0, ref);
01211     tmp0 = vec_mergeh (tmp0, tmp0);
01212     perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01213     perm0B = vec_add (perm0A, ones);
01214     tmp1 = vec_lvsl (stride, ref);
01215     tmp1 = vec_mergeh (tmp1, tmp1);
01216     perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01217     perm1B = vec_add (perm1A, ones);
01218 
01219     height = (height >> 1) - 1;
01220 
01221     ref0 = vec_ld (0, ref);
01222     ref1 = vec_ld (8, ref);
01223     ref += stride;
01224     tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
01225                     vec_perm (ref0, ref1, perm0B));
01226 
01227     do {
01228         ref0 = vec_ld (0, ref);
01229         ref1 = vec_ld (8, ref);
01230         ref += stride;
01231         vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01232         vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01233         dest += stride;
01234         tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
01235                         vec_perm (ref0, ref1, perm1B));
01236 
01237         ref0 = vec_ld (0, ref);
01238         ref1 = vec_ld (8, ref);
01239         ref += stride;
01240         vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01241         vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01242         dest += stride;
01243         tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
01244                         vec_perm (ref0, ref1, perm0B));
01245     } while (--height);
01246 
01247     ref0 = vec_ld (0, ref);
01248     ref1 = vec_ld (8, ref);
01249     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01250     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01251     dest += stride;
01252     tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
01253                     vec_perm (ref0, ref1, perm1B));
01254     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01255     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01256 }
01257 
01258 void MC_put_y_16_altivec (unsigned char * dest, const unsigned char * ref,
01259                           const int stride, int height)
01260 {
01261     vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
01262 
01263     perm = vec_lvsl (0, ref);
01264 
01265     height = (height >> 1) - 1;
01266 
01267     ref0 = vec_ld (0, ref);
01268     ref1 = vec_ld (15, ref);
01269     ref += stride;
01270     tmp0 = vec_perm (ref0, ref1, perm);
01271     ref0 = vec_ld (0, ref);
01272     ref1 = vec_ld (15, ref);
01273     ref += stride;
01274     tmp1 = vec_perm (ref0, ref1, perm);
01275     tmp = vec_avg (tmp0, tmp1);
01276 
01277     do {
01278         ref0 = vec_ld (0, ref);
01279         ref1 = vec_ld (15, ref);
01280         ref += stride;
01281         vec_st (tmp, 0, dest);
01282         tmp0 = vec_perm (ref0, ref1, perm);
01283         tmp = vec_avg (tmp0, tmp1);
01284 
01285         ref0 = vec_ld (0, ref);
01286         ref1 = vec_ld (15, ref);
01287         ref += stride;
01288         vec_st (tmp, stride, dest);
01289         dest += 2*stride;
01290         tmp1 = vec_perm (ref0, ref1, perm);
01291         tmp = vec_avg (tmp0, tmp1);
01292     } while (--height);
01293 
01294     ref0 = vec_ld (0, ref);
01295     ref1 = vec_ld (15, ref);
01296     vec_st (tmp, 0, dest);
01297     tmp0 = vec_perm (ref0, ref1, perm);
01298     tmp = vec_avg (tmp0, tmp1);
01299     vec_st (tmp, stride, dest);
01300 }
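
/* Editor's note: vertical half-pel interpolation is vec_avg of two
 * consecutive rows.  The loop is software-pipelined: tmp0 and tmp1
 * alternate as the "previous row", so each iteration loads two new
 * rows, stores two results, and reuses the row loaded last time
 * instead of loading it again. */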
01301 
01302 void MC_put_y_8_altivec (unsigned char * dest, const unsigned char * ref,
01303                          const int stride, int height)
01304 {
01305     vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
01306 
01307     tmp0 = vec_lvsl (0, ref);
01308     tmp0 = vec_mergeh (tmp0, tmp0);
01309     perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01310     tmp1 = vec_lvsl (stride, ref);
01311     tmp1 = vec_mergeh (tmp1, tmp1);
01312     perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01313 
01314     height = (height >> 1) - 1;
01315 
01316     ref0 = vec_ld (0, ref);
01317     ref1 = vec_ld (7, ref);
01318     ref += stride;
01319     tmp0 = vec_perm (ref0, ref1, perm0);
01320     ref0 = vec_ld (0, ref);
01321     ref1 = vec_ld (7, ref);
01322     ref += stride;
01323     tmp1 = vec_perm (ref0, ref1, perm1);
01324     tmp = vec_avg (tmp0, tmp1);
01325 
01326     do {
01327         ref0 = vec_ld (0, ref);
01328         ref1 = vec_ld (7, ref);
01329         ref += stride;
01330         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01331         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01332         dest += stride;
01333         tmp0 = vec_perm (ref0, ref1, perm0);
01334         tmp = vec_avg (tmp0, tmp1);
01335 
01336         ref0 = vec_ld (0, ref);
01337         ref1 = vec_ld (7, ref);
01338         ref += stride;
01339         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01340         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01341         dest += stride;
01342         tmp1 = vec_perm (ref0, ref1, perm1);
01343         tmp = vec_avg (tmp0, tmp1);
01344     } while (--height);
01345 
01346     ref0 = vec_ld (0, ref);
01347     ref1 = vec_ld (7, ref);
01348     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01349     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01350     dest += stride;
01351     tmp0 = vec_perm (ref0, ref1, perm0);
01352     tmp = vec_avg (tmp0, tmp1);
01353     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01354     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01355 }
01356 
01357 void MC_put_xy_16_altivec (unsigned char * dest, const unsigned char * ref,
01358                            const int stride, int height)
01359 {
01360     vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
01361     vector_u8_t ones;
01362 
01363     ones = vec_splat_u8 (1);
01364     permA = vec_lvsl (0, ref);
01365     permB = vec_add (permA, ones);
01366 
01367     height = (height >> 1) - 1;
01368 
01369     ref0 = vec_ld (0, ref);
01370     ref1 = vec_ld (16, ref);
01371     ref += stride;
01372     A = vec_perm (ref0, ref1, permA);
01373     B = vec_perm (ref0, ref1, permB);
01374     avg0 = vec_avg (A, B);
01375     xor0 = vec_xor (A, B);
01376 
01377     ref0 = vec_ld (0, ref);
01378     ref1 = vec_ld (16, ref);
01379     ref += stride;
01380     A = vec_perm (ref0, ref1, permA);
01381     B = vec_perm (ref0, ref1, permB);
01382     avg1 = vec_avg (A, B);
01383     xor1 = vec_xor (A, B);
01384     tmp = vec_sub (vec_avg (avg0, avg1),
01385                    vec_and (vec_and (ones, vec_or (xor0, xor1)),
01386                             vec_xor (avg0, avg1)));
01387 
01388     do {
01389         ref0 = vec_ld (0, ref);
01390         ref1 = vec_ld (16, ref);
01391         ref += stride;
01392         vec_st (tmp, 0, dest);
01393         A = vec_perm (ref0, ref1, permA);
01394         B = vec_perm (ref0, ref1, permB);
01395         avg0 = vec_avg (A, B);
01396         xor0 = vec_xor (A, B);
01397         tmp = vec_sub (vec_avg (avg0, avg1),
01398                        vec_and (vec_and (ones, vec_or (xor0, xor1)),
01399                                 vec_xor (avg0, avg1)));
01400 
01401         ref0 = vec_ld (0, ref);
01402         ref1 = vec_ld (16, ref);
01403         ref += stride;
01404         vec_st (tmp, stride, dest);
01405         dest += 2*stride;
01406         A = vec_perm (ref0, ref1, permA);
01407         B = vec_perm (ref0, ref1, permB);
01408         avg1 = vec_avg (A, B);
01409         xor1 = vec_xor (A, B);
01410         tmp = vec_sub (vec_avg (avg0, avg1),
01411                        vec_and (vec_and (ones, vec_or (xor0, xor1)),
01412                                 vec_xor (avg0, avg1)));
01413     } while (--height);
01414 
01415     ref0 = vec_ld (0, ref);
01416     ref1 = vec_ld (16, ref);
01417     vec_st (tmp, 0, dest);
01418     A = vec_perm (ref0, ref1, permA);
01419     B = vec_perm (ref0, ref1, permB);
01420     avg0 = vec_avg (A, B);
01421     xor0 = vec_xor (A, B);
01422     tmp = vec_sub (vec_avg (avg0, avg1),
01423                    vec_and (vec_and (ones, vec_or (xor0, xor1)),
01424                             vec_xor (avg0, avg1)));
01425     vec_st (tmp, stride, dest);
01426 }
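
/* Editor's sketch (not part of the original source): the correction term
 * in MC_put_xy_16_altivec above.  MPEG-2 wants (A + B + C + D + 2) >> 2
 * for the diagonal case, but vavgub rounds up, so vec_avg (vec_avg (A, B),
 * vec_avg (C, D)) can overshoot by one.  The overshoot happens exactly
 * when at least one inner average rounded (bit 0 of A^B or C^D set) and
 * the two inner averages have opposite parity (bit 0 of avg0^avg1 set),
 * which is what the vec_and/vec_or chain detects.  A scalar check, with
 * hypothetical names, over a sample of byte values: */
#include <assert.h>
#include <inttypes.h>

static uint8_t avg_round (unsigned int a, unsigned int b)
{
    return (uint8_t) ((a + b + 1) >> 1);        /* what vavgub does */
}

void check_xy_identity (void)
{
    unsigned int a, b, c, d;

    for (a = 0; a < 256; a += 17)
        for (b = 0; b < 256; b += 13)
            for (c = 0; c < 256; c += 11)
                for (d = 0; d < 256; d += 7) {
                    uint8_t avg0 = avg_round (a, b);
                    uint8_t avg1 = avg_round (c, d);
                    uint8_t fixed = avg_round (avg0, avg1) -
                        (1 & ((a ^ b) | (c ^ d)) & (avg0 ^ avg1));
                    assert (fixed == ((a + b + c + d + 2) >> 2));
                }
}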
01427 
01428 void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
01429                           const int stride, int height)
01430 {
01431     vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
01432     vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
01433 
01434     ones = vec_splat_u8 (1);
01435     perm0A = vec_lvsl (0, ref);
01436     perm0A = vec_mergeh (perm0A, perm0A);
01437     perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
01438     perm0B = vec_add (perm0A, ones);
01439     perm1A = vec_lvsl (stride, ref);
01440     perm1A = vec_mergeh (perm1A, perm1A);
01441     perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
01442     perm1B = vec_add (perm1A, ones);
01443 
01444     height = (height >> 1) - 1;
01445 
01446     ref0 = vec_ld (0, ref);
01447     ref1 = vec_ld (8, ref);
01448     ref += stride;
01449     A = vec_perm (ref0, ref1, perm0A);
01450     B = vec_perm (ref0, ref1, perm0B);
01451     avg0 = vec_avg (A, B);
01452     xor0 = vec_xor (A, B);
01453 
01454     ref0 = vec_ld (0, ref);
01455     ref1 = vec_ld (8, ref);
01456     ref += stride;
01457     A = vec_perm (ref0, ref1, perm1A);
01458     B = vec_perm (ref0, ref1, perm1B);
01459     avg1 = vec_avg (A, B);
01460     xor1 = vec_xor (A, B);
01461     tmp = vec_sub (vec_avg (avg0, avg1),
01462                    vec_and (vec_and (ones, vec_or (xor0, xor1)),
01463                             vec_xor (avg0, avg1)));
01464 
01465     do {
01466         ref0 = vec_ld (0, ref);
01467         ref1 = vec_ld (8, ref);
01468         ref += stride;
01469         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01470         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01471         dest += stride;
01472         A = vec_perm (ref0, ref1, perm0A);
01473         B = vec_perm (ref0, ref1, perm0B);
01474         avg0 = vec_avg (A, B);
01475         xor0 = vec_xor (A, B);
01476         tmp = vec_sub (vec_avg (avg0, avg1),
01477                        vec_and (vec_and (ones, vec_or (xor0, xor1)),
01478                                 vec_xor (avg0, avg1)));
01479 
01480         ref0 = vec_ld (0, ref);
01481         ref1 = vec_ld (8, ref);
01482         ref += stride;
01483         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01484         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01485         dest += stride;
01486         A = vec_perm (ref0, ref1, perm1A);
01487         B = vec_perm (ref0, ref1, perm1B);
01488         avg1 = vec_avg (A, B);
01489         xor1 = vec_xor (A, B);
01490         tmp = vec_sub (vec_avg (avg0, avg1),
01491                        vec_and (vec_and (ones, vec_or (xor0, xor1)),
01492                                 vec_xor (avg0, avg1)));
01493     } while (--height);
01494 
01495     ref0 = vec_ld (0, ref);
01496     ref1 = vec_ld (8, ref);
01497     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01498     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01499     dest += stride;
01500     A = vec_perm (ref0, ref1, perm0A);
01501     B = vec_perm (ref0, ref1, perm0B);
01502     avg0 = vec_avg (A, B);
01503     xor0 = vec_xor (A, B);
01504     tmp = vec_sub (vec_avg (avg0, avg1),
01505                    vec_and (vec_and (ones, vec_or (xor0, xor1)),
01506                             vec_xor (avg0, avg1)));
01507     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01508     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01509 }
01510 
01511 #if 0
01512 void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
01513                           const int stride, int height)
01514 {
01515     vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
01516     vector_u16_t splat2, temp;
01517 
01518     ones = vec_splat_u8 (1);
01519     permA = vec_lvsl (0, ref);
01520     permB = vec_add (permA, ones);
01521 
01522     zero = vec_splat_u8 (0);
01523     splat2 = vec_splat_u16 (2);
01524 
01525     do {
01526         ref0 = vec_ld (0, ref);
01527         ref1 = vec_ld (8, ref);
01528         ref += stride;
01529         A = vec_perm (ref0, ref1, permA);
01530         B = vec_perm (ref0, ref1, permB);
01531         ref0 = vec_ld (0, ref);
01532         ref1 = vec_ld (8, ref);
01533         C = vec_perm (ref0, ref1, permA);
01534         D = vec_perm (ref0, ref1, permB);
01535 
01536         temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
01537                                 (vector_u16_t)vec_mergeh (zero, B)),
01538                        vec_add ((vector_u16_t)vec_mergeh (zero, C),
01539                                 (vector_u16_t)vec_mergeh (zero, D)));
01540         temp = vec_sr (vec_add (temp, splat2), splat2);
01541         tmp = vec_pack (temp, temp);
01542 
01543         vec_st (tmp, 0, dest);
01544         dest += stride;
01545         tmp = vec_avg (vec_perm (ref0, ref1, permA),
01546                        vec_perm (ref0, ref1, permB));
01547     } while (--height);
01548 }
01549 #endif
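
/* Editor's note: the disabled variant above computes the diagonal case
 * exactly by widening to 16 bits (merge with zero, add the four terms,
 * add 2, shift right by 2) instead of using the vec_avg correction
 * trick.  It looks unfinished: vec_st writes a full 16 bytes, which
 * would clobber the 8 bytes beyond an 8-wide block, and the trailing
 * vec_avg assignment to tmp is dead.  That is presumably why the block
 * is compiled out. */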
01550 
01551 void MC_avg_o_16_altivec (unsigned char * dest, const unsigned char * ref,
01552                           const int stride, int height)
01553 {
01554     vector_u8_t perm, ref0, ref1, tmp, prev;
01555 
01556     perm = vec_lvsl (0, ref);
01557 
01558     height = (height >> 1) - 1;
01559 
01560     ref0 = vec_ld (0, ref);
01561     ref1 = vec_ld (15, ref);
01562     ref += stride;
01563     prev = vec_ld (0, dest);
01564     tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
01565 
01566     do {
01567         ref0 = vec_ld (0, ref);
01568         ref1 = vec_ld (15, ref);
01569         ref += stride;
01570         prev = vec_ld (stride, dest);
01571         vec_st (tmp, 0, dest);
01572         tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
01573 
01574         ref0 = vec_ld (0, ref);
01575         ref1 = vec_ld (15, ref);
01576         ref += stride;
01577         prev = vec_ld (2*stride, dest);
01578         vec_st (tmp, stride, dest);
01579         dest += 2*stride;
01580         tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
01581     } while (--height);
01582 
01583     ref0 = vec_ld (0, ref);
01584     ref1 = vec_ld (15, ref);
01585     prev = vec_ld (stride, dest);
01586     vec_st (tmp, 0, dest);
01587     tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
01588     vec_st (tmp, stride, dest);
01589 }
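
/* Editor's sketch (not part of the original source): the MC_avg_*
 * variants fold the existing destination into the prediction with one
 * extra rounding average, (pred + dest + 1) >> 1.  The unguarded
 * vec_ld/vec_st on dest show the 16-wide destination is assumed to be
 * quadword-aligned, and prev is loaded one row ahead of the store to
 * hide load latency.  The helper name is hypothetical. */
static vector_u8_t avg_with_dest (vector_u8_t pred, const unsigned char * dest)
{
    return vec_avg (vec_ld (0, dest), pred);
}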
01590 
01591 void MC_avg_o_8_altivec (unsigned char * dest, const unsigned char * ref,
01592                          const int stride, int height)
01593 {
01594     vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
01595 
01596     tmp0 = vec_lvsl (0, ref);
01597     tmp0 = vec_mergeh (tmp0, tmp0);
01598     perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01599     tmp1 = vec_lvsl (stride, ref);
01600     tmp1 = vec_mergeh (tmp1, tmp1);
01601     perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01602 
01603     height = (height >> 1) - 1;
01604 
01605     ref0 = vec_ld (0, ref);
01606     ref1 = vec_ld (7, ref);
01607     ref += stride;
01608     prev = vec_ld (0, dest);
01609     tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
01610 
01611     do {
01612         ref0 = vec_ld (0, ref);
01613         ref1 = vec_ld (7, ref);
01614         ref += stride;
01615         prev = vec_ld (stride, dest);
01616         vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01617         vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01618         dest += stride;
01619         tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
01620 
01621         ref0 = vec_ld (0, ref);
01622         ref1 = vec_ld (7, ref);
01623         ref += stride;
01624         prev = vec_ld (stride, dest);
01625         vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01626         vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01627         dest += stride;
01628         tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
01629     } while (--height);
01630 
01631     ref0 = vec_ld (0, ref);
01632     ref1 = vec_ld (7, ref);
01633     prev = vec_ld (stride, dest);
01634     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01635     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01636     dest += stride;
01637     tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
01638     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01639     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01640 }
01641 
01642 void MC_avg_x_16_altivec (unsigned char * dest, const unsigned char * ref,
01643                           const int stride, int height)
01644 {
01645     vector_u8_t permA, permB, ref0, ref1, tmp, prev;
01646 
01647     permA = vec_lvsl (0, ref);
01648     permB = vec_add (permA, vec_splat_u8 (1));
01649 
01650     height = (height >> 1) - 1;
01651 
01652     ref0 = vec_ld (0, ref);
01653     ref1 = vec_ld (16, ref);
01654     prev = vec_ld (0, dest);
01655     ref += stride;
01656     tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
01657                                   vec_perm (ref0, ref1, permB)));
01658 
01659     do {
01660         ref0 = vec_ld (0, ref);
01661         ref1 = vec_ld (16, ref);
01662         ref += stride;
01663         prev = vec_ld (stride, dest);
01664         vec_st (tmp, 0, dest);
01665         tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
01666                                       vec_perm (ref0, ref1, permB)));
01667 
01668         ref0 = vec_ld (0, ref);
01669         ref1 = vec_ld (16, ref);
01670         ref += stride;
01671         prev = vec_ld (2*stride, dest);
01672         vec_st (tmp, stride, dest);
01673         dest += 2*stride;
01674         tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
01675                                       vec_perm (ref0, ref1, permB)));
01676     } while (--height);
01677 
01678     ref0 = vec_ld (0, ref);
01679     ref1 = vec_ld (16, ref);
01680     prev = vec_ld (stride, dest);
01681     vec_st (tmp, 0, dest);
01682     tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
01683                                   vec_perm (ref0, ref1, permB)));
01684     vec_st (tmp, stride, dest);
01685 }
01686 
01687 void MC_avg_x_8_altivec (unsigned char * dest, const unsigned char * ref,
01688                          const int stride, int height)
01689 {
01690     vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
01691     vector_u8_t prev;
01692 
01693     ones = vec_splat_u8 (1);
01694     tmp0 = vec_lvsl (0, ref);
01695     tmp0 = vec_mergeh (tmp0, tmp0);
01696     perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01697     perm0B = vec_add (perm0A, ones);
01698     tmp1 = vec_lvsl (stride, ref);
01699     tmp1 = vec_mergeh (tmp1, tmp1);
01700     perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01701     perm1B = vec_add (perm1A, ones);
01702 
01703     height = (height >> 1) - 1;
01704 
01705     ref0 = vec_ld (0, ref);
01706     ref1 = vec_ld (8, ref);
01707     prev = vec_ld (0, dest);
01708     ref += stride;
01709     tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
01710                                    vec_perm (ref0, ref1, perm0B)));
01711 
01712     do {
01713         ref0 = vec_ld (0, ref);
01714         ref1 = vec_ld (8, ref);
01715         ref += stride;
01716         prev = vec_ld (stride, dest);
01717         vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01718         vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01719         dest += stride;
01720         tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
01721                                        vec_perm (ref0, ref1, perm1B)));
01722 
01723         ref0 = vec_ld (0, ref);
01724         ref1 = vec_ld (8, ref);
01725         ref += stride;
01726         prev = vec_ld (stride, dest);
01727         vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01728         vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01729         dest += stride;
01730         tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
01731                                        vec_perm (ref0, ref1, perm0B)));
01732     } while (--height);
01733 
01734     ref0 = vec_ld (0, ref);
01735     ref1 = vec_ld (8, ref);
01736     prev = vec_ld (stride, dest);
01737     vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
01738     vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
01739     dest += stride;
01740     tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
01741                                    vec_perm (ref0, ref1, perm1B)));
01742     vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
01743     vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
01744 }
01745 
01746 void MC_avg_y_16_altivec (unsigned char * dest, const unsigned char * ref,
01747                           const int stride, int height)
01748 {
01749     vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
01750 
01751     perm = vec_lvsl (0, ref);
01752 
01753     height = (height >> 1) - 1;
01754 
01755     ref0 = vec_ld (0, ref);
01756     ref1 = vec_ld (15, ref);
01757     ref += stride;
01758     tmp0 = vec_perm (ref0, ref1, perm);
01759     ref0 = vec_ld (0, ref);
01760     ref1 = vec_ld (15, ref);
01761     ref += stride;
01762     prev = vec_ld (0, dest);
01763     tmp1 = vec_perm (ref0, ref1, perm);
01764     tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01765 
01766     do {
01767         ref0 = vec_ld (0, ref);
01768         ref1 = vec_ld (15, ref);
01769         ref += stride;
01770         prev = vec_ld (stride, dest);
01771         vec_st (tmp, 0, dest);
01772         tmp0 = vec_perm (ref0, ref1, perm);
01773         tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01774 
01775         ref0 = vec_ld (0, ref);
01776         ref1 = vec_ld (15, ref);
01777         ref += stride;
01778         prev = vec_ld (2*stride, dest);
01779         vec_st (tmp, stride, dest);
01780         dest += 2*stride;
01781         tmp1 = vec_perm (ref0, ref1, perm);
01782         tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01783     } while (--height);
01784 
01785     ref0 = vec_ld (0, ref);
01786     ref1 = vec_ld (15, ref);
01787     prev = vec_ld (stride, dest);
01788     vec_st (tmp, 0, dest);
01789     tmp0 = vec_perm (ref0, ref1, perm);
01790     tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01791     vec_st (tmp, stride, dest);
01792 }
01793 
01794 void MC_avg_y_8_altivec (unsigned char * dest, const unsigned char * ref,
01795                          const int stride, int height)
01796 {
01797     vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
01798 
01799     tmp0 = vec_lvsl (0, ref);
01800     tmp0 = vec_mergeh (tmp0, tmp0);
01801     perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
01802     tmp1 = vec_lvsl (stride, ref);
01803     tmp1 = vec_mergeh (tmp1, tmp1);
01804     perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
01805 
01806     height = (height >> 1) - 1;
01807 
01808     ref0 = vec_ld (0, ref);
01809     ref1 = vec_ld (7, ref);
01810     ref += stride;
01811     tmp0 = vec_perm (ref0, ref1, perm0);
01812     ref0 = vec_ld (0, ref);
01813     ref1 = vec_ld (7, ref);
01814     ref += stride;
01815     prev = vec_ld (0, dest);
01816     tmp1 = vec_perm (ref0, ref1, perm1);
01817     tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01818 
01819     do {
01820         ref0 = vec_ld (0, ref);
01821         ref1 = vec_ld (7, ref);
01822         ref += stride;
01823         prev = vec_ld (stride, dest);
01824         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01825         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01826         dest += stride;
01827         tmp0 = vec_perm (ref0, ref1, perm0);
01828         tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01829 
01830         ref0 = vec_ld (0, ref);
01831         ref1 = vec_ld (7, ref);
01832         ref += stride;
01833         prev = vec_ld (stride, dest);
01834         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01835         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01836         dest += stride;
01837         tmp1 = vec_perm (ref0, ref1, perm1);
01838         tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01839     } while (--height);
01840 
01841     ref0 = vec_ld (0, ref);
01842     ref1 = vec_ld (7, ref);
01843     prev = vec_ld (stride, dest);
01844     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01845     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01846     dest += stride;
01847     tmp0 = vec_perm (ref0, ref1, perm0);
01848     tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
01849     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01850     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01851 }
01852 
01853 void MC_avg_xy_16_altivec (unsigned char * dest, const unsigned char * ref,
01854                            const int stride, int height)
01855 {
01856     vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
01857     vector_u8_t ones, prev;
01858 
01859     ones = vec_splat_u8 (1);
01860     permA = vec_lvsl (0, ref);
01861     permB = vec_add (permA, ones);
01862 
01863     height = (height >> 1) - 1;
01864 
01865     ref0 = vec_ld (0, ref);
01866     ref1 = vec_ld (16, ref);
01867     ref += stride;
01868     A = vec_perm (ref0, ref1, permA);
01869     B = vec_perm (ref0, ref1, permB);
01870     avg0 = vec_avg (A, B);
01871     xor0 = vec_xor (A, B);
01872 
01873     ref0 = vec_ld (0, ref);
01874     ref1 = vec_ld (16, ref);
01875     ref += stride;
01876     prev = vec_ld (0, dest);
01877     A = vec_perm (ref0, ref1, permA);
01878     B = vec_perm (ref0, ref1, permB);
01879     avg1 = vec_avg (A, B);
01880     xor1 = vec_xor (A, B);
01881     tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
01882                                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
01883                                            vec_xor (avg0, avg1))));
01884 
01885     do {
01886         ref0 = vec_ld (0, ref);
01887         ref1 = vec_ld (16, ref);
01888         ref += stride;
01889         prev = vec_ld (stride, dest);
01890         vec_st (tmp, 0, dest);
01891         A = vec_perm (ref0, ref1, permA);
01892         B = vec_perm (ref0, ref1, permB);
01893         avg0 = vec_avg (A, B);
01894         xor0 = vec_xor (A, B);
01895         tmp = vec_avg (prev,
01896                        vec_sub (vec_avg (avg0, avg1),
01897                                 vec_and (vec_and (ones, vec_or (xor0, xor1)),
01898                                          vec_xor (avg0, avg1))));
01899 
01900         ref0 = vec_ld (0, ref);
01901         ref1 = vec_ld (16, ref);
01902         ref += stride;
01903         prev = vec_ld (2*stride, dest);
01904         vec_st (tmp, stride, dest);
01905         dest += 2*stride;
01906         A = vec_perm (ref0, ref1, permA);
01907         B = vec_perm (ref0, ref1, permB);
01908         avg1 = vec_avg (A, B);
01909         xor1 = vec_xor (A, B);
01910         tmp = vec_avg (prev,
01911                        vec_sub (vec_avg (avg0, avg1),
01912                                 vec_and (vec_and (ones, vec_or (xor0, xor1)),
01913                                          vec_xor (avg0, avg1))));
01914     } while (--height);
01915 
01916     ref0 = vec_ld (0, ref);
01917     ref1 = vec_ld (16, ref);
01918     prev = vec_ld (stride, dest);
01919     vec_st (tmp, 0, dest);
01920     A = vec_perm (ref0, ref1, permA);
01921     B = vec_perm (ref0, ref1, permB);
01922     avg0 = vec_avg (A, B);
01923     xor0 = vec_xor (A, B);
01924     tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
01925                                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
01926                                            vec_xor (avg0, avg1))));
01927     vec_st (tmp, stride, dest);
01928 }
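
/* Editor's note: order matters here.  The half-pel prediction is formed
 * and rounded to 8 bits first, and only then averaged with the bytes
 * already in dest, which is why the vec_sub correction sits inside the
 * second operand of the outer vec_avg rather than being applied
 * afterwards. */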
01929 
01930 void MC_avg_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
01931                           const int stride, int height)
01932 {
01933     vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
01934     vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
01935 
01936     ones = vec_splat_u8 (1);
01937     perm0A = vec_lvsl (0, ref);
01938     perm0A = vec_mergeh (perm0A, perm0A);
01939     perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
01940     perm0B = vec_add (perm0A, ones);
01941     perm1A = vec_lvsl (stride, ref);
01942     perm1A = vec_mergeh (perm1A, perm1A);
01943     perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
01944     perm1B = vec_add (perm1A, ones);
01945 
01946     height = (height >> 1) - 1;
01947 
01948     ref0 = vec_ld (0, ref);
01949     ref1 = vec_ld (8, ref);
01950     ref += stride;
01951     A = vec_perm (ref0, ref1, perm0A);
01952     B = vec_perm (ref0, ref1, perm0B);
01953     avg0 = vec_avg (A, B);
01954     xor0 = vec_xor (A, B);
01955 
01956     ref0 = vec_ld (0, ref);
01957     ref1 = vec_ld (8, ref);
01958     ref += stride;
01959     prev = vec_ld (0, dest);
01960     A = vec_perm (ref0, ref1, perm1A);
01961     B = vec_perm (ref0, ref1, perm1B);
01962     avg1 = vec_avg (A, B);
01963     xor1 = vec_xor (A, B);
01964     tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
01965                                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
01966                                            vec_xor (avg0, avg1))));
01967 
01968     do {
01969         ref0 = vec_ld (0, ref);
01970         ref1 = vec_ld (8, ref);
01971         ref += stride;
01972         prev = vec_ld (stride, dest);
01973         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01974         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01975         dest += stride;
01976         A = vec_perm (ref0, ref1, perm0A);
01977         B = vec_perm (ref0, ref1, perm0B);
01978         avg0 = vec_avg (A, B);
01979         xor0 = vec_xor (A, B);
01980         tmp = vec_avg (prev,
01981                        vec_sub (vec_avg (avg0, avg1),
01982                                 vec_and (vec_and (ones, vec_or (xor0, xor1)),
01983                                          vec_xor (avg0, avg1))));
01984 
01985         ref0 = vec_ld (0, ref);
01986         ref1 = vec_ld (8, ref);
01987         ref += stride;
01988         prev = vec_ld (stride, dest);
01989         vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
01990         vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
01991         dest += stride;
01992         A = vec_perm (ref0, ref1, perm1A);
01993         B = vec_perm (ref0, ref1, perm1B);
01994         avg1 = vec_avg (A, B);
01995         xor1 = vec_xor (A, B);
01996         tmp = vec_avg (prev,
01997                        vec_sub (vec_avg (avg0, avg1),
01998                                 vec_and (vec_and (ones, vec_or (xor0, xor1)),
01999                                          vec_xor (avg0, avg1))));
02000     } while (--height);
02001 
02002     ref0 = vec_ld (0, ref);
02003     ref1 = vec_ld (8, ref);
02004     prev = vec_ld (stride, dest);
02005     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
02006     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
02007     dest += stride;
02008     A = vec_perm (ref0, ref1, perm0A);
02009     B = vec_perm (ref0, ref1, perm0B);
02010     avg0 = vec_avg (A, B);
02011     xor0 = vec_xor (A, B);
02012     tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
02013                                   vec_and (vec_and (ones, vec_or (xor0, xor1)),
02014                                            vec_xor (avg0, avg1))));
02015     vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
02016     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
02017 }
02018 
02019 #endif  /* __ALTIVEC__ */
 
