00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef __ALTIVEC__
00025
00026 #include "config.h"
00027
00028 #ifdef ARCH_PPC
00029
00030 #include <inttypes.h>
00031
00032 #include "mpeg2.h"
00033 #include "mpeg2_internal.h"
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
/*
 * 16-wide "put", no interpolation: copy height rows of 16 bytes from an
 * arbitrarily aligned ref to a 16-byte-aligned dest.
 * Register use (read from the asm itself): r3 = dest, r4 = ref,
 * r5 = stride, r6 = height; v12 = lvsl alignment permute, v13 = current
 * output row.  Loop is unrolled two rows per iteration and software
 * pipelined: CTR = height/2 - 1, last two rows handled after bdnz.
 * NOTE(review): assumes height is even and >= 4, and the asm declares no
 * operands/clobbers -- it relies on the compiler leaving the argument
 * registers in place.  TODO confirm against callers / compiler flags.
 */
00049 static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
00050 int stride, int height)
00051 {
00052 asm (" \n"
00053 " srawi %r6, %r6, 1 \n"
00054 " li %r9, 15 \n"
00055 " addi %r6, %r6, -1 \n"
00056 " lvsl %v12, 0, %r4 \n"
00057 " mtctr %r6 \n"
00058 " lvx %v1, 0, %r4 \n"
00059 " lvx %v0, %r9, %r4 \n"
00060 " add %r0, %r5, %r5 \n"
00061 " vperm %v13, %v1, %v0, %v12 \n"
00062 " add %r4, %r4, %r5 \n"
/* main loop: store previous row, load+realign the next two rows */
00063 "._L6: \n"
00064 " li %r9, 15 \n"
00065 " lvx %v1, 0, %r4 \n"
00066 " lvx %v0, %r9, %r4 \n"
00067 " stvx %v13, 0, %r3 \n"
00068 " vperm %v13, %v1, %v0, %v12 \n"
00069 " add %r4, %r4, %r5 \n"
00070 " lvx %v1, 0, %r4 \n"
00071 " lvx %v0, %r9, %r4 \n"
00072 " stvx %v13, %r5, %r3 \n"
00073 " vperm %v13, %v1, %v0, %v12 \n"
00074 " add %r4, %r4, %r5 \n"
00075 " add %r3, %r3, %r0 \n"
00076 " bdnz ._L6 \n"
/* epilogue: flush the pipelined row and emit the final row */
00077 " lvx %v0, %r9, %r4 \n"
00078 " lvx %v1, 0, %r4 \n"
00079 " stvx %v13, 0, %r3 \n"
00080 " vperm %v13, %v1, %v0, %v12 \n"
00081 " stvx %v13, %r5, %r3 \n"
00082 );
00083 }
00084
/*
 * 8-wide "put", no interpolation.  Same register convention as the
 * 16-wide variant (r3=dest, r4=ref, r5=stride, r6=height).  Because the
 * block is only 8 bytes, two row-parity permutes are prebuilt (v10 for
 * even rows, v11 for odd rows, via lvsl at 0 and at stride, widened by
 * vmrghb then repacked by vpkuhum) and each row is stored as two 4-byte
 * word stores (stvewx at offsets 0 and 4).  Two rows per iteration,
 * software pipelined; CTR = height/2 - 1.
 * NOTE(review): assumes height even and >= 4; dest presumably 4-byte
 * aligned for the stvewx pair -- TODO confirm.
 */
00085 static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
00086 int stride, int height)
00087 {
00088 asm (" \n"
00089 " lvsl %v12, 0, %r4 \n"
00090 " lvsl %v1, %r5, %r4 \n"
00091 " vmrghb %v12, %v12, %v12 \n"
00092 " srawi %r6, %r6, 1 \n"
00093 " li %r9, 7 \n"
00094 " vmrghb %v1, %v1, %v1 \n"
00095 " addi %r6, %r6, -1 \n"
00096 " vpkuhum %v10, %v12, %v12 \n"
00097 " lvx %v13, 0, %r4 \n"
00098 " mtctr %r6 \n"
00099 " vpkuhum %v11, %v1, %v1 \n"
00100 " lvx %v0, %r9, %r4 \n"
00101 " add %r4, %r4, %r5 \n"
00102 " vperm %v12, %v13, %v0, %v10 \n"
/* main loop: alternate even/odd-row permutes, two rows per iteration */
00103 "._L11: \n"
00104 " li %r9, 7 \n"
00105 " lvx %v0, %r9, %r4 \n"
00106 " lvx %v13, 0, %r4 \n"
00107 " stvewx %v12, 0, %r3 \n"
00108 " li %r9, 4 \n"
00109 " vperm %v1, %v13, %v0, %v11 \n"
00110 " stvewx %v12, %r9, %r3 \n"
00111 " add %r4, %r4, %r5 \n"
00112 " li %r9, 7 \n"
00113 " lvx %v0, %r9, %r4 \n"
00114 " lvx %v13, 0, %r4 \n"
00115 " add %r3, %r3, %r5 \n"
00116 " stvewx %v1, 0, %r3 \n"
00117 " vperm %v12, %v13, %v0, %v10 \n"
00118 " li %r9, 4 \n"
00119 " stvewx %v1, %r9, %r3 \n"
00120 " add %r4, %r4, %r5 \n"
00121 " add %r3, %r3, %r5 \n"
00122 " bdnz ._L11 \n"
/* epilogue: final two rows */
00123 " li %r9, 7 \n"
00124 " lvx %v0, %r9, %r4 \n"
00125 " lvx %v13, 0, %r4 \n"
00126 " stvewx %v12, 0, %r3 \n"
00127 " li %r9, 4 \n"
00128 " vperm %v1, %v13, %v0, %v11 \n"
00129 " stvewx %v12, %r9, %r3 \n"
00130 " add %r3, %r3, %r5 \n"
00131 " stvewx %v1, 0, %r3 \n"
00132 " stvewx %v1, %r9, %r3 \n"
00133 );
00134 }
00135
/*
 * 16-wide "put", horizontal half-pel interpolation: each output byte is
 * vavgub(ref[x], ref[x+1]).  Two alignment permutes are used, v11 = lvsl
 * base permute and v10 = v11 + 1 (vaddubm with splat(1)), selecting the
 * same row shifted by one byte; the two permuted vectors are averaged.
 * r3=dest, r4=ref, r5=stride, r6=height; two rows per iteration,
 * software pipelined, CTR = height/2 - 1.
 * NOTE(review): assumes height even and >= 4; no clobber list declared.
 */
00136 static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
00137 int stride, int height)
00138 {
00139 asm (" \n"
00140 " lvsl %v11, 0, %r4 \n"
00141 " vspltisb %v0, 1 \n"
00142 " li %r9, 16 \n"
00143 " lvx %v12, 0, %r4 \n"
00144 " vaddubm %v10, %v11, %v0 \n"
00145 " lvx %v13, %r9, %r4 \n"
00146 " srawi %r6, %r6, 1 \n"
00147 " addi %r6, %r6, -1 \n"
00148 " vperm %v1, %v12, %v13, %v10 \n"
00149 " vperm %v0, %v12, %v13, %v11 \n"
00150 " mtctr %r6 \n"
00151 " add %r0, %r5, %r5 \n"
00152 " add %r4, %r4, %r5 \n"
00153 " vavgub %v0, %v0, %v1 \n"
/* main loop: store previous averaged row, compute the next two */
00154 "._L16: \n"
00155 " li %r9, 16 \n"
00156 " lvx %v12, 0, %r4 \n"
00157 " lvx %v13, %r9, %r4 \n"
00158 " stvx %v0, 0, %r3 \n"
00159 " vperm %v1, %v12, %v13, %v10 \n"
00160 " add %r4, %r4, %r5 \n"
00161 " vperm %v0, %v12, %v13, %v11 \n"
00162 " lvx %v12, 0, %r4 \n"
00163 " lvx %v13, %r9, %r4 \n"
00164 " vavgub %v0, %v0, %v1 \n"
00165 " stvx %v0, %r5, %r3 \n"
00166 " vperm %v1, %v12, %v13, %v10 \n"
00167 " add %r4, %r4, %r5 \n"
00168 " vperm %v0, %v12, %v13, %v11 \n"
00169 " add %r3, %r3, %r0 \n"
00170 " vavgub %v0, %v0, %v1 \n"
00171 " bdnz ._L16 \n"
/* epilogue: final two rows */
00172 " lvx %v13, %r9, %r4 \n"
00173 " lvx %v12, 0, %r4 \n"
00174 " stvx %v0, 0, %r3 \n"
00175 " vperm %v1, %v12, %v13, %v10 \n"
00176 " vperm %v0, %v12, %v13, %v11 \n"
00177 " vavgub %v0, %v0, %v1 \n"
00178 " stvx %v0, %r5, %r3 \n"
00179 );
00180 }
00181
/*
 * 8-wide "put", horizontal half-pel: out[x] = vavgub(ref[x], ref[x+1]).
 * Combines the 8-wide row-parity permute trick (v8 even-row / v9 odd-row
 * permutes built via vmrghb+vpkuhum) with the +1-shifted companions
 * (v7 = v8+1, v13 = v9+1) used for the x+1 tap.  r3=dest, r4=ref,
 * r5=stride, r6=height; rows stored as stvewx word pairs at 0 and 4;
 * two rows per iteration, CTR = height/2 - 1, last two rows after bdnz.
 * NOTE(review): assumes height even and >= 4 -- TODO confirm.
 */
00182 static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
00183 int stride, int height)
00184 {
00185 asm (" \n"
00186 " lvsl %v0, 0, %r4 \n"
00187 " vspltisb %v13, 1 \n"
00188 " lvsl %v10, %r5, %r4 \n"
00189 " vmrghb %v0, %v0, %v0 \n"
00190 " li %r9, 8 \n"
00191 " lvx %v11, 0, %r4 \n"
00192 " vmrghb %v10, %v10, %v10 \n"
00193 " vpkuhum %v8, %v0, %v0 \n"
00194 " lvx %v12, %r9, %r4 \n"
00195 " srawi %r6, %r6, 1 \n"
00196 " vpkuhum %v9, %v10, %v10 \n"
00197 " vaddubm %v7, %v8, %v13 \n"
00198 " addi %r6, %r6, -1 \n"
00199 " vperm %v1, %v11, %v12, %v8 \n"
00200 " mtctr %r6 \n"
00201 " vaddubm %v13, %v9, %v13 \n"
00202 " add %r4, %r4, %r5 \n"
00203 " vperm %v0, %v11, %v12, %v7 \n"
00204 " vavgub %v0, %v1, %v0 \n"
/* main loop: two rows per iteration, alternating permute parity */
00205 "._L21: \n"
00206 " li %r9, 8 \n"
00207 " lvx %v12, %r9, %r4 \n"
00208 " lvx %v11, 0, %r4 \n"
00209 " stvewx %v0, 0, %r3 \n"
00210 " li %r9, 4 \n"
00211 " vperm %v1, %v11, %v12, %v13 \n"
00212 " stvewx %v0, %r9, %r3 \n"
00213 " vperm %v0, %v11, %v12, %v9 \n"
00214 " add %r4, %r4, %r5 \n"
00215 " li %r9, 8 \n"
00216 " lvx %v12, %r9, %r4 \n"
00217 " vavgub %v10, %v0, %v1 \n"
00218 " lvx %v11, 0, %r4 \n"
00219 " add %r3, %r3, %r5 \n"
00220 " stvewx %v10, 0, %r3 \n"
00221 " vperm %v1, %v11, %v12, %v7 \n"
00222 " vperm %v0, %v11, %v12, %v8 \n"
00223 " li %r9, 4 \n"
00224 " stvewx %v10, %r9, %r3 \n"
00225 " add %r4, %r4, %r5 \n"
00226 " vavgub %v0, %v0, %v1 \n"
00227 " add %r3, %r3, %r5 \n"
00228 " bdnz ._L21 \n"
/* epilogue: final two rows */
00229 " li %r9, 8 \n"
00230 " lvx %v12, %r9, %r4 \n"
00231 " lvx %v11, 0, %r4 \n"
00232 " stvewx %v0, 0, %r3 \n"
00233 " li %r9, 4 \n"
00234 " vperm %v1, %v11, %v12, %v13 \n"
00235 " stvewx %v0, %r9, %r3 \n"
00236 " vperm %v0, %v11, %v12, %v9 \n"
00237 " add %r3, %r3, %r5 \n"
00238 " vavgub %v10, %v0, %v1 \n"
00239 " stvewx %v10, 0, %r3 \n"
00240 " stvewx %v10, %r9, %r3 \n"
00241 );
00242 }
00243
/*
 * 16-wide "put", vertical half-pel: out[y] = vavgub(row[y], row[y+1]).
 * Keeps the previous realigned row in a register (v12/v11 alternate) so
 * each source row is loaded only once.  r3=dest, r4=ref, r5=stride,
 * r6=height; two output rows per iteration, CTR = height/2 - 1, final
 * two rows after bdnz.
 * NOTE(review): assumes height even and >= 4; no clobber list declared.
 */
00244 static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
00245 int stride, int height)
00246 {
00247 asm (" \n"
00248 " li %r9, 15 \n"
00249 " lvsl %v10, 0, %r4 \n"
00250 " lvx %v13, 0, %r4 \n"
00251 " lvx %v1, %r9, %r4 \n"
00252 " add %r4, %r4, %r5 \n"
00253 " vperm %v12, %v13, %v1, %v10 \n"
00254 " srawi %r6, %r6, 1 \n"
00255 " lvx %v13, 0, %r4 \n"
00256 " lvx %v1, %r9, %r4 \n"
00257 " addi %r6, %r6, -1 \n"
00258 " vperm %v11, %v13, %v1, %v10 \n"
00259 " mtctr %r6 \n"
00260 " add %r0, %r5, %r5 \n"
00261 " add %r4, %r4, %r5 \n"
00262 " vavgub %v0, %v12, %v11 \n"
/* main loop: average each pair of adjacent rows, reusing the last one */
00263 "._L26: \n"
00264 " li %r9, 15 \n"
00265 " lvx %v13, 0, %r4 \n"
00266 " lvx %v1, %r9, %r4 \n"
00267 " stvx %v0, 0, %r3 \n"
00268 " vperm %v12, %v13, %v1, %v10 \n"
00269 " add %r4, %r4, %r5 \n"
00270 " lvx %v13, 0, %r4 \n"
00271 " lvx %v1, %r9, %r4 \n"
00272 " vavgub %v0, %v12, %v11 \n"
00273 " stvx %v0, %r5, %r3 \n"
00274 " vperm %v11, %v13, %v1, %v10 \n"
00275 " add %r4, %r4, %r5 \n"
00276 " add %r3, %r3, %r0 \n"
00277 " vavgub %v0, %v12, %v11 \n"
00278 " bdnz ._L26 \n"
/* epilogue: final two rows */
00279 " lvx %v1, %r9, %r4 \n"
00280 " lvx %v13, 0, %r4 \n"
00281 " stvx %v0, 0, %r3 \n"
00282 " vperm %v12, %v13, %v1, %v10 \n"
00283 " vavgub %v0, %v12, %v11 \n"
00284 " stvx %v0, %r5, %r3 \n"
00285 );
00286 }
00287
/*
 * 8-wide "put", vertical half-pel: out[y] = vavgub(row[y], row[y+1]),
 * with the 8-wide row-parity alignment permutes (v9 even rows, v10 odd
 * rows, built via vmrghb+vpkuhum) and stvewx word-pair stores.
 * r3=dest, r4=ref, r5=stride, r6=height; two rows per iteration,
 * CTR = height/2 - 1, final two rows after bdnz.
 * NOTE(review): assumes height even and >= 4 -- TODO confirm.
 */
00288 static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
00289 int stride, int height)
00290 {
00291 asm (" \n"
00292 " lvsl %v13, 0, %r4 \n"
00293 " lvsl %v11, %r5, %r4 \n"
00294 " vmrghb %v13, %v13, %v13 \n"
00295 " li %r9, 7 \n"
00296 " lvx %v12, 0, %r4 \n"
00297 " vmrghb %v11, %v11, %v11 \n"
00298 " lvx %v1, %r9, %r4 \n"
00299 " vpkuhum %v9, %v13, %v13 \n"
00300 " add %r4, %r4, %r5 \n"
00301 " vpkuhum %v10, %v11, %v11 \n"
00302 " vperm %v13, %v12, %v1, %v9 \n"
00303 " srawi %r6, %r6, 1 \n"
00304 " lvx %v12, 0, %r4 \n"
00305 " lvx %v1, %r9, %r4 \n"
00306 " addi %r6, %r6, -1 \n"
00307 " vperm %v11, %v12, %v1, %v10 \n"
00308 " mtctr %r6 \n"
00309 " add %r4, %r4, %r5 \n"
00310 " vavgub %v0, %v13, %v11 \n"
/* main loop: average adjacent rows, reusing the previous row's vector */
00311 "._L31: \n"
00312 " li %r9, 7 \n"
00313 " lvx %v1, %r9, %r4 \n"
00314 " lvx %v12, 0, %r4 \n"
00315 " stvewx %v0, 0, %r3 \n"
00316 " li %r9, 4 \n"
00317 " vperm %v13, %v12, %v1, %v9 \n"
00318 " stvewx %v0, %r9, %r3 \n"
00319 " add %r4, %r4, %r5 \n"
00320 " vavgub %v0, %v13, %v11 \n"
00321 " li %r9, 7 \n"
00322 " lvx %v1, %r9, %r4 \n"
00323 " lvx %v12, 0, %r4 \n"
00324 " add %r3, %r3, %r5 \n"
00325 " stvewx %v0, 0, %r3 \n"
00326 " vperm %v11, %v12, %v1, %v10 \n"
00327 " li %r9, 4 \n"
00328 " stvewx %v0, %r9, %r3 \n"
00329 " vavgub %v0, %v13, %v11 \n"
00330 " add %r4, %r4, %r5 \n"
00331 " add %r3, %r3, %r5 \n"
00332 " bdnz ._L31 \n"
/* epilogue: final two rows */
00333 " li %r9, 7 \n"
00334 " lvx %v1, %r9, %r4 \n"
00335 " lvx %v12, 0, %r4 \n"
00336 " stvewx %v0, 0, %r3 \n"
00337 " li %r9, 4 \n"
00338 " vperm %v13, %v12, %v1, %v9 \n"
00339 " stvewx %v0, %r9, %r3 \n"
00340 " add %r3, %r3, %r5 \n"
00341 " vavgub %v0, %v13, %v11 \n"
00342 " stvewx %v0, 0, %r3 \n"
00343 " stvewx %v0, %r9, %r3 \n"
00344 );
00345 }
00346
/*
 * 16-wide "put", 2D (x+y) half-pel: each output is the average of four
 * neighbouring pixels.  Per row it forms A = perm(base) and B =
 * perm(base+1) (v5 and v4 = v5+1), then combines two adjacent rows'
 * (A,B) averages.  The vxor/vor/vand(splat 1)/vsububm sequence around
 * the vavgub chain appears to be a low-order-bit correction so that the
 * cascaded 2-input rounding averages match the true 4-input rounded
 * average -- NOTE(review): verify against the scalar MC reference.
 * r3=dest, r4=ref, r5=stride, r6=height; two rows per iteration,
 * CTR = height/2 - 1, last two rows after bdnz; v6/v7 carry the previous
 * row's xor/avg across iterations.  Assumes height even and >= 4.
 */
00347 static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
00348 int stride, int height)
00349 {
00350 asm (" \n"
00351 " lvsl %v5, 0, %r4 \n"
00352 " vspltisb %v3, 1 \n"
00353 " li %r9, 16 \n"
00354 " lvx %v1, 0, %r4 \n"
00355 " vaddubm %v4, %v5, %v3 \n"
00356 " lvx %v0, %r9, %r4 \n"
00357 " add %r4, %r4, %r5 \n"
00358 " vperm %v10, %v1, %v0, %v4 \n"
00359 " srawi %r6, %r6, 1 \n"
00360 " vperm %v11, %v1, %v0, %v5 \n"
00361 " addi %r6, %r6, -1 \n"
00362 " lvx %v1, 0, %r4 \n"
00363 " mtctr %r6 \n"
00364 " lvx %v0, %r9, %r4 \n"
00365 " vavgub %v9, %v11, %v10 \n"
00366 " vxor %v8, %v11, %v10 \n"
00367 " add %r0, %r5, %r5 \n"
00368 " vperm %v10, %v1, %v0, %v4 \n"
00369 " add %r4, %r4, %r5 \n"
00370 " vperm %v11, %v1, %v0, %v5 \n"
00371 " vxor %v6, %v11, %v10 \n"
00372 " vavgub %v7, %v11, %v10 \n"
00373 " vor %v0, %v8, %v6 \n"
00374 " vxor %v13, %v9, %v7 \n"
00375 " vand %v0, %v3, %v0 \n"
00376 " vavgub %v1, %v9, %v7 \n"
00377 " vand %v0, %v0, %v13 \n"
00378 " vsububm %v13, %v1, %v0 \n"
/* main loop: two output rows per iteration, carrying v6/v7 forward */
00379 "._L36: \n"
00380 " li %r9, 16 \n"
00381 " lvx %v1, 0, %r4 \n"
00382 " lvx %v0, %r9, %r4 \n"
00383 " stvx %v13, 0, %r3 \n"
00384 " vperm %v10, %v1, %v0, %v4 \n"
00385 " add %r4, %r4, %r5 \n"
00386 " vperm %v11, %v1, %v0, %v5 \n"
00387 " lvx %v1, 0, %r4 \n"
00388 " lvx %v0, %r9, %r4 \n"
00389 " vavgub %v9, %v11, %v10 \n"
00390 " vxor %v8, %v11, %v10 \n"
00391 " add %r4, %r4, %r5 \n"
00392 " vperm %v10, %v1, %v0, %v4 \n"
00393 " vavgub %v12, %v9, %v7 \n"
00394 " vperm %v11, %v1, %v0, %v5 \n"
00395 " vor %v13, %v8, %v6 \n"
00396 " vxor %v0, %v9, %v7 \n"
00397 " vxor %v6, %v11, %v10 \n"
00398 " vand %v13, %v3, %v13 \n"
00399 " vavgub %v7, %v11, %v10 \n"
00400 " vor %v1, %v8, %v6 \n"
00401 " vand %v13, %v13, %v0 \n"
00402 " vxor %v0, %v9, %v7 \n"
00403 " vand %v1, %v3, %v1 \n"
00404 " vsububm %v13, %v12, %v13 \n"
00405 " vand %v1, %v1, %v0 \n"
00406 " stvx %v13, %r5, %r3 \n"
00407 " vavgub %v0, %v9, %v7 \n"
00408 " add %r3, %r3, %r0 \n"
00409 " vsububm %v13, %v0, %v1 \n"
00410 " bdnz ._L36 \n"
/* epilogue: final two rows */
00411 " lvx %v0, %r9, %r4 \n"
00412 " lvx %v1, 0, %r4 \n"
00413 " stvx %v13, 0, %r3 \n"
00414 " vperm %v10, %v1, %v0, %v4 \n"
00415 " vperm %v11, %v1, %v0, %v5 \n"
00416 " vxor %v8, %v11, %v10 \n"
00417 " vavgub %v9, %v11, %v10 \n"
00418 " vor %v0, %v8, %v6 \n"
00419 " vxor %v13, %v9, %v7 \n"
00420 " vand %v0, %v3, %v0 \n"
00421 " vavgub %v1, %v9, %v7 \n"
00422 " vand %v0, %v0, %v13 \n"
00423 " vsububm %v13, %v1, %v0 \n"
00424 " stvx %v13, %r5, %r3 \n"
00425 );
00426 }
00427
/*
 * 8-wide "put", 2D (x+y) half-pel.  Same 4-point average with low-bit
 * rounding correction as MC_put_xy_16_altivec (vxor/vor/vand(splat 1)/
 * vsububm around the cascaded vavgub -- NOTE(review): verify against the
 * scalar reference), combined with the 8-wide row-parity permutes:
 * v4/v2 = even-row perm and its +1 shift, v5/v19 = odd-row perm and its
 * +1 shift.  r3=dest, r4=ref, r5=stride, r6=height; rows stored as
 * stvewx word pairs; two rows per iteration, CTR = height/2 - 1, last
 * two rows after bdnz.  Assumes height even and >= 4.
 */
00428 static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
00429 int stride, int height)
00430 {
00431 asm (" \n"
00432 " lvsl %v4, 0, %r4 \n"
00433 " vspltisb %v3, 1 \n"
00434 " lvsl %v5, %r5, %r4 \n"
00435 " vmrghb %v4, %v4, %v4 \n"
00436 " li %r9, 8 \n"
00437 " vmrghb %v5, %v5, %v5 \n"
00438 " lvx %v1, 0, %r4 \n"
00439 " vpkuhum %v4, %v4, %v4 \n"
00440 " lvx %v0, %r9, %r4 \n"
00441 " vpkuhum %v5, %v5, %v5 \n"
00442 " add %r4, %r4, %r5 \n"
00443 " vaddubm %v2, %v4, %v3 \n"
00444 " vperm %v11, %v1, %v0, %v4 \n"
00445 " srawi %r6, %r6, 1 \n"
00446 " vaddubm %v19, %v5, %v3 \n"
00447 " addi %r6, %r6, -1 \n"
00448 " vperm %v10, %v1, %v0, %v2 \n"
00449 " mtctr %r6 \n"
00450 " lvx %v1, 0, %r4 \n"
00451 " lvx %v0, %r9, %r4 \n"
00452 " vavgub %v9, %v11, %v10 \n"
00453 " vxor %v8, %v11, %v10 \n"
00454 " add %r4, %r4, %r5 \n"
00455 " vperm %v10, %v1, %v0, %v19 \n"
00456 " vperm %v11, %v1, %v0, %v5 \n"
00457 " vxor %v6, %v11, %v10 \n"
00458 " vavgub %v7, %v11, %v10 \n"
00459 " vor %v0, %v8, %v6 \n"
00460 " vxor %v13, %v9, %v7 \n"
00461 " vand %v0, %v3, %v0 \n"
00462 " vavgub %v1, %v9, %v7 \n"
00463 " vand %v0, %v0, %v13 \n"
00464 " vsububm %v13, %v1, %v0 \n"
/* main loop: two output rows per iteration, carrying v6/v7 forward */
00465 "._L41: \n"
00466 " li %r9, 8 \n"
00467 " lvx %v0, %r9, %r4 \n"
00468 " lvx %v1, 0, %r4 \n"
00469 " stvewx %v13, 0, %r3 \n"
00470 " li %r9, 4 \n"
00471 " vperm %v10, %v1, %v0, %v2 \n"
00472 " stvewx %v13, %r9, %r3 \n"
00473 " vperm %v11, %v1, %v0, %v4 \n"
00474 " add %r4, %r4, %r5 \n"
00475 " li %r9, 8 \n"
00476 " vavgub %v9, %v11, %v10 \n"
00477 " lvx %v0, %r9, %r4 \n"
00478 " vxor %v8, %v11, %v10 \n"
00479 " lvx %v1, 0, %r4 \n"
00480 " vavgub %v12, %v9, %v7 \n"
00481 " vor %v13, %v8, %v6 \n"
00482 " add %r3, %r3, %r5 \n"
00483 " vperm %v10, %v1, %v0, %v19 \n"
00484 " li %r9, 4 \n"
00485 " vperm %v11, %v1, %v0, %v5 \n"
00486 " vand %v13, %v3, %v13 \n"
00487 " add %r4, %r4, %r5 \n"
00488 " vxor %v0, %v9, %v7 \n"
00489 " vxor %v6, %v11, %v10 \n"
00490 " vavgub %v7, %v11, %v10 \n"
00491 " vor %v1, %v8, %v6 \n"
00492 " vand %v13, %v13, %v0 \n"
00493 " vxor %v0, %v9, %v7 \n"
00494 " vand %v1, %v3, %v1 \n"
00495 " vsububm %v13, %v12, %v13 \n"
00496 " vand %v1, %v1, %v0 \n"
00497 " stvewx %v13, 0, %r3 \n"
00498 " vavgub %v0, %v9, %v7 \n"
00499 " stvewx %v13, %r9, %r3 \n"
00500 " add %r3, %r3, %r5 \n"
00501 " vsububm %v13, %v0, %v1 \n"
00502 " bdnz ._L41 \n"
/* epilogue: final two rows */
00503 " li %r9, 8 \n"
00504 " lvx %v0, %r9, %r4 \n"
00505 " lvx %v1, 0, %r4 \n"
00506 " stvewx %v13, 0, %r3 \n"
00507 " vperm %v10, %v1, %v0, %v2 \n"
00508 " li %r9, 4 \n"
00509 " vperm %v11, %v1, %v0, %v4 \n"
00510 " stvewx %v13, %r9, %r3 \n"
00511 " add %r3, %r3, %r5 \n"
00512 " vxor %v8, %v11, %v10 \n"
00513 " vavgub %v9, %v11, %v10 \n"
00514 " vor %v0, %v8, %v6 \n"
00515 " vxor %v13, %v9, %v7 \n"
00516 " vand %v0, %v3, %v0 \n"
00517 " vavgub %v1, %v9, %v7 \n"
00518 " vand %v0, %v0, %v13 \n"
00519 " vsububm %v13, %v1, %v0 \n"
00520 " stvewx %v13, 0, %r3 \n"
00521 " stvewx %v13, %r9, %r3 \n"
00522 );
00523 }
00524
/*
 * 16-wide "avg", no interpolation: dest[i] = vavgub(dest[i], ref[i]),
 * i.e. the realigned ref row is averaged with the existing dest contents
 * (lvx from r3) before being stored back.  r3=dest, r4=ref, r5=stride,
 * r6=height; two rows per iteration, CTR = height/2 - 1.
 * NOTE(review): the epilogue reads %r11, which is only set inside the
 * loop body (._L46) -- correct only because CTR >= 1, i.e. height >= 4.
 * TODO confirm callers never pass height 2.
 */
00525 static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
00526 int stride, int height)
00527 {
00528 asm (" \n"
00529 " li %r9, 15 \n"
00530 " lvx %v0, %r9, %r4 \n"
00531 " lvsl %v11, 0, %r4 \n"
00532 " lvx %v1, 0, %r4 \n"
00533 " srawi %r6, %r6, 1 \n"
00534 " addi %r6, %r6, -1 \n"
00535 " vperm %v0, %v1, %v0, %v11 \n"
00536 " lvx %v13, 0, %r3 \n"
00537 " mtctr %r6 \n"
00538 " add %r9, %r5, %r5 \n"
00539 " vavgub %v12, %v13, %v0 \n"
00540 " add %r4, %r4, %r5 \n"
/* main loop: read dest row ahead, average with realigned ref, store */
00541 "._L46: \n"
00542 " li %r11, 15 \n"
00543 " lvx %v1, 0, %r4 \n"
00544 " lvx %v0, %r11, %r4 \n"
00545 " lvx %v13, %r5, %r3 \n"
00546 " vperm %v0, %v1, %v0, %v11 \n"
00547 " stvx %v12, 0, %r3 \n"
00548 " add %r4, %r4, %r5 \n"
00549 " vavgub %v12, %v13, %v0 \n"
00550 " lvx %v1, 0, %r4 \n"
00551 " lvx %v0, %r11, %r4 \n"
00552 " lvx %v13, %r9, %r3 \n"
00553 " vperm %v0, %v1, %v0, %v11 \n"
00554 " stvx %v12, %r5, %r3 \n"
00555 " add %r4, %r4, %r5 \n"
00556 " vavgub %v12, %v13, %v0 \n"
00557 " add %r3, %r3, %r9 \n"
00558 " bdnz ._L46 \n"
/* epilogue: final two rows (relies on %r11 from the loop body) */
00559 " lvx %v0, %r11, %r4 \n"
00560 " lvx %v1, 0, %r4 \n"
00561 " lvx %v13, %r5, %r3 \n"
00562 " vperm %v0, %v1, %v0, %v11 \n"
00563 " stvx %v12, 0, %r3 \n"
00564 " vavgub %v12, %v13, %v0 \n"
00565 " stvx %v12, %r5, %r3 \n"
00566 );
00567 }
00568
/*
 * 8-wide "avg", no interpolation: dest = vavgub(dest, realigned ref)
 * per row, with the 8-wide row-parity permutes (v9 even rows, v10 odd
 * rows) and stvewx word-pair stores.  r3=dest, r4=ref, r5=stride,
 * r6=height; two rows per iteration, CTR = height/2 - 1, final two rows
 * after bdnz.  NOTE(review): assumes height even and >= 4.
 */
00569 static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
00570 int stride, int height)
00571 {
00572 asm (" \n"
00573 " lvsl %v12, 0, %r4 \n"
00574 " li %r9, 7 \n"
00575 " vmrghb %v12, %v12, %v12 \n"
00576 " lvsl %v1, %r5, %r4 \n"
00577 " lvx %v13, 0, %r4 \n"
00578 " vpkuhum %v9, %v12, %v12 \n"
00579 " lvx %v0, %r9, %r4 \n"
00580 " srawi %r6, %r6, 1 \n"
00581 " vmrghb %v1, %v1, %v1 \n"
00582 " addi %r6, %r6, -1 \n"
00583 " vperm %v0, %v13, %v0, %v9 \n"
00584 " lvx %v11, 0, %r3 \n"
00585 " mtctr %r6 \n"
00586 " vpkuhum %v10, %v1, %v1 \n"
00587 " add %r4, %r4, %r5 \n"
00588 " vavgub %v12, %v11, %v0 \n"
/* main loop: average dest with ref, two rows per iteration */
00589 "._L51: \n"
00590 " li %r9, 7 \n"
00591 " lvx %v0, %r9, %r4 \n"
00592 " lvx %v13, 0, %r4 \n"
00593 " lvx %v11, %r5, %r3 \n"
00594 " stvewx %v12, 0, %r3 \n"
00595 " vperm %v0, %v13, %v0, %v10 \n"
00596 " li %r9, 4 \n"
00597 " stvewx %v12, %r9, %r3 \n"
00598 " vavgub %v1, %v11, %v0 \n"
00599 " add %r4, %r4, %r5 \n"
00600 " li %r9, 7 \n"
00601 " lvx %v0, %r9, %r4 \n"
00602 " add %r3, %r3, %r5 \n"
00603 " lvx %v13, 0, %r4 \n"
00604 " lvx %v11, %r5, %r3 \n"
00605 " stvewx %v1, 0, %r3 \n"
00606 " vperm %v0, %v13, %v0, %v9 \n"
00607 " li %r9, 4 \n"
00608 " stvewx %v1, %r9, %r3 \n"
00609 " vavgub %v12, %v11, %v0 \n"
00610 " add %r4, %r4, %r5 \n"
00611 " add %r3, %r3, %r5 \n"
00612 " bdnz ._L51 \n"
/* epilogue: final two rows */
00613 " li %r9, 7 \n"
00614 " lvx %v0, %r9, %r4 \n"
00615 " lvx %v13, 0, %r4 \n"
00616 " lvx %v11, %r5, %r3 \n"
00617 " stvewx %v12, 0, %r3 \n"
00618 " vperm %v0, %v13, %v0, %v10 \n"
00619 " li %r9, 4 \n"
00620 " stvewx %v12, %r9, %r3 \n"
00621 " vavgub %v1, %v11, %v0 \n"
00622 " add %r3, %r3, %r5 \n"
00623 " stvewx %v1, 0, %r3 \n"
00624 " stvewx %v1, %r9, %r3 \n"
00625 );
00626 }
00627
/*
 * 16-wide "avg", horizontal half-pel: first vavgub(ref[x], ref[x+1])
 * (permutes v8 and v7 = v8+1), then vavgub with the existing dest row
 * (lvx from r3) before storing.  r3=dest, r4=ref, r5=stride, r6=height;
 * two rows per iteration, CTR = height/2 - 1.
 * NOTE(review): like MC_avg_o_16, the epilogue uses %r11 set only inside
 * the loop, so height must be >= 4 (and even) -- TODO confirm.
 */
00628 static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
00629 int stride, int height)
00630 {
00631 asm (" \n"
00632 " lvsl %v8, 0, %r4 \n"
00633 " vspltisb %v0, 1 \n"
00634 " li %r9, 16 \n"
00635 " lvx %v12, %r9, %r4 \n"
00636 " vaddubm %v7, %v8, %v0 \n"
00637 " lvx %v11, 0, %r4 \n"
00638 " srawi %r6, %r6, 1 \n"
00639 " vperm %v1, %v11, %v12, %v7 \n"
00640 " addi %r6, %r6, -1 \n"
00641 " vperm %v0, %v11, %v12, %v8 \n"
00642 " lvx %v9, 0, %r3 \n"
00643 " mtctr %r6 \n"
00644 " add %r9, %r5, %r5 \n"
00645 " vavgub %v0, %v0, %v1 \n"
00646 " add %r4, %r4, %r5 \n"
00647 " vavgub %v10, %v9, %v0 \n"
/* main loop: horizontal average then blend with dest, two rows/iter */
00648 "._L56: \n"
00649 " li %r11, 16 \n"
00650 " lvx %v11, 0, %r4 \n"
00651 " lvx %v12, %r11, %r4 \n"
00652 " lvx %v9, %r5, %r3 \n"
00653 " stvx %v10, 0, %r3 \n"
00654 " vperm %v0, %v11, %v12, %v7 \n"
00655 " add %r4, %r4, %r5 \n"
00656 " vperm %v1, %v11, %v12, %v8 \n"
00657 " lvx %v11, 0, %r4 \n"
00658 " lvx %v12, %r11, %r4 \n"
00659 " vavgub %v1, %v1, %v0 \n"
00660 " add %r4, %r4, %r5 \n"
00661 " vperm %v13, %v11, %v12, %v7 \n"
00662 " vavgub %v10, %v9, %v1 \n"
00663 " vperm %v0, %v11, %v12, %v8 \n"
00664 " lvx %v9, %r9, %r3 \n"
00665 " stvx %v10, %r5, %r3 \n"
00666 " vavgub %v0, %v0, %v13 \n"
00667 " add %r3, %r3, %r9 \n"
00668 " vavgub %v10, %v9, %v0 \n"
00669 " bdnz ._L56 \n"
/* epilogue: final two rows (relies on %r11 from the loop body) */
00670 " lvx %v12, %r11, %r4 \n"
00671 " lvx %v11, 0, %r4 \n"
00672 " lvx %v9, %r5, %r3 \n"
00673 " vperm %v1, %v11, %v12, %v7 \n"
00674 " stvx %v10, 0, %r3 \n"
00675 " vperm %v0, %v11, %v12, %v8 \n"
00676 " vavgub %v0, %v0, %v1 \n"
00677 " vavgub %v10, %v9, %v0 \n"
00678 " stvx %v10, %r5, %r3 \n"
00679 );
00680 }
00681
/*
 * 8-wide "avg", horizontal half-pel: vavgub(ref[x], ref[x+1]) per row
 * (row-parity permutes v7/v8 with +1 shifts v6/v13), then vavgub with
 * the existing dest row before the stvewx word-pair store.
 * r3=dest, r4=ref, r5=stride, r6=height; two rows per iteration,
 * CTR = height/2 - 1, final two rows after bdnz.
 * NOTE(review): assumes height even and >= 4 -- TODO confirm.
 */
00682 static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
00683 int stride, int height)
00684 {
00685 asm (" \n"
00686 " lvsl %v10, 0, %r4 \n"
00687 " vspltisb %v13, 1 \n"
00688 " li %r9, 8 \n"
00689 " vmrghb %v10, %v10, %v10 \n"
00690 " lvx %v11, 0, %r4 \n"
00691 " lvx %v12, %r9, %r4 \n"
00692 " vpkuhum %v7, %v10, %v10 \n"
00693 " srawi %r6, %r6, 1 \n"
00694 " lvsl %v10, %r5, %r4 \n"
00695 " vaddubm %v6, %v7, %v13 \n"
00696 " vperm %v0, %v11, %v12, %v7 \n"
00697 " addi %r6, %r6, -1 \n"
00698 " vmrghb %v10, %v10, %v10 \n"
00699 " lvx %v9, 0, %r3 \n"
00700 " mtctr %r6 \n"
00701 " vperm %v1, %v11, %v12, %v6 \n"
00702 " add %r4, %r4, %r5 \n"
00703 " vpkuhum %v8, %v10, %v10 \n"
00704 " vavgub %v0, %v0, %v1 \n"
00705 " vaddubm %v13, %v8, %v13 \n"
00706 " vavgub %v10, %v9, %v0 \n"
/* main loop: horizontal average then blend with dest, two rows/iter */
00707 "._L61: \n"
00708 " li %r9, 8 \n"
00709 " lvx %v12, %r9, %r4 \n"
00710 " lvx %v11, 0, %r4 \n"
00711 " lvx %v9, %r5, %r3 \n"
00712 " stvewx %v10, 0, %r3 \n"
00713 " vperm %v1, %v11, %v12, %v13 \n"
00714 " vperm %v0, %v11, %v12, %v8 \n"
00715 " li %r9, 4 \n"
00716 " stvewx %v10, %r9, %r3 \n"
00717 " add %r4, %r4, %r5 \n"
00718 " vavgub %v0, %v0, %v1 \n"
00719 " li %r9, 8 \n"
00720 " lvx %v12, %r9, %r4 \n"
00721 " vavgub %v10, %v9, %v0 \n"
00722 " lvx %v11, 0, %r4 \n"
00723 " add %r3, %r3, %r5 \n"
00724 " vperm %v1, %v11, %v12, %v6 \n"
00725 " lvx %v9, %r5, %r3 \n"
00726 " vperm %v0, %v11, %v12, %v7 \n"
00727 " stvewx %v10, 0, %r3 \n"
00728 " li %r9, 4 \n"
00729 " vavgub %v0, %v0, %v1 \n"
00730 " stvewx %v10, %r9, %r3 \n"
00731 " add %r4, %r4, %r5 \n"
00732 " add %r3, %r3, %r5 \n"
00733 " vavgub %v10, %v9, %v0 \n"
00734 " bdnz ._L61 \n"
/* epilogue: final two rows */
00735 " li %r9, 8 \n"
00736 " lvx %v12, %r9, %r4 \n"
00737 " lvx %v11, 0, %r4 \n"
00738 " lvx %v9, %r5, %r3 \n"
00739 " vperm %v1, %v11, %v12, %v13 \n"
00740 " stvewx %v10, 0, %r3 \n"
00741 " vperm %v0, %v11, %v12, %v8 \n"
00742 " li %r9, 4 \n"
00743 " stvewx %v10, %r9, %r3 \n"
00744 " vavgub %v0, %v0, %v1 \n"
00745 " add %r3, %r3, %r5 \n"
00746 " vavgub %v10, %v9, %v0 \n"
00747 " stvewx %v10, 0, %r3 \n"
00748 " stvewx %v10, %r9, %r3 \n"
00749 );
00750 }
00751
/*
 * 16-wide "avg", vertical half-pel: average of two adjacent ref rows
 * (reusing the previous realigned row in v11/v10), then vavgub with the
 * existing dest row before storing.  r3=dest, r4=ref, r5=stride,
 * r6=height; two rows per iteration, CTR = height/2 - 1.
 * NOTE(review): the epilogue reads %r11; it is set at 00762 in the
 * prologue as well as in the loop, so it is defined even for CTR edge
 * cases, but height is still assumed even and >= 4.
 */
00752 static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
00753 int stride, int height)
00754 {
00755 asm (" \n"
00756 " li %r9, 15 \n"
00757 " lvx %v1, %r9, %r4 \n"
00758 " lvsl %v9, 0, %r4 \n"
00759 " lvx %v13, 0, %r4 \n"
00760 " add %r4, %r4, %r5 \n"
00761 " vperm %v11, %v13, %v1, %v9 \n"
00762 " li %r11, 15 \n"
00763 " lvx %v13, 0, %r4 \n"
00764 " lvx %v1, %r11, %r4 \n"
00765 " srawi %r6, %r6, 1 \n"
00766 " vperm %v10, %v13, %v1, %v9 \n"
00767 " addi %r6, %r6, -1 \n"
00768 " lvx %v12, 0, %r3 \n"
00769 " mtctr %r6 \n"
00770 " vavgub %v0, %v11, %v10 \n"
00771 " add %r9, %r5, %r5 \n"
00772 " add %r4, %r4, %r5 \n"
00773 " vavgub %v0, %v12, %v0 \n"
/* main loop: vertical average then blend with dest, two rows/iter */
00774 "._L66: \n"
00775 " li %r11, 15 \n"
00776 " lvx %v13, 0, %r4 \n"
00777 " lvx %v1, %r11, %r4 \n"
00778 " lvx %v12, %r5, %r3 \n"
00779 " vperm %v11, %v13, %v1, %v9 \n"
00780 " stvx %v0, 0, %r3 \n"
00781 " add %r4, %r4, %r5 \n"
00782 " vavgub %v0, %v11, %v10 \n"
00783 " lvx %v13, 0, %r4 \n"
00784 " lvx %v1, %r11, %r4 \n"
00785 " vavgub %v0, %v12, %v0 \n"
00786 " add %r4, %r4, %r5 \n"
00787 " lvx %v12, %r9, %r3 \n"
00788 " vperm %v10, %v13, %v1, %v9 \n"
00789 " stvx %v0, %r5, %r3 \n"
00790 " vavgub %v0, %v11, %v10 \n"
00791 " add %r3, %r3, %r9 \n"
00792 " vavgub %v0, %v12, %v0 \n"
00793 " bdnz ._L66 \n"
/* epilogue: final two rows */
00794 " lvx %v1, %r11, %r4 \n"
00795 " lvx %v13, 0, %r4 \n"
00796 " lvx %v12, %r5, %r3 \n"
00797 " vperm %v11, %v13, %v1, %v9 \n"
00798 " stvx %v0, 0, %r3 \n"
00799 " vavgub %v0, %v11, %v10 \n"
00800 " vavgub %v0, %v12, %v0 \n"
00801 " stvx %v0, %r5, %r3 \n"
00802 );
00803 }
00804
/*
 * 8-wide "avg", vertical half-pel: average adjacent ref rows (8-wide
 * row-parity permutes v7/v8), then vavgub with the existing dest row,
 * stored as stvewx word pairs.  r3=dest, r4=ref, r5=stride, r6=height;
 * two rows per iteration, CTR = height/2 - 1, final two rows after bdnz.
 * NOTE(review): assumes height even and >= 4 -- TODO confirm.
 */
00805 static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
00806 int stride, int height)
00807 {
00808 asm (" \n"
00809 " lvsl %v12, 0, %r4 \n"
00810 " lvsl %v9, %r5, %r4 \n"
00811 " vmrghb %v12, %v12, %v12 \n"
00812 " li %r9, 7 \n"
00813 " lvx %v11, 0, %r4 \n"
00814 " vmrghb %v9, %v9, %v9 \n"
00815 " lvx %v13, %r9, %r4 \n"
00816 " vpkuhum %v7, %v12, %v12 \n"
00817 " add %r4, %r4, %r5 \n"
00818 " vpkuhum %v8, %v9, %v9 \n"
00819 " vperm %v12, %v11, %v13, %v7 \n"
00820 " srawi %r6, %r6, 1 \n"
00821 " lvx %v11, 0, %r4 \n"
00822 " lvx %v13, %r9, %r4 \n"
00823 " addi %r6, %r6, -1 \n"
00824 " vperm %v9, %v11, %v13, %v8 \n"
00825 " lvx %v10, 0, %r3 \n"
00826 " mtctr %r6 \n"
00827 " add %r4, %r4, %r5 \n"
00828 " vavgub %v0, %v12, %v9 \n"
00829 " vavgub %v1, %v10, %v0 \n"
/* main loop: vertical average then blend with dest, two rows/iter */
00830 "._L71: \n"
00831 " li %r9, 7 \n"
00832 " lvx %v13, %r9, %r4 \n"
00833 " lvx %v11, 0, %r4 \n"
00834 " lvx %v10, %r5, %r3 \n"
00835 " stvewx %v1, 0, %r3 \n"
00836 " vperm %v12, %v11, %v13, %v7 \n"
00837 " li %r9, 4 \n"
00838 " stvewx %v1, %r9, %r3 \n"
00839 " vavgub %v0, %v12, %v9 \n"
00840 " add %r4, %r4, %r5 \n"
00841 " li %r9, 7 \n"
00842 " vavgub %v1, %v10, %v0 \n"
00843 " lvx %v13, %r9, %r4 \n"
00844 " lvx %v11, 0, %r4 \n"
00845 " add %r3, %r3, %r5 \n"
00846 " vperm %v9, %v11, %v13, %v8 \n"
00847 " lvx %v10, %r5, %r3 \n"
00848 " stvewx %v1, 0, %r3 \n"
00849 " vavgub %v0, %v12, %v9 \n"
00850 " li %r9, 4 \n"
00851 " stvewx %v1, %r9, %r3 \n"
00852 " add %r4, %r4, %r5 \n"
00853 " vavgub %v1, %v10, %v0 \n"
00854 " add %r3, %r3, %r5 \n"
00855 " bdnz ._L71 \n"
/* epilogue: final two rows */
00856 " li %r9, 7 \n"
00857 " lvx %v13, %r9, %r4 \n"
00858 " lvx %v11, 0, %r4 \n"
00859 " lvx %v10, %r5, %r3 \n"
00860 " vperm %v12, %v11, %v13, %v7 \n"
00861 " stvewx %v1, 0, %r3 \n"
00862 " li %r9, 4 \n"
00863 " vavgub %v0, %v12, %v9 \n"
00864 " stvewx %v1, %r9, %r3 \n"
00865 " add %r3, %r3, %r5 \n"
00866 " vavgub %v1, %v10, %v0 \n"
00867 " stvewx %v1, 0, %r3 \n"
00868 " stvewx %v1, %r9, %r3 \n"
00869 );
00870 }
00871
/*
 * 16-wide "avg", 2D (x+y) half-pel: computes the 4-point interpolated
 * value exactly as MC_put_xy_16_altivec (cascaded vavgub with the
 * vxor/vor/vand(splat 1)/vsububm low-bit rounding correction --
 * NOTE(review): verify against the scalar reference), then vavgub's it
 * with the existing dest row (lvx from r3) before storing.
 * r3=dest, r4=ref, r5=stride, r6=height; two rows per iteration,
 * CTR = height/2 - 1; v5/v7 carry the previous row's xor/avg forward.
 * NOTE(review): the epilogue uses %r11; it is set at 00884 in the
 * prologue as well as inside the loop.  Assumes height even and >= 4.
 */
00872 static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
00873 int stride, int height)
00874 {
00875 asm (" \n"
00876 " lvsl %v4, 0, %r4 \n"
00877 " vspltisb %v2, 1 \n"
00878 " li %r9, 16 \n"
00879 " lvx %v1, %r9, %r4 \n"
00880 " vaddubm %v3, %v4, %v2 \n"
00881 " lvx %v13, 0, %r4 \n"
00882 " add %r4, %r4, %r5 \n"
00883 " vperm %v10, %v13, %v1, %v3 \n"
00884 " li %r11, 16 \n"
00885 " vperm %v11, %v13, %v1, %v4 \n"
00886 " srawi %r6, %r6, 1 \n"
00887 " lvx %v13, 0, %r4 \n"
00888 " lvx %v1, %r11, %r4 \n"
00889 " vavgub %v9, %v11, %v10 \n"
00890 " vxor %v8, %v11, %v10 \n"
00891 " addi %r6, %r6, -1 \n"
00892 " vperm %v10, %v13, %v1, %v3 \n"
00893 " lvx %v6, 0, %r3 \n"
00894 " mtctr %r6 \n"
00895 " vperm %v11, %v13, %v1, %v4 \n"
00896 " add %r9, %r5, %r5 \n"
00897 " add %r4, %r4, %r5 \n"
00898 " vxor %v5, %v11, %v10 \n"
00899 " vavgub %v7, %v11, %v10 \n"
00900 " vor %v1, %v8, %v5 \n"
00901 " vxor %v13, %v9, %v7 \n"
00902 " vand %v1, %v2, %v1 \n"
00903 " vavgub %v0, %v9, %v7 \n"
00904 " vand %v1, %v1, %v13 \n"
00905 " vsububm %v0, %v0, %v1 \n"
00906 " vavgub %v12, %v6, %v0 \n"
/* main loop: 4-point interpolation blended with dest, two rows/iter */
00907 "._L76: \n"
00908 " li %r11, 16 \n"
00909 " lvx %v13, 0, %r4 \n"
00910 " lvx %v1, %r11, %r4 \n"
00911 " lvx %v6, %r5, %r3 \n"
00912 " stvx %v12, 0, %r3 \n"
00913 " vperm %v10, %v13, %v1, %v3 \n"
00914 " vperm %v11, %v13, %v1, %v4 \n"
00915 " add %r4, %r4, %r5 \n"
00916 " lvx %v13, 0, %r4 \n"
00917 " lvx %v1, %r11, %r4 \n"
00918 " vavgub %v9, %v11, %v10 \n"
00919 " vxor %v8, %v11, %v10 \n"
00920 " add %r4, %r4, %r5 \n"
00921 " vperm %v10, %v13, %v1, %v3 \n"
00922 " vavgub %v12, %v9, %v7 \n"
00923 " vperm %v11, %v13, %v1, %v4 \n"
00924 " vor %v0, %v8, %v5 \n"
00925 " vxor %v13, %v9, %v7 \n"
00926 " vxor %v5, %v11, %v10 \n"
00927 " vand %v0, %v2, %v0 \n"
00928 " vavgub %v7, %v11, %v10 \n"
00929 " vor %v1, %v8, %v5 \n"
00930 " vand %v0, %v0, %v13 \n"
00931 " vand %v1, %v2, %v1 \n"
00932 " vxor %v13, %v9, %v7 \n"
00933 " vsububm %v12, %v12, %v0 \n"
00934 " vand %v1, %v1, %v13 \n"
00935 " vavgub %v0, %v9, %v7 \n"
00936 " vavgub %v12, %v6, %v12 \n"
00937 " lvx %v6, %r9, %r3 \n"
00938 " vsububm %v0, %v0, %v1 \n"
00939 " stvx %v12, %r5, %r3 \n"
00940 " vavgub %v12, %v6, %v0 \n"
00941 " add %r3, %r3, %r9 \n"
00942 " bdnz ._L76 \n"
/* epilogue: final two rows */
00943 " lvx %v1, %r11, %r4 \n"
00944 " lvx %v13, 0, %r4 \n"
00945 " lvx %v6, %r5, %r3 \n"
00946 " vperm %v10, %v13, %v1, %v3 \n"
00947 " stvx %v12, 0, %r3 \n"
00948 " vperm %v11, %v13, %v1, %v4 \n"
00949 " vxor %v8, %v11, %v10 \n"
00950 " vavgub %v9, %v11, %v10 \n"
00951 " vor %v0, %v8, %v5 \n"
00952 " vxor %v13, %v9, %v7 \n"
00953 " vand %v0, %v2, %v0 \n"
00954 " vavgub %v1, %v9, %v7 \n"
00955 " vand %v0, %v0, %v13 \n"
00956 " vsububm %v1, %v1, %v0 \n"
00957 " vavgub %v12, %v6, %v1 \n"
00958 " stvx %v12, %r5, %r3 \n"
00959 );
00960 }
00961
/*
 * 8-wide "avg", 2D (x+y) half-pel: 4-point interpolation as in
 * MC_put_xy_8_altivec (row-parity permutes v2/v3 with +1 shifts v18/v17;
 * cascaded vavgub with the vxor/vor/vand(splat 1)/vsububm low-bit
 * rounding correction -- NOTE(review): verify against the scalar
 * reference), then blended with the existing dest row via vavgub and
 * stored as stvewx word pairs.  r3=dest, r4=ref, r5=stride, r6=height;
 * two rows per iteration, CTR = height/2 - 1, final two rows after bdnz;
 * v5/v6 carry the previous row's xor/avg forward.
 * NOTE(review): assumes height even and >= 4 -- TODO confirm.
 */
00962 static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
00963 int stride, int height)
00964 {
00965 asm (" \n"
00966 " lvsl %v2, 0, %r4 \n"
00967 " vspltisb %v19, 1 \n"
00968 " lvsl %v3, %r5, %r4 \n"
00969 " vmrghb %v2, %v2, %v2 \n"
00970 " li %r9, 8 \n"
00971 " vmrghb %v3, %v3, %v3 \n"
00972 " lvx %v9, 0, %r4 \n"
00973 " vpkuhum %v2, %v2, %v2 \n"
00974 " lvx %v1, %r9, %r4 \n"
00975 " vpkuhum %v3, %v3, %v3 \n"
00976 " add %r4, %r4, %r5 \n"
00977 " vaddubm %v18, %v2, %v19 \n"
00978 " vperm %v11, %v9, %v1, %v2 \n"
00979 " srawi %r6, %r6, 1 \n"
00980 " vaddubm %v17, %v3, %v19 \n"
00981 " addi %r6, %r6, -1 \n"
00982 " vperm %v10, %v9, %v1, %v18 \n"
00983 " lvx %v4, 0, %r3 \n"
00984 " mtctr %r6 \n"
00985 " lvx %v1, %r9, %r4 \n"
00986 " lvx %v9, 0, %r4 \n"
00987 " vavgub %v8, %v11, %v10 \n"
00988 " vxor %v7, %v11, %v10 \n"
00989 " add %r4, %r4, %r5 \n"
00990 " vperm %v10, %v9, %v1, %v17 \n"
00991 " vperm %v11, %v9, %v1, %v3 \n"
00992 " vxor %v5, %v11, %v10 \n"
00993 " vavgub %v6, %v11, %v10 \n"
00994 " vor %v1, %v7, %v5 \n"
00995 " vxor %v13, %v8, %v6 \n"
00996 " vand %v1, %v19, %v1 \n"
00997 " vavgub %v0, %v8, %v6 \n"
00998 " vand %v1, %v1, %v13 \n"
00999 " vsububm %v0, %v0, %v1 \n"
01000 " vavgub %v13, %v4, %v0 \n"
/* main loop: 4-point interpolation blended with dest, two rows/iter */
01001 "._L81: \n"
01002 " li %r9, 8 \n"
01003 " lvx %v1, %r9, %r4 \n"
01004 " lvx %v9, 0, %r4 \n"
01005 " lvx %v4, %r5, %r3 \n"
01006 " stvewx %v13, 0, %r3 \n"
01007 " vperm %v10, %v9, %v1, %v18 \n"
01008 " vperm %v11, %v9, %v1, %v2 \n"
01009 " li %r9, 4 \n"
01010 " stvewx %v13, %r9, %r3 \n"
01011 " vxor %v7, %v11, %v10 \n"
01012 " add %r4, %r4, %r5 \n"
01013 " li %r9, 8 \n"
01014 " vavgub %v8, %v11, %v10 \n"
01015 " lvx %v1, %r9, %r4 \n"
01016 " vor %v0, %v7, %v5 \n"
01017 " lvx %v9, 0, %r4 \n"
01018 " vxor %v12, %v8, %v6 \n"
01019 " vand %v0, %v19, %v0 \n"
01020 " add %r3, %r3, %r5 \n"
01021 " vperm %v10, %v9, %v1, %v17 \n"
01022 " vavgub %v13, %v8, %v6 \n"
01023 " li %r9, 4 \n"
01024 " vperm %v11, %v9, %v1, %v3 \n"
01025 " vand %v0, %v0, %v12 \n"
01026 " add %r4, %r4, %r5 \n"
01027 " vxor %v5, %v11, %v10 \n"
01028 " vavgub %v6, %v11, %v10 \n"
01029 " vor %v1, %v7, %v5 \n"
01030 " vsububm %v13, %v13, %v0 \n"
01031 " vxor %v0, %v8, %v6 \n"
01032 " vand %v1, %v19, %v1 \n"
01033 " vavgub %v13, %v4, %v13 \n"
01034 " vand %v1, %v1, %v0 \n"
01035 " lvx %v4, %r5, %r3 \n"
01036 " vavgub %v0, %v8, %v6 \n"
01037 " stvewx %v13, 0, %r3 \n"
01038 " stvewx %v13, %r9, %r3 \n"
01039 " vsububm %v0, %v0, %v1 \n"
01040 " add %r3, %r3, %r5 \n"
01041 " vavgub %v13, %v4, %v0 \n"
01042 " bdnz ._L81 \n"
/* epilogue: final two rows */
01043 " li %r9, 8 \n"
01044 " lvx %v1, %r9, %r4 \n"
01045 " lvx %v9, 0, %r4 \n"
01046 " lvx %v4, %r5, %r3 \n"
01047 " vperm %v10, %v9, %v1, %v18 \n"
01048 " stvewx %v13, 0, %r3 \n"
01049 " vperm %v11, %v9, %v1, %v2 \n"
01050 " li %r9, 4 \n"
01051 " stvewx %v13, %r9, %r3 \n"
01052 " vxor %v7, %v11, %v10 \n"
01053 " add %r3, %r3, %r5 \n"
01054 " vavgub %v8, %v11, %v10 \n"
01055 " vor %v0, %v7, %v5 \n"
01056 " vxor %v13, %v8, %v6 \n"
01057 " vand %v0, %v19, %v0 \n"
01058 " vavgub %v1, %v8, %v6 \n"
01059 " vand %v0, %v0, %v13 \n"
01060 " vsububm %v1, %v1, %v0 \n"
01061 " vavgub %v13, %v4, %v1 \n"
01062 " stvewx %v13, 0, %r3 \n"
01063 " stvewx %v13, %r9, %r3 \n"
01064 );
01065 }
01066
01067 MPEG2_MC_EXTERN (altivec)
01068
01069 #endif
01070
01071 #else
01072
01073 #define vector_s16_t vector signed short
01074 #define vector_u16_t vector unsigned short
01075 #define vector_s8_t vector signed char
01076 #define vector_u8_t vector unsigned char
01077 #define vector_s32_t vector signed int
01078 #define vector_u32_t vector unsigned int
01079
/*
 * 16-wide "put", no interpolation -- AltiVec C-intrinsics version used
 * when __ALTIVEC__ is defined (the compiler's own vector support).
 * Copies height rows of 16 bytes from an arbitrarily aligned ref to
 * dest: vec_ld(0)/vec_ld(15) fetch the two 16-byte chunks covering the
 * unaligned row and vec_perm realigns them with the vec_lvsl permute.
 * The loop is software pipelined (the perm for the next row overlaps the
 * store of the current one) and unrolled two rows per iteration;
 * height/2 - 1 iterations, last two rows after the loop.
 * NOTE(review): vec_st requires dest 16-byte aligned, and height must be
 * even and >= 4 for the loop structure -- TODO confirm callers.
 */
01080 void MC_put_o_16_altivec (unsigned char * dest, const unsigned char * ref,
01081 const int stride, int height)
01082 {
01083 vector_u8_t perm, ref0, ref1, tmp;
01084
01085 perm = vec_lvsl (0, ref);
01086
01087 height = (height >> 1) - 1;
01088
01089 ref0 = vec_ld (0, ref);
01090 ref1 = vec_ld (15, ref);
01091 ref += stride;
01092 tmp = vec_perm (ref0, ref1, perm);
01093
01094 do {
/* two rows per iteration: store previous, load+realign next */
01095 ref0 = vec_ld (0, ref);
01096 ref1 = vec_ld (15, ref);
01097 ref += stride;
01098 vec_st (tmp, 0, dest);
01099 tmp = vec_perm (ref0, ref1, perm);
01100
01101 ref0 = vec_ld (0, ref);
01102 ref1 = vec_ld (15, ref);
01103 ref += stride;
01104 vec_st (tmp, stride, dest);
01105 dest += 2*stride;
01106 tmp = vec_perm (ref0, ref1, perm);
01107 } while (--height);
01108
/* epilogue: final two rows */
01109 ref0 = vec_ld (0, ref);
01110 ref1 = vec_ld (15, ref);
01111 vec_st (tmp, 0, dest);
01112 tmp = vec_perm (ref0, ref1, perm);
01113 vec_st (tmp, stride, dest);
01114 }
01115
/* Copy an 8-wide block of `height` rows from `ref` to `dest`
 * (no half-pel offset).
 *
 * Permute-vector construction: vec_lvsl yields {s, s+1, ..., s+15};
 * vec_mergeh doubles each of the first eight entries and the modulo
 * vec_pack of u16 halves keeps the low byte of each pair, producing
 * {s..s+7, s..s+7} — i.e. the 8 source bytes of a row replicated into
 * both halves of the vector.  Two permute vectors (perm0/perm1) are kept
 * because consecutive rows have different alignment when `stride` is not
 * a multiple of 16.
 *
 * Rows are written with two 4-byte vec_ste stores (offsets 0 and 4);
 * this assumes `dest` is at least 4-byte aligned — TODO confirm.
 *
 * Two rows per loop iteration, software-pipelined with prologue/epilogue.
 * NOTE(review): assumes height is even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_put_o_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    /* prologue: align the first row (loads at 0 and 7 cover 8 bytes) */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
    } while (--height);

    /* epilogue: last two rows */
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_perm (ref0, ref1, perm1);
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
01162
/* 16-wide copy with horizontal half-pel interpolation:
 * dest[x] = (ref[x] + ref[x+1] + 1) >> 1, via vec_avg (rounds up).
 *
 * permB = permA + 1 selects the bytes one position to the right; each row
 * needs 17 source bytes, hence the second aligned load at offset 16
 * (rather than 15 as in the non-interpolating version).
 *
 * Two rows per loop iteration, software-pipelined with prologue/epilogue.
 * NOTE(review): assumes height even and >= 4 (MPEG-2 passes 8 or 16),
 * and 16-byte-aligned `dest` for vec_st. */
void MC_put_x_16_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, tmp;

    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, vec_splat_u8 (1));

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
		   vec_perm (ref0, ref1, permB));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (vec_perm (ref0, ref1, permA),
		   vec_perm (ref0, ref1, permB));
    vec_st (tmp, stride, dest);
}
01203
/* 8-wide copy with horizontal half-pel interpolation:
 * dest[x] = (ref[x] + ref[x+1] + 1) >> 1, via vec_avg.
 *
 * perm0A/perm1A realign the 8 row bytes (lvsl + mergeh + modulo pack
 * replicates {s..s+7} into both vector halves); the *B variants add 1 to
 * select the right-hand neighbours.  Separate perms per row parity cope
 * with strides that are not a multiple of 16.  Nine source bytes are
 * needed per row, hence the second load at offset 8.
 *
 * Rows are written as two 4-byte vec_ste stores (assumes `dest` 4-byte
 * aligned — TODO confirm).  Two rows per iteration, software-pipelined.
 * NOTE(review): assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_put_x_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;

    ones = vec_splat_u8 (1);
    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    perm0B = vec_add (perm0A, ones);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
		    vec_perm (ref0, ref1, perm0B));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
			vec_perm (ref0, ref1, perm1B));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
			vec_perm (ref0, ref1, perm0B));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
		    vec_perm (ref0, ref1, perm1B));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
01257
/* 16-wide copy with vertical half-pel interpolation:
 * dest row y = avg(ref row y, ref row y+1), rounding up via vec_avg.
 *
 * tmp0/tmp1 ping-pong between the two most recently aligned rows so each
 * source row is loaded only once.  Unaligned rows are assembled with
 * lvsl/vec_perm from aligned loads at offsets 0 and 15.
 *
 * Two output rows per loop iteration, software-pipelined with
 * prologue (first two source rows) and epilogue.
 * NOTE(review): assumes height even and >= 4 (MPEG-2 passes 8 or 16),
 * and 16-byte-aligned `dest` for vec_st. */
void MC_put_y_16_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp1 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (tmp0, tmp1);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	tmp0 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (tmp0, tmp1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp1 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (tmp0, tmp1);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (tmp0, tmp1);
    vec_st (tmp, stride, dest);
}
01301
/* 8-wide copy with vertical half-pel interpolation:
 * dest row y = avg(ref row y, ref row y+1), rounding up via vec_avg.
 *
 * perm0/perm1 realign alternating source rows (lvsl + mergeh + modulo
 * pack replicates the 8 row bytes into both vector halves); two perms
 * are needed because row alignment alternates when `stride` is not a
 * multiple of 16.  tmp0/tmp1 ping-pong so each row is loaded once.
 *
 * Output rows are written as two 4-byte vec_ste stores (assumes `dest`
 * 4-byte aligned — TODO confirm).  Two rows per iteration,
 * software-pipelined with prologue/epilogue.
 * NOTE(review): assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_put_y_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp1 = vec_perm (ref0, ref1, perm1);
    tmp = vec_avg (tmp0, tmp1);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
	tmp = vec_avg (tmp0, tmp1);

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);
	tmp = vec_avg (tmp0, tmp1);
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    tmp = vec_avg (tmp0, tmp1);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
01356
/* 16-wide copy with half-pel interpolation in both directions:
 * dest[x][y] ~ (A + B + C + D + 2) >> 2 over the 2x2 neighbourhood.
 *
 * Per source row: A = bytes at x, B = bytes at x+1 (permB = permA + 1;
 * 17 bytes needed, hence the load at offset 16); avgN = vec_avg(A, B)
 * and xorN = A ^ B are kept for the rounding correction.
 *
 * Rounding: vec_avg rounds up, so cascading vec_avg(avg0, avg1) can
 * over-round by one versus (A+B+C+D+2)>>2.  The subtracted term
 * (ones & (xor0 | xor1)) & (avg0 ^ avg1) appears to cancel exactly that
 * bias — NOTE(review): algebra inferred, verify against the scalar
 * reference implementation.
 *
 * Two output rows per loop iteration, software-pipelined; avg/xor pairs
 * ping-pong so each source row is processed once.
 * NOTE(review): assumes height even and >= 4, and 16-byte-aligned `dest`. */
void MC_put_xy_16_altivec (unsigned char * dest, const unsigned char * ref,
			   const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, 0, dest);
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));
    vec_st (tmp, stride, dest);
}
01427
/* 8-wide copy with half-pel interpolation in both directions:
 * dest[x][y] ~ (A + B + C + D + 2) >> 2 over the 2x2 neighbourhood.
 *
 * perm0A/perm1A realign alternating rows (lvsl + mergeh + modulo pack
 * replicates the 8 row bytes into both vector halves; two perms because
 * row alignment alternates when `stride` is not a multiple of 16); the
 * *B variants add 1 to select the right-hand neighbours (9 bytes needed,
 * hence the load at offset 8).
 *
 * Rounding: vec_avg rounds up, so cascading vec_avg(avg0, avg1) can
 * over-round versus (A+B+C+D+2)>>2; the subtracted
 * (ones & (xor0 | xor1)) & (avg0 ^ avg1) term appears to cancel that
 * bias — NOTE(review): verify against the scalar reference.
 *
 * Output rows go out as two 4-byte vec_ste stores (assumes `dest`
 * 4-byte aligned — TODO confirm).  Two rows per iteration,
 * software-pipelined; assumes height even and >= 4. */
void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;

    ones = vec_splat_u8 (1);
    perm0A = vec_lvsl (0, ref);
    perm0A = vec_mergeh (perm0A, perm0A);
    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
    perm0B = vec_add (perm0A, ones);
    perm1A = vec_lvsl (stride, ref);
    perm1A = vec_mergeh (perm1A, perm1A);
    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm1A);
    B = vec_perm (ref0, ref1, perm1B);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm0A);
	B = vec_perm (ref0, ref1, perm0B);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm1A);
	B = vec_perm (ref0, ref1, perm1B);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_sub (vec_avg (avg0, avg1),
		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
				vec_xor (avg0, avg1)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_sub (vec_avg (avg0, avg1),
		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
			    vec_xor (avg0, avg1)));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
01510
#if 0
/* DISABLED alternative MC_put_xy_8_altivec: widens the four neighbours to
 * u16, adds them with +2 bias and shifts right by 2 — the exact
 * (A+B+C+D+2)>>2 without the avg/xor trick.
 *
 * NOTE(review): apparent reasons this stays disabled, visible in the code
 * itself: vec_st writes a full 16 bytes to an 8-wide destination row, and
 * the trailing vec_avg assignment to `tmp` after the store is dead.
 * Kept for reference only; do not enable as-is. */
void MC_put_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
    vector_u16_t splat2, temp;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    zero = vec_splat_u8 (0);
    splat2 = vec_splat_u16 (2);

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	C = vec_perm (ref0, ref1, permA);
	D = vec_perm (ref0, ref1, permB);

	/* widen each neighbour to u16 (mergeh with zero), sum, +2, >>2 */
	temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
				 (vector_u16_t)vec_mergeh (zero, B)),
			vec_add ((vector_u16_t)vec_mergeh (zero, C),
				 (vector_u16_t)vec_mergeh (zero, D)));
	temp = vec_sr (vec_add (temp, splat2), splat2);
	tmp = vec_pack (temp, temp);

	vec_st (tmp, 0, dest);
	dest += stride;
	tmp = vec_avg (vec_perm (ref0, ref1, permA),
		       vec_perm (ref0, ref1, permB));
    } while (--height);
}
#endif
01550
/* 16-wide motion compensation with averaging (no half-pel offset):
 * dest = avg(dest, realigned ref), rounding up via vec_avg — the
 * bidirectional/"avg" prediction mode that blends the new prediction
 * with the existing contents of `dest`.
 *
 * `ref` may be unaligned (lvsl/vec_perm over loads at 0 and 15);
 * vec_ld/vec_st on `dest` assume it is 16-byte aligned.
 *
 * Software-pipelined, two rows per iteration: `prev` is the destination
 * row that the value computed in this step will be stored over in the
 * next step (note the vec_ld at stride / 2*stride before dest advances).
 * NOTE(review): assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_o_16_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp, prev;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
    vec_st (tmp, stride, dest);
}
01590
/* 8-wide motion compensation with averaging (no half-pel offset):
 * dest = avg(dest, realigned ref), rounding up via vec_avg.
 *
 * perm0/perm1 realign alternating source rows (lvsl + mergeh + modulo
 * pack replicates the 8 row bytes into both vector halves; two perms
 * because alignment alternates when `stride` is not a multiple of 16).
 *
 * `prev` is loaded from the destination row the next store will target;
 * the avg only needs the 8 bytes the subsequent vec_ste pair writes, so
 * the permuted ref halves line up with the vec_ld of `dest` — assumes
 * `dest` is at least 4-byte aligned (TODO confirm, as for the other
 * 8-wide kernels).
 *
 * Two rows per iteration, software-pipelined with prologue/epilogue;
 * assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_o_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
01641
/* 16-wide motion compensation with averaging and horizontal half-pel
 * interpolation: dest = avg(dest, avg(ref[x], ref[x+1])), both averages
 * rounding up via vec_avg.
 *
 * permB = permA + 1 selects right-hand neighbours; 17 source bytes per
 * row, hence the second aligned load at offset 16.  `prev` holds the
 * destination row the next store will overwrite (loaded one step ahead).
 * vec_ld/vec_st on `dest` assume 16-byte alignment.
 *
 * Two rows per iteration, software-pipelined with prologue/epilogue;
 * assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_x_16_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, tmp, prev;

    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, vec_splat_u8 (1));

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (0, dest);
    ref += stride;
    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				  vec_perm (ref0, ref1, permB)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				      vec_perm (ref0, ref1, permB)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				      vec_perm (ref0, ref1, permB)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
				  vec_perm (ref0, ref1, permB)));
    vec_st (tmp, stride, dest);
}
01686
/* 8-wide motion compensation with averaging and horizontal half-pel
 * interpolation: dest = avg(dest, avg(ref[x], ref[x+1])).
 *
 * perm0A/perm1A realign alternating rows (lvsl + mergeh + modulo pack
 * replicates the 8 row bytes into both vector halves); *B variants add 1
 * for the right-hand neighbours (9 bytes needed, load at offset 8).
 * `prev` is the destination row the next vec_ste pair will overwrite.
 * Assumes `dest` at least 4-byte aligned — TODO confirm.
 *
 * Two rows per iteration, software-pipelined with prologue/epilogue;
 * assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_x_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
    vector_u8_t prev;

    ones = vec_splat_u8 (1);
    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    perm0B = vec_add (perm0A, ones);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (0, dest);
    ref += stride;
    tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
				   vec_perm (ref0, ref1, perm0B)));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
				       vec_perm (ref0, ref1, perm1B)));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
				       vec_perm (ref0, ref1, perm0B)));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
    dest += stride;
    tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
				   vec_perm (ref0, ref1, perm1B)));
    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
}
01745
/* 16-wide motion compensation with averaging and vertical half-pel
 * interpolation: dest row y = avg(dest row y, avg(ref row y, ref row y+1)).
 *
 * tmp0/tmp1 ping-pong between the two most recently aligned source rows
 * so each is loaded once (lvsl/vec_perm over loads at 0 and 15).  `prev`
 * is the destination row the next store will overwrite, loaded one step
 * ahead.  vec_ld/vec_st on `dest` assume 16-byte alignment.
 *
 * Two rows per iteration, software-pipelined with prologue/epilogue;
 * assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_y_16_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;

    perm = vec_lvsl (0, ref);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	tmp0 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (15, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	tmp1 = vec_perm (ref0, ref1, perm);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (15, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    tmp0 = vec_perm (ref0, ref1, perm);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_st (tmp, stride, dest);
}
01793
/* 8-wide motion compensation with averaging and vertical half-pel
 * interpolation: dest row y = avg(dest row y, avg(ref row y, ref row y+1)).
 *
 * perm0/perm1 realign alternating source rows (lvsl + mergeh + modulo
 * pack replicates the 8 row bytes into both vector halves).  tmp0/tmp1
 * ping-pong so each source row is loaded once; `prev` is the destination
 * row the next vec_ste pair will overwrite.  Assumes `dest` at least
 * 4-byte aligned — TODO confirm.
 *
 * Two rows per iteration, software-pipelined with prologue/epilogue;
 * assumes height even and >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_y_8_altivec (unsigned char * dest, const unsigned char * ref,
			 const int stride, int height)
{
    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;

    tmp0 = vec_lvsl (0, ref);
    tmp0 = vec_mergeh (tmp0, tmp0);
    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
    tmp1 = vec_lvsl (stride, ref);
    tmp1 = vec_mergeh (tmp1, tmp1);
    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    tmp1 = vec_perm (ref0, ref1, perm1);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp0 = vec_perm (ref0, ref1, perm0);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (7, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	tmp1 = vec_perm (ref0, ref1, perm1);
	tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (7, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    tmp0 = vec_perm (ref0, ref1, perm0);
    tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
01852
/* 16-wide motion compensation with averaging and half-pel interpolation
 * in both directions:
 * dest ~ avg(dest, (A + B + C + D + 2) >> 2) over the 2x2 neighbourhood.
 *
 * Per source row: A = bytes at x, B = bytes at x+1 (permB = permA + 1;
 * 17 bytes needed, load at offset 16); avgN/xorN pairs ping-pong between
 * rows.  The subtracted (ones & (xor0 | xor1)) & (avg0 ^ avg1) term
 * appears to correct the over-rounding of cascaded vec_avg so the inner
 * result matches (A+B+C+D+2)>>2 — NOTE(review): algebra inferred, verify
 * against the scalar reference.
 *
 * `prev` is the destination row the next store will overwrite (loaded a
 * step ahead); vec_ld/vec_st on `dest` assume 16-byte alignment.
 * Two rows per iteration, software-pipelined; assumes height even and
 * >= 4 (MPEG-2 passes 8 or 16). */
void MC_avg_xy_16_altivec (unsigned char * dest, const unsigned char * ref,
			   const int stride, int height)
{
    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
    vector_u8_t ones, prev;

    ones = vec_splat_u8 (1);
    permA = vec_lvsl (0, ref);
    permB = vec_add (permA, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_st (tmp, 0, dest);
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (16, ref);
	ref += stride;
	prev = vec_ld (2*stride, dest);
	vec_st (tmp, stride, dest);
	dest += 2*stride;
	A = vec_perm (ref0, ref1, permA);
	B = vec_perm (ref0, ref1, permB);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (16, ref);
    prev = vec_ld (stride, dest);
    vec_st (tmp, 0, dest);
    A = vec_perm (ref0, ref1, permA);
    B = vec_perm (ref0, ref1, permB);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));
    vec_st (tmp, stride, dest);
}
01929
/* 8-wide motion compensation with averaging and half-pel interpolation
 * in both directions:
 * dest ~ avg(dest, (A + B + C + D + 2) >> 2) over the 2x2 neighbourhood.
 *
 * perm0A/perm1A realign alternating rows (lvsl + mergeh + modulo pack
 * replicates the 8 row bytes into both vector halves); *B variants add 1
 * for the right-hand neighbours (9 bytes needed, load at offset 8).
 * avgN/xorN pairs ping-pong between rows; the subtracted
 * (ones & (xor0 | xor1)) & (avg0 ^ avg1) term appears to correct the
 * over-rounding of cascaded vec_avg so the inner result matches
 * (A+B+C+D+2)>>2 — NOTE(review): verify against the scalar reference.
 *
 * `prev` is the destination row the next vec_ste pair will overwrite.
 * Assumes `dest` at least 4-byte aligned — TODO confirm.  Two rows per
 * iteration, software-pipelined; assumes height even and >= 4. */
void MC_avg_xy_8_altivec (unsigned char * dest, const unsigned char * ref,
			  const int stride, int height)
{
    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;

    ones = vec_splat_u8 (1);
    perm0A = vec_lvsl (0, ref);
    perm0A = vec_mergeh (perm0A, perm0A);
    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
    perm0B = vec_add (perm0A, ones);
    perm1A = vec_lvsl (stride, ref);
    perm1A = vec_mergeh (perm1A, perm1A);
    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
    perm1B = vec_add (perm1A, ones);

    height = (height >> 1) - 1;

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    ref += stride;
    prev = vec_ld (0, dest);
    A = vec_perm (ref0, ref1, perm1A);
    B = vec_perm (ref0, ref1, perm1B);
    avg1 = vec_avg (A, B);
    xor1 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));

    do {
	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm0A);
	B = vec_perm (ref0, ref1, perm0B);
	avg0 = vec_avg (A, B);
	xor0 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));

	ref0 = vec_ld (0, ref);
	ref1 = vec_ld (8, ref);
	ref += stride;
	prev = vec_ld (stride, dest);
	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
	dest += stride;
	A = vec_perm (ref0, ref1, perm1A);
	B = vec_perm (ref0, ref1, perm1B);
	avg1 = vec_avg (A, B);
	xor1 = vec_xor (A, B);
	tmp = vec_avg (prev,
		       vec_sub (vec_avg (avg0, avg1),
				vec_and (vec_and (ones, vec_or (xor0, xor1)),
					 vec_xor (avg0, avg1))));
    } while (--height);

    ref0 = vec_ld (0, ref);
    ref1 = vec_ld (8, ref);
    prev = vec_ld (stride, dest);
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
    dest += stride;
    A = vec_perm (ref0, ref1, perm0A);
    B = vec_perm (ref0, ref1, perm0B);
    avg0 = vec_avg (A, B);
    xor0 = vec_xor (A, B);
    tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
				  vec_and (vec_and (ones, vec_or (xor0, xor1)),
					   vec_xor (avg0, avg1))));
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
}
02018
02019 #endif