00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "config.h"
00025
00026 #ifdef ARCH_X86
00027
00028 #include <inttypes.h>
00029
00030 #include "mpeg2.h"
00031 #include "mpeg2_internal.h"
00032 #include "attributes.h"
00033 #include "mmx.h"
00034
/*
 * Values passed as the "cpu" argument of the helpers shared by the MMXEXT
 * and 3DNow! code paths.  The pavg_r2r / pavg_m2r macros in this file
 * compare "cpu" against CPU_MMXEXT to choose the averaging instruction.
 */
#define CPU_MMXEXT      0
#define CPU_3DNOW       1
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 static mmx_t mask1 = {0xfefefefefefefefeLL};
00056 static mmx_t round4 = {0x0002000200020002LL};
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066 static inline void mmx_zero_reg ()
00067 {
00068
00069 pxor_r2r (mm0, mm0);
00070 }
00071
00072 static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
00073 const uint8_t * src2)
00074 {
00075
00076
00077 movq_m2r (*src1, mm1);
00078 movq_r2r (mm1, mm2);
00079
00080 movq_m2r (*src2, mm3);
00081 movq_r2r (mm3, mm4);
00082
00083 pxor_r2r (mm1, mm3);
00084 pand_m2r (mask1, mm3);
00085 psrlq_i2r (1, mm3);
00086 por_r2r (mm2, mm4);
00087 psubb_r2r (mm3, mm4);
00088 movq_r2m (mm4, *dest);
00089 }
00090
00091 static inline void mmx_interp_average_2_U8 (uint8_t * dest,
00092 const uint8_t * src1,
00093 const uint8_t * src2)
00094 {
00095
00096
00097 movq_m2r (*dest, mm1);
00098 movq_r2r (mm1, mm2);
00099
00100 movq_m2r (*src1, mm3);
00101 movq_r2r (mm3, mm4);
00102
00103 movq_m2r (*src2, mm5);
00104 movq_r2r (mm5, mm6);
00105
00106 pxor_r2r (mm3, mm5);
00107 pand_m2r (mask1, mm5);
00108 psrlq_i2r (1, mm5);
00109 por_r2r (mm4, mm6);
00110 psubb_r2r (mm5, mm6);
00111 movq_r2r (mm6, mm5);
00112
00113 pxor_r2r (mm1, mm5);
00114 pand_m2r (mask1, mm5);
00115 psrlq_i2r (1, mm5);
00116 por_r2r (mm2, mm6);
00117 psubb_r2r (mm5, mm6);
00118 movq_r2m (mm6, *dest);
00119 }
00120
00121 static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
00122 const uint8_t * src2,
00123 const uint8_t * src3,
00124 const uint8_t * src4)
00125 {
00126
00127
00128 movq_m2r (*src1, mm1);
00129 movq_r2r (mm1, mm2);
00130
00131 punpcklbw_r2r (mm0, mm1);
00132 punpckhbw_r2r (mm0, mm2);
00133
00134 movq_m2r (*src2, mm3);
00135 movq_r2r (mm3, mm4);
00136
00137 punpcklbw_r2r (mm0, mm3);
00138 punpckhbw_r2r (mm0, mm4);
00139
00140 paddw_r2r (mm3, mm1);
00141 paddw_r2r (mm4, mm2);
00142
00143
00144
00145 movq_m2r (*src3, mm3);
00146 movq_r2r (mm3, mm4);
00147
00148 punpcklbw_r2r (mm0, mm3);
00149 punpckhbw_r2r (mm0, mm4);
00150
00151 paddw_r2r (mm3, mm1);
00152 paddw_r2r (mm4, mm2);
00153
00154 movq_m2r (*src4, mm5);
00155 movq_r2r (mm5, mm6);
00156
00157 punpcklbw_r2r (mm0, mm5);
00158 punpckhbw_r2r (mm0, mm6);
00159
00160 paddw_r2r (mm5, mm1);
00161 paddw_r2r (mm6, mm2);
00162
00163
00164
00165 paddw_m2r (round4, mm1);
00166 psraw_i2r (2, mm1);
00167 paddw_m2r (round4, mm2);
00168 psraw_i2r (2, mm2);
00169
00170 packuswb_r2r (mm2, mm1);
00171 movq_r2m (mm1, *dest);
00172 }
00173
00174 static inline void mmx_interp_average_4_U8 (uint8_t * dest,
00175 const uint8_t * src1,
00176 const uint8_t * src2,
00177 const uint8_t * src3,
00178 const uint8_t * src4)
00179 {
00180
00181
00182 movq_m2r (*src1, mm1);
00183 movq_r2r (mm1, mm2);
00184
00185 punpcklbw_r2r (mm0, mm1);
00186 punpckhbw_r2r (mm0, mm2);
00187
00188 movq_m2r (*src2, mm3);
00189 movq_r2r (mm3, mm4);
00190
00191 punpcklbw_r2r (mm0, mm3);
00192 punpckhbw_r2r (mm0, mm4);
00193
00194 paddw_r2r (mm3, mm1);
00195 paddw_r2r (mm4, mm2);
00196
00197
00198
00199 movq_m2r (*src3, mm3);
00200 movq_r2r (mm3, mm4);
00201
00202 punpcklbw_r2r (mm0, mm3);
00203 punpckhbw_r2r (mm0, mm4);
00204
00205 paddw_r2r (mm3, mm1);
00206 paddw_r2r (mm4, mm2);
00207
00208 movq_m2r (*src4, mm5);
00209 movq_r2r (mm5, mm6);
00210
00211 punpcklbw_r2r (mm0, mm5);
00212 punpckhbw_r2r (mm0, mm6);
00213
00214 paddw_r2r (mm5, mm1);
00215 paddw_r2r (mm6, mm2);
00216
00217 paddw_m2r (round4, mm1);
00218 psraw_i2r (2, mm1);
00219 paddw_m2r (round4, mm2);
00220 psraw_i2r (2, mm2);
00221
00222
00223
00224 movq_m2r (*dest, mm3);
00225 movq_r2r (mm3, mm4);
00226
00227 packuswb_r2r (mm2, mm1);
00228 movq_r2r (mm1,mm2);
00229
00230 pxor_r2r (mm1, mm3);
00231 pand_m2r (mask1, mm3);
00232 psrlq_i2r (1, mm3);
00233 por_r2r (mm2, mm4);
00234 psubb_r2r (mm3, mm4);
00235 movq_r2m (mm4, *dest);
00236 }
00237
00238
00239
/*
 * For each of "height" rows: dest = average (dest, ref), rounded up.
 * Processes 8 bytes per row, plus a second group of 8 when width == 16.
 * Both pointers advance by "stride" bytes per row.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
                               const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);

    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, dest, ref);

        if (second_half) {
            mmx_average_2_U8 (dest+8, dest+8, ref+8);
        }

        dest += stride;
        ref += stride;
    } while (--height != 0);
}
00255
/* MMX: average dest with ref in place, 16 bytes per row, "height" rows. */
static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_avg_mmx (width, height, dest, ref, stride);
}
00261
/* MMX: average dest with ref in place, 8 bytes per row, "height" rows. */
static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_avg_mmx (width, height, dest, ref, stride);
}
00267
00268
00269
00270 static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
00271 const uint8_t * ref, const int stride)
00272 {
00273 mmx_zero_reg ();
00274
00275 do {
00276 movq_m2r (* ref, mm1);
00277 movq_r2m (mm1,* dest);
00278
00279 if (width == 16)
00280 {
00281 movq_m2r (* (ref+8), mm1);
00282 movq_r2m (mm1,* (dest+8));
00283 }
00284
00285 dest += stride;
00286 ref += stride;
00287 } while (--height);
00288 }
00289
/* MMX: copy ref to dest, 16 bytes per row, "height" rows. */
static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_put_mmx (width, height, dest, ref, stride);
}
00295
/* MMX: copy ref to dest, 8 bytes per row, "height" rows. */
static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_put_mmx (width, height, dest, ref, stride);
}
00301
00302
00303
00304
/*
 * For each of "height" rows:
 *     dest = average (dest, average (ref[i], ref[i+1]))
 * i.e. ref is first averaged with its right-hand neighbour, then blended
 * into dest.  8 bytes per row, plus a second group of 8 when width == 16,
 * so up to width + 1 bytes of ref are read per row.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);

    mmx_zero_reg ();

    do {
        mmx_interp_average_2_U8 (dest, ref, ref+1);

        if (second_half) {
            mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
        }

        dest += stride;
        ref += stride;
    } while (--height != 0);
}
00320
/* MMX: blend the horizontal 2-tap average of ref into dest, 16 bytes wide. */
static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_avg_x_mmx (width, height, dest, ref, stride);
}
00326
/* MMX: blend the horizontal 2-tap average of ref into dest, 8 bytes wide. */
static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_avg_x_mmx (width, height, dest, ref, stride);
}
00332
00333
00334
/*
 * For each of "height" rows: dest = average (ref[i], ref[i+1]), rounded
 * up.  8 bytes per row, plus a second group of 8 when width == 16, so up
 * to width + 1 bytes of ref are read per row.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);

    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, ref, ref+1);

        if (second_half) {
            mmx_average_2_U8 (dest+8, ref+8, ref+9);
        }

        dest += stride;
        ref += stride;
    } while (--height != 0);
}
00350
/* MMX: store the horizontal 2-tap average of ref in dest, 16 bytes wide. */
static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_put_x_mmx (width, height, dest, ref, stride);
}
00356
/* MMX: store the horizontal 2-tap average of ref in dest, 8 bytes wide. */
static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_put_x_mmx (width, height, dest, ref, stride);
}
00362
00363
00364
/*
 * For each of "height" rows, combine the four neighbours
 * ref[i], ref[i+1], ref[i+stride], ref[i+stride+1] with
 * mmx_interp_average_4_U8, which also blends the result into dest.
 * 8 bytes per row, plus a second group of 8 when width == 16.
 * Reads height + 1 rows of ref.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
                                  const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);
    const uint8_t * below = ref + stride;       /* row under the current one */

    mmx_zero_reg ();

    do {
        mmx_interp_average_4_U8 (dest, ref, ref+1, below, below+1);

        if (second_half) {
            mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
                                     below+8, below+9);
        }

        dest += stride;
        ref += stride;
        below += stride;
    } while (--height != 0);
}
00384
/* MMX: blend the 4-neighbour average of ref into dest, 16 bytes wide. */
static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    const int width = 16;

    MC_avg_xy_mmx (width, height, dest, ref, stride);
}
00390
/* MMX: blend the 4-neighbour average of ref into dest, 8 bytes wide. */
static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 8;

    MC_avg_xy_mmx (width, height, dest, ref, stride);
}
00396
00397
00398
/*
 * For each of "height" rows, store in dest the 4-neighbour average
 * (ref[i] + ref[i+1] + ref[i+stride] + ref[i+stride+1] + 2) >> 2,
 * computed by mmx_average_4_U8.  8 bytes per row, plus a second group of
 * 8 when width == 16.  Reads height + 1 rows of ref.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
                                  const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);
    const uint8_t * below = ref + stride;       /* row under the current one */

    mmx_zero_reg ();

    do {
        mmx_average_4_U8 (dest, ref, ref+1, below, below+1);

        if (second_half) {
            mmx_average_4_U8 (dest+8, ref+8, ref+9, below+8, below+9);
        }

        dest += stride;
        ref += stride;
        below += stride;
    } while (--height != 0);
}
00417
/* MMX: store the 4-neighbour average of ref in dest, 16 bytes wide. */
static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    const int width = 16;

    MC_put_xy_mmx (width, height, dest, ref, stride);
}
00423
/* MMX: store the 4-neighbour average of ref in dest, 8 bytes wide. */
static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 8;

    MC_put_xy_mmx (width, height, dest, ref, stride);
}
00429
00430
00431
/*
 * For each of "height" rows:
 *     dest = average (dest, average (ref[i], ref[i+stride]))
 * i.e. ref is first averaged with the row below it, then blended into
 * dest.  8 bytes per row, plus a second group of 8 when width == 16.
 * Reads height + 1 rows of ref.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);
    const uint8_t * below = ref + stride;       /* row under the current one */

    mmx_zero_reg ();

    do {
        mmx_interp_average_2_U8 (dest, ref, below);

        if (second_half) {
            mmx_interp_average_2_U8 (dest+8, ref+8, below+8);
        }

        dest += stride;
        ref += stride;
        below += stride;
    } while (--height != 0);
}
00450
/* MMX: blend the vertical 2-tap average of ref into dest, 16 bytes wide. */
static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_avg_y_mmx (width, height, dest, ref, stride);
}
00456
/* MMX: blend the vertical 2-tap average of ref into dest, 8 bytes wide. */
static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_avg_y_mmx (width, height, dest, ref, stride);
}
00462
00463
00464
/*
 * For each of "height" rows: dest = average (ref[i], ref[i+stride]),
 * rounded up.  8 bytes per row, plus a second group of 8 when
 * width == 16.  Reads height + 1 rows of ref.
 *
 * The loop body always runs at least once; height must be >= 1.
 */
static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
                                 const uint8_t * ref, const int stride)
{
    const int second_half = (width == 16);
    const uint8_t * below = ref + stride;       /* row under the current one */

    mmx_zero_reg ();

    do {
        mmx_average_2_U8 (dest, ref, below);

        if (second_half) {
            mmx_average_2_U8 (dest+8, ref+8, below+8);
        }

        dest += stride;
        ref += stride;
        below += stride;
    } while (--height != 0);
}
00483
/* MMX: store the vertical 2-tap average of ref in dest, 16 bytes wide. */
static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
                             int stride, int height)
{
    const int width = 16;

    MC_put_y_mmx (width, height, dest, ref, stride);
}
00489
/* MMX: store the vertical 2-tap average of ref in dest, 8 bytes wide. */
static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
                            int stride, int height)
{
    const int width = 8;

    MC_put_y_mmx (width, height, dest, ref, stride);
}
00495
00496
00497 MPEG2_MC_EXTERN (mmx)
00498
00499
00500
00501
00502
00503
00504
00505
00506
/*
 * Register-to-register packed byte average: pavgb when cpu is CPU_MMXEXT,
 * pavgusb for any other value.  Expands to a reference to a variable
 * named "cpu", which must be in scope at the point of use.
 */
#define pavg_r2r(src,dest)              \
do {                                    \
    if (cpu != CPU_MMXEXT)              \
        pavgusb_r2r (src, dest);        \
    else                                \
        pavgb_r2r (src, dest);          \
} while (0)
00514
/*
 * Memory-to-register packed byte average: pavgb when cpu is CPU_MMXEXT,
 * pavgusb for any other value.  Expands to a reference to a variable
 * named "cpu", which must be in scope at the point of use.
 */
#define pavg_m2r(src,dest)              \
do {                                    \
    if (cpu != CPU_MMXEXT)              \
        pavgusb_m2r (src, dest);        \
    else                                \
        pavgb_m2r (src, dest);          \
} while (0)
00522
00523
00524
00525
00526
00527 static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
00528 const int stride)
00529 {
00530 do {
00531 movq_m2r (*ref, mm0);
00532 movq_r2m (mm0, *dest);
00533 ref += stride;
00534 dest += stride;
00535 } while (--height);
00536 }
00537
00538 static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
00539 const int stride)
00540 {
00541 do {
00542 movq_m2r (*ref, mm0);
00543 movq_m2r (*(ref+8), mm1);
00544 ref += stride;
00545 movq_r2m (mm0, *dest);
00546 movq_r2m (mm1, *(dest+8));
00547 dest += stride;
00548 } while (--height);
00549 }
00550
00551 static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
00552 const int stride, const int cpu)
00553 {
00554 do {
00555 movq_m2r (*ref, mm0);
00556 pavg_m2r (*dest, mm0);
00557 ref += stride;
00558 movq_r2m (mm0, *dest);
00559 dest += stride;
00560 } while (--height);
00561 }
00562
00563 static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
00564 const int stride, const int cpu)
00565 {
00566 do {
00567 movq_m2r (*ref, mm0);
00568 movq_m2r (*(ref+8), mm1);
00569 pavg_m2r (*dest, mm0);
00570 pavg_m2r (*(dest+8), mm1);
00571 movq_r2m (mm0, *dest);
00572 ref += stride;
00573 movq_r2m (mm1, *(dest+8));
00574 dest += stride;
00575 } while (--height);
00576 }
00577
00578 static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
00579 const int stride, const int offset,
00580 const int cpu)
00581 {
00582 do {
00583 movq_m2r (*ref, mm0);
00584 pavg_m2r (*(ref+offset), mm0);
00585 ref += stride;
00586 movq_r2m (mm0, *dest);
00587 dest += stride;
00588 } while (--height);
00589 }
00590
00591 static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
00592 const int stride, const int offset,
00593 const int cpu)
00594 {
00595 do {
00596 movq_m2r (*ref, mm0);
00597 movq_m2r (*(ref+8), mm1);
00598 pavg_m2r (*(ref+offset), mm0);
00599 pavg_m2r (*(ref+offset+8), mm1);
00600 movq_r2m (mm0, *dest);
00601 ref += stride;
00602 movq_r2m (mm1, *(dest+8));
00603 dest += stride;
00604 } while (--height);
00605 }
00606
00607 static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
00608 const int stride, const int offset,
00609 const int cpu)
00610 {
00611 do {
00612 movq_m2r (*ref, mm0);
00613 pavg_m2r (*(ref+offset), mm0);
00614 pavg_m2r (*dest, mm0);
00615 ref += stride;
00616 movq_r2m (mm0, *dest);
00617 dest += stride;
00618 } while (--height);
00619 }
00620
00621 static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
00622 const int stride, const int offset,
00623 const int cpu)
00624 {
00625 do {
00626 movq_m2r (*ref, mm0);
00627 movq_m2r (*(ref+8), mm1);
00628 pavg_m2r (*(ref+offset), mm0);
00629 pavg_m2r (*(ref+offset+8), mm1);
00630 pavg_m2r (*dest, mm0);
00631 pavg_m2r (*(dest+8), mm1);
00632 ref += stride;
00633 movq_r2m (mm0, *dest);
00634 movq_r2m (mm1, *(dest+8));
00635 dest += stride;
00636 } while (--height);
00637 }
00638
00639 static mmx_t mask_one = {0x0101010101010101LL};
00640
00641 static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
00642 const int stride, const int cpu)
00643 {
00644 movq_m2r (*ref, mm0);
00645 movq_m2r (*(ref+1), mm1);
00646 movq_r2r (mm0, mm7);
00647 pxor_r2r (mm1, mm7);
00648 pavg_r2r (mm1, mm0);
00649 ref += stride;
00650
00651 do {
00652 movq_m2r (*ref, mm2);
00653 movq_r2r (mm0, mm5);
00654
00655 movq_m2r (*(ref+1), mm3);
00656 movq_r2r (mm2, mm6);
00657
00658 pxor_r2r (mm3, mm6);
00659 pavg_r2r (mm3, mm2);
00660
00661 por_r2r (mm6, mm7);
00662 pxor_r2r (mm2, mm5);
00663
00664 pand_r2r (mm5, mm7);
00665 pavg_r2r (mm2, mm0);
00666
00667 pand_m2r (mask_one, mm7);
00668
00669 psubusb_r2r (mm7, mm0);
00670
00671 ref += stride;
00672 movq_r2m (mm0, *dest);
00673 dest += stride;
00674
00675 movq_r2r (mm6, mm7);
00676 movq_r2r (mm2, mm0);
00677 } while (--height);
00678 }
00679
00680 static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
00681 const int stride, const int cpu)
00682 {
00683 do {
00684 movq_m2r (*ref, mm0);
00685 movq_m2r (*(ref+stride+1), mm1);
00686 movq_r2r (mm0, mm7);
00687 movq_m2r (*(ref+1), mm2);
00688 pxor_r2r (mm1, mm7);
00689 movq_m2r (*(ref+stride), mm3);
00690 movq_r2r (mm2, mm6);
00691 pxor_r2r (mm3, mm6);
00692 pavg_r2r (mm1, mm0);
00693 pavg_r2r (mm3, mm2);
00694 por_r2r (mm6, mm7);
00695 movq_r2r (mm0, mm6);
00696 pxor_r2r (mm2, mm6);
00697 pand_r2r (mm6, mm7);
00698 pand_m2r (mask_one, mm7);
00699 pavg_r2r (mm2, mm0);
00700 psubusb_r2r (mm7, mm0);
00701 movq_r2m (mm0, *dest);
00702
00703 movq_m2r (*(ref+8), mm0);
00704 movq_m2r (*(ref+stride+9), mm1);
00705 movq_r2r (mm0, mm7);
00706 movq_m2r (*(ref+9), mm2);
00707 pxor_r2r (mm1, mm7);
00708 movq_m2r (*(ref+stride+8), mm3);
00709 movq_r2r (mm2, mm6);
00710 pxor_r2r (mm3, mm6);
00711 pavg_r2r (mm1, mm0);
00712 pavg_r2r (mm3, mm2);
00713 por_r2r (mm6, mm7);
00714 movq_r2r (mm0, mm6);
00715 pxor_r2r (mm2, mm6);
00716 pand_r2r (mm6, mm7);
00717 pand_m2r (mask_one, mm7);
00718 pavg_r2r (mm2, mm0);
00719 psubusb_r2r (mm7, mm0);
00720 ref += stride;
00721 movq_r2m (mm0, *(dest+8));
00722 dest += stride;
00723 } while (--height);
00724 }
00725
00726 static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
00727 const int stride, const int cpu)
00728 {
00729 do {
00730 movq_m2r (*ref, mm0);
00731 movq_m2r (*(ref+stride+1), mm1);
00732 movq_r2r (mm0, mm7);
00733 movq_m2r (*(ref+1), mm2);
00734 pxor_r2r (mm1, mm7);
00735 movq_m2r (*(ref+stride), mm3);
00736 movq_r2r (mm2, mm6);
00737 pxor_r2r (mm3, mm6);
00738 pavg_r2r (mm1, mm0);
00739 pavg_r2r (mm3, mm2);
00740 por_r2r (mm6, mm7);
00741 movq_r2r (mm0, mm6);
00742 pxor_r2r (mm2, mm6);
00743 pand_r2r (mm6, mm7);
00744 pand_m2r (mask_one, mm7);
00745 pavg_r2r (mm2, mm0);
00746 psubusb_r2r (mm7, mm0);
00747 movq_m2r (*dest, mm1);
00748 pavg_r2r (mm1, mm0);
00749 ref += stride;
00750 movq_r2m (mm0, *dest);
00751 dest += stride;
00752 } while (--height);
00753 }
00754
00755 static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
00756 const int stride, const int cpu)
00757 {
00758 do {
00759 movq_m2r (*ref, mm0);
00760 movq_m2r (*(ref+stride+1), mm1);
00761 movq_r2r (mm0, mm7);
00762 movq_m2r (*(ref+1), mm2);
00763 pxor_r2r (mm1, mm7);
00764 movq_m2r (*(ref+stride), mm3);
00765 movq_r2r (mm2, mm6);
00766 pxor_r2r (mm3, mm6);
00767 pavg_r2r (mm1, mm0);
00768 pavg_r2r (mm3, mm2);
00769 por_r2r (mm6, mm7);
00770 movq_r2r (mm0, mm6);
00771 pxor_r2r (mm2, mm6);
00772 pand_r2r (mm6, mm7);
00773 pand_m2r (mask_one, mm7);
00774 pavg_r2r (mm2, mm0);
00775 psubusb_r2r (mm7, mm0);
00776 movq_m2r (*dest, mm1);
00777 pavg_r2r (mm1, mm0);
00778 movq_r2m (mm0, *dest);
00779
00780 movq_m2r (*(ref+8), mm0);
00781 movq_m2r (*(ref+stride+9), mm1);
00782 movq_r2r (mm0, mm7);
00783 movq_m2r (*(ref+9), mm2);
00784 pxor_r2r (mm1, mm7);
00785 movq_m2r (*(ref+stride+8), mm3);
00786 movq_r2r (mm2, mm6);
00787 pxor_r2r (mm3, mm6);
00788 pavg_r2r (mm1, mm0);
00789 pavg_r2r (mm3, mm2);
00790 por_r2r (mm6, mm7);
00791 movq_r2r (mm0, mm6);
00792 pxor_r2r (mm2, mm6);
00793 pand_r2r (mm6, mm7);
00794 pand_m2r (mask_one, mm7);
00795 pavg_r2r (mm2, mm0);
00796 psubusb_r2r (mm7, mm0);
00797 movq_m2r (*(dest+8), mm1);
00798 pavg_r2r (mm1, mm0);
00799 ref += stride;
00800 movq_r2m (mm0, *(dest+8));
00801 dest += stride;
00802 } while (--height);
00803 }
00804
00805 static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
00806 int stride, int height)
00807 {
00808 MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
00809 }
00810
00811 static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
00812 int stride, int height)
00813 {
00814 MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
00815 }
00816
/*
 * MMXEXT: copy ref to dest, 16 bytes per row.  A plain copy needs no
 * averaging instruction, so no cpu selector is passed.
 */
static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
                                int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}
00822
/*
 * MMXEXT: copy ref to dest, 8 bytes per row.  A plain copy needs no
 * averaging instruction, so no cpu selector is passed.
 */
static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}
00828
00829 static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
00830 int stride, int height)
00831 {
00832 MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
00833 }
00834
00835 static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
00836 int stride, int height)
00837 {
00838 MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
00839 }
00840
00841 static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
00842 int stride, int height)
00843 {
00844 MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
00845 }
00846
00847 static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
00848 int stride, int height)
00849 {
00850 MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
00851 }
00852
00853 static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
00854 int stride, int height)
00855 {
00856 MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
00857 }
00858
00859 static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
00860 int stride, int height)
00861 {
00862 MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
00863 }
00864
00865 static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
00866 int stride, int height)
00867 {
00868 MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
00869 }
00870
00871 static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
00872 int stride, int height)
00873 {
00874 MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
00875 }
00876
00877 static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
00878 int stride, int height)
00879 {
00880 MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
00881 }
00882
00883 static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
00884 int stride, int height)
00885 {
00886 MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
00887 }
00888
00889 static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
00890 int stride, int height)
00891 {
00892 MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
00893 }
00894
00895 static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
00896 int stride, int height)
00897 {
00898 MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
00899 }
00900
00901
00902 MPEG2_MC_EXTERN (mmxext)
00903
00904
00905
00906 static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
00907 int stride, int height)
00908 {
00909 MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
00910 }
00911
00912 static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
00913 int stride, int height)
00914 {
00915 MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
00916 }
00917
/*
 * 3DNow!: copy ref to dest, 16 bytes per row.  A plain copy needs no
 * averaging instruction, so no cpu selector is passed.
 */
static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
                               int stride, int height)
{
    MC_put1_16 (height, dest, ref, stride);
}
00923
/*
 * 3DNow!: copy ref to dest, 8 bytes per row.  A plain copy needs no
 * averaging instruction, so no cpu selector is passed.
 */
static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
                              int stride, int height)
{
    MC_put1_8 (height, dest, ref, stride);
}
00929
00930 static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
00931 int stride, int height)
00932 {
00933 MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
00934 }
00935
00936 static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
00937 int stride, int height)
00938 {
00939 MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
00940 }
00941
00942 static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
00943 int stride, int height)
00944 {
00945 MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
00946 }
00947
00948 static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
00949 int stride, int height)
00950 {
00951 MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
00952 }
00953
00954 static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
00955 int stride, int height)
00956 {
00957 MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
00958 }
00959
00960 static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
00961 int stride, int height)
00962 {
00963 MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
00964 }
00965
00966 static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
00967 int stride, int height)
00968 {
00969 MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
00970 }
00971
00972 static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
00973 int stride, int height)
00974 {
00975 MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
00976 }
00977
00978 static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
00979 int stride, int height)
00980 {
00981 MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
00982 }
00983
00984 static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
00985 int stride, int height)
00986 {
00987 MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
00988 }
00989
00990 static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
00991 int stride, int height)
00992 {
00993 MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
00994 }
00995
00996 static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
00997 int stride, int height)
00998 {
00999 MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
01000 }
01001
01002
01003 MPEG2_MC_EXTERN (3dnow)
01004
01005 #endif