00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028 #ifdef ARCH_X86
00029
00030 #include <stdio.h>
00031 #include <stdlib.h>
00032 #include <inttypes.h>
00033
00034 #include "convert.h"
00035 #include "convert_internal.h"
00036 #include "attributes.h"
00037 #include "mmx.h"
00038
/*
 * Instruction-set selectors. One of these is passed as the `cpu` argument of
 * the converters in this file and is compared against by the movntq() macro.
 */
00039 #define CPU_MMXEXT 0
00040 #define CPU_MMX 1
00041
00042
00043
/*
 * Store MMX register `src` to the memory operand `dest`.
 *
 * Expands to movntq_r2m() when `cpu == CPU_MMXEXT` and to movq_r2m()
 * otherwise. `cpu` is not a macro parameter: a variable of that name must be
 * in scope wherever the macro is expanded. When `cpu` is a compile-time
 * constant the untaken branch can be discarded by the compiler.
 *
 * NOTE(review): movntq_r2m / movq_r2m are not defined in this file
 * (presumably mmx.h); movntq is presumably the MMXEXT non-temporal store --
 * confirm against that header.
 */
00044 #define movntq(src,dest) \
00045 do { \
00046 if (cpu == CPU_MMXEXT) \
00047 movntq_r2m (src, dest); \
00048 else \
00049 movq_r2m (src, dest); \
00050 } while (0)
00051
/*
 * Convert one group of 8 horizontally adjacent pixels from YUV to RGB,
 * leaving the result in MMX registers (nothing is written to memory).
 *
 * py: address of 8 luma bytes (loaded with a 64-bit move).
 * pu: address of 4 U bytes    (loaded with a 32-bit move).
 * pv: address of 4 V bytes    (loaded with a 32-bit move).
 *
 * Each chroma sample is combined with one even and one odd luma sample, so
 * one U/V pair is shared by two neighbouring pixels.
 *
 * Register state on return (consumed by the mmx_unpack_* helpers):
 *   mm0 = 8 blue bytes, mm1 = 8 red bytes, mm2 = 8 green bytes,
 *   each in pixel order; mm3..mm7 hold scratch values.
 *
 * NOTE(review): the *_m2r / *_r2r / *_i2r helpers are not defined in this
 * file; the comments below assume they wrap the same-named MMX instructions
 * (mmx.h) -- confirm.
 */
00052 static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
00053 {
/* 128 in every 16-bit lane: chroma bias removed before scaling. */
00054 static mmx_t mmx_80w = {0x0080008000800080LL};
/*
 * Signed 16-bit multipliers. Inputs are shifted left by 3 and pmulhw keeps
 * the high 16 bits of the product, so the effective scale is value / 8192.
 * NOTE(review): the values look like the usual BT.601 video-range
 * coefficients (about -0.391, 2.018, 1.596, -0.813, 1.164) -- confirm.
 */
00055 static mmx_t mmx_U_green = {0xf37df37df37df37dLL};
00056 static mmx_t mmx_U_blue = {0x4093409340934093LL};
00057 static mmx_t mmx_V_red = {0x3312331233123312LL};
00058 static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL};
/* 16 in every byte (despite the "w" suffix it is applied per byte below). */
00059 static mmx_t mmx_10w = {0x1010101010101010LL};
/* Keeps the low byte of every 16-bit lane. */
00060 static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL};
00061 static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL};
00062
/* Load 4 U, 4 V and 8 Y samples; mm4 = 0 for zero-extension. */
00063 movd_m2r (*pu, mm0);
00064 movd_m2r (*pv, mm1);
00065 movq_m2r (*py, mm6);
00066 pxor_r2r (mm4, mm4);
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
/*
 * Chroma terms. After this group:
 *   mm0 = U contribution to blue, mm1 = V contribution to red,
 *   mm2 = combined U and V contribution to green (4 words each).
 */
00077 punpcklbw_r2r (mm4, mm0);
00078 punpcklbw_r2r (mm4, mm1);
00079 psubsw_m2r (mmx_80w, mm0);
00080 psubsw_m2r (mmx_80w, mm1);
00081 psllw_i2r (3, mm0);
00082 psllw_i2r (3, mm1);
00083 movq_r2r (mm0, mm2);
00084 movq_r2r (mm1, mm3);
00085 pmulhw_m2r (mmx_U_green, mm2);
00086 pmulhw_m2r (mmx_V_green, mm3);
00087 pmulhw_m2r (mmx_U_blue, mm0);
00088 pmulhw_m2r (mmx_V_red, mm1);
00089 paddsw_r2r (mm3, mm2);
00090
/*
 * Luma terms. 16 is subtracted from every Y byte with unsigned saturation,
 * then the bytes are split into two sets of 4 words:
 *   mm6 = scaled Y of even-indexed pixels (low byte of each word),
 *   mm7 = scaled Y of odd-indexed pixels  (high byte of each word).
 */
00091 psubusb_m2r (mmx_10w, mm6);
00092 movq_r2r (mm6, mm7);
00093 pand_m2r (mmx_00ffw, mm6);
00094 psrlw_i2r (8, mm7);
00095 psllw_i2r (3, mm6);
00096 psllw_i2r (3, mm7);
00097 pmulhw_m2r (mmx_Y_coeff, mm6);
00098 pmulhw_m2r (mmx_Y_coeff, mm7);
00099
00100
00101
00102
00103
00104
00105
00106
00107
/*
 * Combine: duplicate each chroma term, add even luma to one copy and odd
 * luma to the other (mm0/mm1/mm2 = even B/R/G, mm3/mm4/mm5 = odd B/R/G),
 * pack every result to unsigned bytes with saturation, then interleave
 * even and odd bytes so each of mm0, mm1, mm2 holds 8 bytes in pixel order.
 */
00108 movq_r2r (mm0, mm3);
00109 movq_r2r (mm1, mm4);
00110 movq_r2r (mm2, mm5);
00111 paddsw_r2r (mm6, mm0);
00112 paddsw_r2r (mm7, mm3);
00113 paddsw_r2r (mm6, mm1);
00114 paddsw_r2r (mm7, mm4);
00115 paddsw_r2r (mm6, mm2);
00116 paddsw_r2r (mm7, mm5);
00117 packuswb_r2r (mm0, mm0);
00118 packuswb_r2r (mm1, mm1);
00119 packuswb_r2r (mm2, mm2);
00120 packuswb_r2r (mm3, mm3);
00121 packuswb_r2r (mm4, mm4);
00122 packuswb_r2r (mm5, mm5);
00123 punpcklbw_r2r (mm3, mm0);
00124 punpcklbw_r2r (mm4, mm1);
00125 punpcklbw_r2r (mm5, mm2);
00126 }
00127
/*
 * Pack 8 pixels held in MMX registers into 16-bit pixels and store them.
 *
 * Expects the register state produced by mmx_yuv2rgb():
 *   mm0 = 8 blue bytes, mm1 = 8 red bytes, mm2 = 8 green bytes.
 *
 * image: destination; 16 bytes are written (two 8-byte stores at image and
 *        image + 8).
 * cpu:   CPU_MMXEXT or CPU_MMX; read by the movntq() macro to pick the
 *        store instruction.
 *
 * Each 16-bit word ends up as: top 5 bits of red in bits 15..11, top 6 bits
 * of green in bits 10..5, top 5 bits of blue in bits 4..0.
 * Clobbers mm0, mm1, mm2, mm4, mm5 and mm7.
 */
00128 static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu)
00129 {
/* Per-byte masks keeping the top 5 (blue, red) or top 6 (green) bits. */
00130 static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL};
00131 static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL};
00132 static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL};
00133
00134
00135
00136
00137
00138
00139
00140
/*
 * Drop the low bits of every component. Because the low 3 bits of each blue
 * byte are already zero, the 64-bit right shift by 3 moves every blue byte
 * down by 3 without leaking bits between bytes.
 * mm5 / mm7 keep copies of blue / green for the upper 4 pixels.
 */
00141 pand_m2r (mmx_bluemask, mm0);
00142 pand_m2r (mmx_greenmask, mm2);
00143 pand_m2r (mmx_redmask, mm1);
00144 psrlq_i2r (3, mm0);
00145 pxor_r2r (mm4, mm4);
00146 movq_r2r (mm0, mm5);
00147 movq_r2r (mm2, mm7);
00148
/*
 * Pixels 0..3: widen green to words, interleave blue (low byte) with red
 * (high byte), shift green into bits 10..5 and merge.
 */
00149 punpcklbw_r2r (mm4, mm2);
00150 punpcklbw_r2r (mm1, mm0);
00151 psllq_i2r (3, mm2);
00152 por_r2r (mm2, mm0);
00153 movntq (mm0, *image);
00154
/* Pixels 4..7: same steps on the high halves of the saved copies. */
00155 punpckhbw_r2r (mm4, mm7);
00156 punpckhbw_r2r (mm1, mm5);
00157 psllq_i2r (3, mm7);
00158 por_r2r (mm7, mm5);
00159 movntq (mm5, *(image+8));
00160 }
00161
/*
 * Interleave 8 pixels held in MMX registers into 32-bit pixels and store
 * them.
 *
 * Expects the register state produced by mmx_yuv2rgb():
 *   mm0 = 8 blue bytes, mm1 = 8 red bytes, mm2 = 8 green bytes.
 *
 * image: destination; 32 bytes are written (four 8-byte stores at image,
 *        image + 8, image + 16 and image + 24, two pixels each).
 * cpu:   CPU_MMXEXT or CPU_MMX; read by the movntq() macro to pick the
 *        store instruction.
 *
 * Byte order of each stored pixel is blue, green, red, 0.
 * mm0, mm1 and mm2 are only read; mm3..mm7 are clobbered.
 */
00162 static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu)
00163 {
00164
00165
00166
00167
00168
00169
00170
/* mm3 = 0 supplies the fourth (padding) byte of every pixel. */
00171 pxor_r2r (mm3, mm3);
00172 movq_r2r (mm0, mm6);
00173 movq_r2r (mm1, mm7);
00174 movq_r2r (mm0, mm4);
00175 movq_r2r (mm1, mm5);
/* Low halves: mm6 = B,G pairs and mm7 = R,0 pairs for pixels 0..3. */
00176 punpcklbw_r2r (mm2, mm6);
00177 punpcklbw_r2r (mm3, mm7);
/* Pixels 0 and 1. */
00178 punpcklwd_r2r (mm7, mm6);
00179 movntq (mm6, *image);
/* Rebuild the B,G pairs (mm6 was overwritten) and emit pixels 2 and 3. */
00180 movq_r2r (mm0, mm6);
00181 punpcklbw_r2r (mm2, mm6);
00182 punpckhwd_r2r (mm7, mm6);
00183 movntq (mm6, *(image+8));
/* High halves: mm4 = B,G pairs and mm5 = R,0 pairs for pixels 4..7. */
00184 punpckhbw_r2r (mm2, mm4);
00185 punpckhbw_r2r (mm3, mm5);
/* Pixels 4 and 5. */
00186 punpcklwd_r2r (mm5, mm4);
00187 movntq (mm4, *(image+16));
/* Rebuild the B,G pairs and emit pixels 6 and 7. */
00188 movq_r2r (mm0, mm4);
00189 punpckhbw_r2r (mm2, mm4);
00190 punpckhwd_r2r (mm5, mm4);
00191 movntq (mm4, *(image+24));
00192 }
00193
/*
 * Convert `height` lines of planar YUV with vertically subsampled chroma to
 * 16-bit RGB, 8 pixels at a time.
 *
 * image:      first byte of the first destination line.
 * py, pu, pv: first sample of the luma / U / V data for the first line.
 * width:      line width in pixels. Only whole groups of 8 pixels are
 *             converted; a remainder of width % 8 pixels is left untouched.
 * height:     number of lines to convert.
 * rgb_stride, y_stride, uv_stride:
 *             byte distance between the starts of consecutive lines in the
 *             destination, luma and chroma planes respectively.
 * cpu:        CPU_MMXEXT or CPU_MMX, forwarded to the store helper.
 *
 * Chroma handling follows the parity of the remaining line count: when it is
 * even the chroma pointers are rewound so the next line reuses the same
 * chroma line, when it is odd they move on to the next chroma line. With an
 * even `height` every chroma line therefore serves two luma lines.
 *
 * Nothing is done when width < 8 or height <= 0 (the loops would otherwise
 * never terminate properly). The per-line pointer adjustments are derived
 * from the number of bytes actually consumed, so lines stay aligned even when
 * `width` is not a multiple of 8.
 */
static inline void yuv420_rgb16 (uint8_t * image,
                                 uint8_t * py, uint8_t * pu, uint8_t * pv,
                                 int width, int height,
                                 int rgb_stride, int y_stride, int uv_stride,
                                 const int cpu)
{
    const int blocks = width >> 3;      /* groups of 8 pixels per line */
    int row;
    int i;

    if (blocks <= 0 || height <= 0)
        return;

    /* Turn the strides into "end of converted data -> start of next line". */
    rgb_stride -= 16 * blocks;          /* 8 pixels * 2 bytes per group */
    y_stride -= 8 * blocks;
    uv_stride -= 4 * blocks;

    for (row = height; row > 0; row--) {
        for (i = blocks; i > 0; i--) {
            mmx_yuv2rgb (py, pu, pv);
            mmx_unpack_16rgb (image, cpu);
            py += 8;
            pu += 4;
            pv += 4;
            image += 16;
        }

        py += y_stride;
        image += rgb_stride;
        if (row & 1) {
            /* Second line of a pair done: advance to the next chroma line. */
            pu += uv_stride;
            pv += uv_stride;
        } else {
            /* First line of a pair done: reuse this chroma line. */
            pu -= 4 * blocks;
            pv -= 4 * blocks;
        }
    }
}
00229
/*
 * Convert `height` lines of planar YUV with vertically subsampled chroma to
 * 32-bit pixels, 8 pixels at a time.
 *
 * image:      first byte of the first destination line.
 * py, pu, pv: first sample of the luma / U / V data for the first line.
 * width:      line width in pixels. Only whole groups of 8 pixels are
 *             converted; a remainder of width % 8 pixels is left untouched.
 * height:     number of lines to convert.
 * rgb_stride, y_stride, uv_stride:
 *             byte distance between the starts of consecutive lines in the
 *             destination, luma and chroma planes respectively.
 * cpu:        CPU_MMXEXT or CPU_MMX, forwarded to the store helper.
 *
 * Chroma handling follows the parity of the remaining line count: when it is
 * even the chroma pointers are rewound so the next line reuses the same
 * chroma line, when it is odd they move on to the next chroma line. With an
 * even `height` every chroma line therefore serves two luma lines.
 *
 * Nothing is done when width < 8 or height <= 0 (the loops would otherwise
 * never terminate properly). The per-line pointer adjustments are derived
 * from the number of bytes actually consumed, so lines stay aligned even when
 * `width` is not a multiple of 8.
 */
static inline void yuv420_argb32 (uint8_t * image, uint8_t * py,
                                  uint8_t * pu, uint8_t * pv,
                                  int width, int height,
                                  int rgb_stride, int y_stride, int uv_stride,
                                  const int cpu)
{
    const int blocks = width >> 3;      /* groups of 8 pixels per line */
    int row;
    int i;

    if (blocks <= 0 || height <= 0)
        return;

    /* Turn the strides into "end of converted data -> start of next line". */
    rgb_stride -= 32 * blocks;          /* 8 pixels * 4 bytes per group */
    y_stride -= 8 * blocks;
    uv_stride -= 4 * blocks;

    for (row = height; row > 0; row--) {
        for (i = blocks; i > 0; i--) {
            mmx_yuv2rgb (py, pu, pv);
            mmx_unpack_32rgb (image, cpu);
            py += 8;
            pu += 4;
            pv += 4;
            image += 32;
        }

        py += y_stride;
        image += rgb_stride;
        if (row & 1) {
            /* Second line of a pair done: advance to the next chroma line. */
            pu += uv_stride;
            pv += uv_stride;
        } else {
            /* First line of a pair done: reuse this chroma line. */
            pu -= 4 * blocks;
            pv -= 4 * blocks;
        }
    }
}
00265
00266 static void mmxext_rgb16 (void * _id, uint8_t * const * src,
00267 unsigned int v_offset)
00268 {
00269 convert_rgb_t * id = (convert_rgb_t *) _id;
00270
00271 yuv420_rgb16 (id->rgb_ptr + id->rgb_stride * v_offset,
00272 src[0], src[1], src[2], id->width, 16,
00273 id->rgb_stride, id->uv_stride << 1, id->uv_stride,
00274 CPU_MMXEXT);
00275 }
00276
00277 static void mmxext_argb32 (void * _id, uint8_t * const * src,
00278 unsigned int v_offset)
00279 {
00280 convert_rgb_t * id = (convert_rgb_t *) _id;
00281
00282 yuv420_argb32 (id->rgb_ptr + id->rgb_stride * v_offset,
00283 src[0], src[1], src[2], id->width, 16,
00284 id->rgb_stride, id->uv_stride << 1, id->uv_stride,
00285 CPU_MMXEXT);
00286 }
00287
00288 static void mmx_rgb16 (void * _id, uint8_t * const * src,
00289 unsigned int v_offset)
00290 {
00291 convert_rgb_t * id = (convert_rgb_t *) _id;
00292
00293 yuv420_rgb16 (id->rgb_ptr + id->rgb_stride * v_offset,
00294 src[0], src[1], src[2], id->width, 16,
00295 id->rgb_stride, id->uv_stride << 1, id->uv_stride, CPU_MMX);
00296 }
00297
00298 static void mmx_argb32 (void * _id, uint8_t * const * src,
00299 unsigned int v_offset)
00300 {
00301 convert_rgb_t * id = (convert_rgb_t *) _id;
00302
00303 yuv420_argb32 (id->rgb_ptr + id->rgb_stride * v_offset,
00304 src[0], src[1], src[2], id->width, 16,
00305 id->rgb_stride, id->uv_stride << 1, id->uv_stride, CPU_MMX);
00306 }
00307
00308 yuv2rgb_copy * yuv2rgb_init_mmxext (int order, int bpp)
00309 {
00310 if ((order == CONVERT_RGB) && (bpp == 16))
00311 return mmxext_rgb16;
00312 else if ((order == CONVERT_RGB) && (bpp == 32))
00313 return mmxext_argb32;
00314 return NULL;
00315 }
00316
00317 yuv2rgb_copy * yuv2rgb_init_mmx (int order, int bpp)
00318 {
00319 if ((order == CONVERT_RGB) && (bpp == 16))
00320 return mmx_rgb16;
00321 else if ((order == CONVERT_RGB) && (bpp == 32))
00322 return mmx_argb32;
00323 return NULL;
00324 }
00325 #endif