00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "config.h"
00024
00025 #ifdef ARCH_ALPHA
00026
00027 #include <inttypes.h>
00028
00029 #include "mpeg2.h"
00030 #include "mpeg2_internal.h"
00031 #include "alpha_asm.h"
00032
/*
 * Average two packed 64-bit words lane by lane.
 *
 * Relies on the identity (a | b) - ((a ^ b) >> 1) == (a + b + 1) >> 1.
 * The BYTE_VEC(0xfe) mask (from alpha_asm.h; presumably 0xfe replicated
 * into all eight bytes -- confirm there) clears the lowest bit of every
 * byte before the shift so that nothing spills into the neighbouring
 * byte, giving a per-byte average that rounds halves up.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t differing = a ^ b;
    const uint64_t half_diff = (differing & BYTE_VEC(0xfe)) >> 1;

    return (a | b) - half_diff;
}
00037
00038
00039
/*
 * ULOAD16(ret_l, ret_r, addr) - load 16 bytes starting at addr into two
 * 64-bit lvalues, for use when addr is not 8-byte aligned.
 *
 * Three quadwords are fetched with ldq_u at addr, addr + 8 and addr + 16,
 * and each adjacent pair is stitched together with extql/extqh.  Those
 * helpers come from alpha_asm.h (presumably the Alpha LDQ_U / EXTQL /
 * EXTQH operations -- confirm there).
 *
 * Every use of a macro argument is parenthesised so that any pointer
 * expression (e.g. a conditional expression) binds correctly.  addr is
 * still expanded several times, so it must be free of side effects.
 */
#define ULOAD16(ret_l, ret_r, addr)                             \
    do {                                                        \
        uint64_t _l = ldq_u((addr) + 0);                        \
        uint64_t _m = ldq_u((addr) + 8);                        \
        uint64_t _r = ldq_u((addr) + 16);                       \
        (ret_l) = extql(_l, (addr)) | extqh(_m, (addr));        \
        (ret_r) = extql(_m, (addr)) | extqh(_r, (addr));        \
    } while (0)
00048
00049
/*
 * ALOAD16(ret_l, ret_r, addr) - load 16 bytes starting at addr into two
 * 64-bit lvalues using two ldq loads (at addr and addr + 8).  In this
 * file it is selected only when the source pointer is 8-byte aligned.
 *
 * addr is parenthesised before the "+ 8" so that low-precedence argument
 * expressions (e.g. a conditional expression) still address the second
 * quadword correctly.  addr is expanded twice, so it must be free of
 * side effects.
 */
#define ALOAD16(ret_l, ret_r, addr)             \
    do {                                        \
        (ret_l) = ldq(addr);                    \
        (ret_r) = ldq((addr) + 8);              \
    } while (0)
00055
/*
 * OP8 - straight copy of an 8-byte-wide block, one row per iteration.
 *
 * Expands inside a function that provides `pixels` (source), `block`
 * (destination), `line_size` (stride applied to both pointers) and `h`
 * (row count, must be non-zero on entry because it is tested after the
 * first row).  LOAD fetches one 64-bit row, STORE writes it; LOAD16 is
 * accepted only so every OP* macro has the same parameter list.
 */
#define OP8(LOAD, LOAD16, STORE)                \
    do {                                        \
        const uint64_t row = LOAD(pixels);      \
                                                \
        STORE(row, block);                      \
        block  += line_size;                    \
        pixels += line_size;                    \
    } while (--h != 0)
00062
/*
 * OP16 - straight copy of a 16-byte-wide block, one row per iteration.
 *
 * Expands inside a function that provides `pixels`, `block`, `line_size`
 * and `h` (non-zero on entry).  LOAD16 fetches both 64-bit halves of the
 * row; STORE writes them at block and block + 8.  LOAD is unused here.
 */
#define OP16(LOAD, LOAD16, STORE)               \
    do {                                        \
        uint64_t left, right;                   \
                                                \
        LOAD16(left, right, pixels);            \
        pixels += line_size;                    \
        STORE(left, block);                     \
        STORE(right, block + 8);                \
        block += line_size;                     \
    } while (--h != 0)
00072
/*
 * OP8_X2 - 8-byte-wide block where each output byte is avg2() of a source
 * byte and its right-hand neighbour.
 *
 * `shifted` is the row moved down by one byte with pixels[8] inserted in
 * the top byte, i.e. the neighbour of each lane, assuming LOAD places
 * pixels[0] in the least significant byte.  Nine source bytes are read
 * per row.  Uses `pixels`, `block`, `line_size` and `h` (non-zero on
 * entry) from the expansion site; LOAD16 is unused.
 */
#define OP8_X2(LOAD, LOAD16, STORE)                                     \
    do {                                                                \
        const uint64_t row     = LOAD(pixels);                          \
        const uint64_t shifted = (row >> 8)                             \
                               | ((uint64_t) pixels[8] << 56);          \
                                                                        \
        STORE(avg2(row, shifted), block);                               \
        block  += line_size;                                            \
        pixels += line_size;                                            \
    } while (--h != 0)
00083
/*
 * OP16_X2 - 16-byte-wide block where each output byte is avg2() of a
 * source byte and its right-hand neighbour.
 *
 * The low half borrows its last neighbour from the bottom byte of the
 * high half; the high half borrows pixels[16].  Seventeen source bytes
 * are read per row.  Lane order assumes LOAD16 places the first pixel in
 * the least significant byte.  Uses `pixels`, `block`, `line_size` and
 * `h` (non-zero on entry) from the expansion site; LOAD is unused.
 */
#define OP16_X2(LOAD, LOAD16, STORE)                                    \
    do {                                                                \
        uint64_t lo, hi, lo_next, hi_next;                              \
                                                                        \
        LOAD16(lo, hi, pixels);                                         \
        lo_next = (lo >> 8) | (hi << 56);                               \
        STORE(avg2(lo, lo_next), block);                                \
        hi_next = (hi >> 8) | ((uint64_t) pixels[16] << 56);            \
        STORE(avg2(hi, hi_next), block + 8);                            \
        block  += line_size;                                            \
        pixels += line_size;                                            \
    } while (--h != 0)
00095
/*
 * OP8_Y2 - 8-byte-wide block where each output row is avg2() of a source
 * row and the row below it.
 *
 * Two rows are loaded up front; every iteration then averages the pair,
 * slides the window down by one row and stores the result.  When the row
 * counter reaches zero, line_size is forced to 0 so that the final
 * (unused) reload re-reads the current row instead of stepping past the
 * h + 1 source rows that are actually needed.  Note that this clobbers
 * the caller's `line_size` and leaves `h` at 0.  LOAD16 is unused.
 */
#define OP8_Y2(LOAD, LOAD16, STORE)                             \
    do {                                                        \
        uint64_t above = LOAD(pixels);                          \
        uint64_t below;                                         \
                                                                \
        pixels += line_size;                                    \
        below = LOAD(pixels);                                   \
                                                                \
        do {                                                    \
            const uint64_t mean = avg2(above, below);           \
                                                                \
            h--;                                                \
            if (h == 0) {                                       \
                line_size = 0;                                  \
            }                                                   \
            pixels += line_size;                                \
            above = below;                                      \
            below = LOAD(pixels);                               \
            STORE(mean, block);                                 \
            block += line_size;                                 \
        } while (h != 0);                                       \
    } while (0)
00112
/*
 * OP16_Y2 - 16-byte-wide block where each output row is avg2() of a
 * source row and the row below it.
 *
 * Same sliding two-row window as the 8-wide variant, carried out on the
 * left and right 64-bit halves in parallel.  When the row counter hits
 * zero, line_size is forced to 0 so the trailing reload stays on the
 * current row.  This clobbers the caller's `line_size` and leaves `h`
 * at 0.  LOAD is unused.
 */
#define OP16_Y2(LOAD, LOAD16, STORE)                            \
    do {                                                        \
        uint64_t above_l, above_r, below_l, below_r;            \
                                                                \
        LOAD16(above_l, above_r, pixels);                       \
        pixels += line_size;                                    \
        LOAD16(below_l, below_r, pixels);                       \
                                                                \
        do {                                                    \
            const uint64_t mean_l = avg2(above_l, below_l);     \
            const uint64_t mean_r = avg2(above_r, below_r);     \
                                                                \
            h--;                                                \
            if (h == 0) {                                       \
                line_size = 0;                                  \
            }                                                   \
            above_l = below_l;                                  \
            above_r = below_r;                                  \
            pixels += line_size;                                \
            LOAD16(below_l, below_r, pixels);                   \
            STORE(mean_l, block);                               \
            STORE(mean_r, block + 8);                           \
            block += line_size;                                 \
        } while (h != 0);                                       \
    } while (0)
00133
/*
 * OP8_XY2 - 8-byte-wide block where each output byte combines a source
 * byte with its right, lower and lower-right neighbours.
 *
 * To add four bytes per lane without overflowing a lane, every row is
 * split into a "hi" part (top six bits of each byte, pre-shifted right
 * by two) and a "lo" part (bottom two bits).  Each row contributes the
 * sum of itself and its one-byte-shifted copy in both forms; an output
 * row is prev_hi + row_hi plus the rounded carry out of the low parts.
 * With BYTE_VEC(x) replicating x into every byte (alpha_asm.h; confirm
 * there) this is (a + b + c + d + 2) >> 2 per byte.
 *
 * Reads nine bytes from each of h + 1 source rows.  Uses `pixels`,
 * `block`, `line_size` and `h` (non-zero on entry) from the expansion
 * site; LOAD16 is unused.
 */
#define OP8_XY2(LOAD, LOAD16, STORE)                                    \
    do {                                                                \
        const uint64_t low2 = BYTE_VEC(0x03);                           \
        const uint64_t bias = BYTE_VEC(0x02);                           \
        uint64_t cur = LOAD(pixels);                                    \
        uint64_t nxt = (cur >> 8) | ((uint64_t) pixels[8] << 56);       \
        uint64_t prev_hi = ((cur & ~low2) >> 2) + ((nxt & ~low2) >> 2); \
        uint64_t prev_lo = (cur & low2) + (nxt & low2);                 \
                                                                        \
        do {                                                            \
            uint64_t row_hi, row_lo, carry;                             \
                                                                        \
            pixels += line_size;                                        \
            cur = LOAD(pixels);                                         \
            nxt = (cur >> 8) | ((uint64_t) pixels[8] << 56);            \
            row_hi = ((cur & ~low2) >> 2) + ((nxt & ~low2) >> 2);       \
            row_lo = (cur & low2) + (nxt & low2);                       \
                                                                        \
            /* rounded carry from the four 2-bit remainders */          \
            carry = ((prev_lo + row_lo + bias) >> 2) & low2;            \
            STORE(prev_hi + row_hi + carry, block);                     \
                                                                        \
            block += line_size;                                         \
            prev_hi = row_hi;                                           \
            prev_lo = row_lo;                                           \
        } while (--h != 0);                                             \
    } while (0)
00165
/*
 * OP16_XY2 - 16-byte-wide block where each output byte combines a source
 * byte with its right, lower and lower-right neighbours.
 *
 * Works like the 8-wide variant on two 64-bit halves: each row is split
 * into a "hi" part (top six bits of every byte, pre-shifted right by two)
 * and a "lo" part (bottom two bits) so that four bytes can be summed per
 * lane without overflow.  The left half takes its last right-hand
 * neighbour from the bottom byte of the right half; the right half takes
 * it from pixels[16].  With BYTE_VEC(x) replicating x into every byte
 * (alpha_asm.h; confirm there) the result is (a + b + c + d + 2) >> 2
 * per byte.
 *
 * Reads seventeen bytes from each of h + 1 source rows.  Uses `pixels`,
 * `block`, `line_size` and `h` (non-zero on entry) from the expansion
 * site; LOAD is unused.
 */
#define OP16_XY2(LOAD, LOAD16, STORE)                                   \
    do {                                                                \
        const uint64_t low2 = BYTE_VEC(0x03);                           \
        const uint64_t bias = BYTE_VEC(0x02);                           \
        uint64_t lft, rgt, lft_x, rgt_x;                                \
        uint64_t prev_hi_l, prev_lo_l, prev_hi_r, prev_lo_r;            \
                                                                        \
        LOAD16(lft, rgt, pixels);                                       \
        lft_x = (lft >> 8) | (rgt << 56);                               \
        rgt_x = (rgt >> 8) | ((uint64_t) pixels[16] << 56);             \
                                                                        \
        prev_hi_l = ((lft & ~low2) >> 2) + ((lft_x & ~low2) >> 2);      \
        prev_lo_l = (lft & low2) + (lft_x & low2);                      \
        prev_hi_r = ((rgt & ~low2) >> 2) + ((rgt_x & ~low2) >> 2);      \
        prev_lo_r = (rgt & low2) + (rgt_x & low2);                      \
                                                                        \
        do {                                                            \
            uint64_t row_hi_l, row_lo_l, row_hi_r, row_lo_r;            \
            uint64_t carry_l, carry_r;                                  \
                                                                        \
            pixels += line_size;                                        \
            LOAD16(lft, rgt, pixels);                                   \
            lft_x = (lft >> 8) | (rgt << 56);                           \
            rgt_x = (rgt >> 8) | ((uint64_t) pixels[16] << 56);         \
                                                                        \
            row_hi_l = ((lft & ~low2) >> 2) + ((lft_x & ~low2) >> 2);   \
            row_lo_l = (lft & low2) + (lft_x & low2);                   \
            row_hi_r = ((rgt & ~low2) >> 2) + ((rgt_x & ~low2) >> 2);   \
            row_lo_r = (rgt & low2) + (rgt_x & low2);                   \
                                                                        \
            /* rounded carries from the four 2-bit remainders */        \
            carry_l = ((prev_lo_l + row_lo_l + bias) >> 2) & low2;      \
            carry_r = ((prev_lo_r + row_lo_r + bias) >> 2) & low2;      \
            STORE(prev_hi_l + row_hi_l + carry_l, block);               \
            STORE(prev_hi_r + row_hi_r + carry_r, block + 8);           \
                                                                        \
            block += line_size;                                         \
            prev_hi_l = row_hi_l;                                       \
            prev_lo_l = row_lo_l;                                       \
            prev_hi_r = row_hi_r;                                       \
            prev_lo_r = row_lo_r;                                       \
        } while (--h != 0);                                             \
    } while (0)
00212
/*
 * MAKE_OP - define one motion-compensation routine named
 * MC_<OPNAME>_<SUFF>_<SIZE>_alpha.
 *
 * OPKIND is the OP* loop macro forming the body and STORE the store
 * macro it should use.  The body is expanded twice: with the aligned
 * loaders (ldq / ALOAD16) when the low three bits of `pixels` are clear,
 * and with the unaligned loaders (uldq / ULOAD16) otherwise.
 *
 * The parameter names block, pixels, line_size and h are part of the
 * contract: the OP* macros refer to them directly.
 */
#define MAKE_OP(OPNAME, SIZE, SUFF, OPKIND, STORE)                      \
static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha           \
    (uint8_t *restrict block, const uint8_t *restrict pixels,           \
     int line_size, int h)                                              \
{                                                                       \
    if (((uint64_t) pixels & 0x7) == 0) {                               \
        OPKIND(ldq, ALOAD16, STORE);                                    \
    } else {                                                            \
        OPKIND(uldq, ULOAD16, STORE);                                   \
    }                                                                   \
}
00224
/*
 * PIXOP - instantiate the eight routines of one family (OPNAME) that all
 * share the same STORE macro: 8- and 16-byte-wide versions of the plain
 * (o), horizontal (x), vertical (y) and two-dimensional (xy) kernels.
 *
 * Each MAKE_OP expands to a complete function definition, so no ';'
 * follows it: a semicolon there would be an empty declaration at file
 * scope, which ISO C does not permit and pedantic compilers warn about.
 */
#define PIXOP(OPNAME, STORE)                    \
    MAKE_OP(OPNAME, 8,  o,  OP8,      STORE)    \
    MAKE_OP(OPNAME, 8,  x,  OP8_X2,   STORE)    \
    MAKE_OP(OPNAME, 8,  y,  OP8_Y2,   STORE)    \
    MAKE_OP(OPNAME, 8,  xy, OP8_XY2,  STORE)    \
    MAKE_OP(OPNAME, 16, o,  OP16,     STORE)    \
    MAKE_OP(OPNAME, 16, x,  OP16_X2,  STORE)    \
    MAKE_OP(OPNAME, 16, y,  OP16_Y2,  STORE)    \
    MAKE_OP(OPNAME, 16, xy, OP16_XY2, STORE)
00234
00235 #define STORE(l, b) stq(l, b)
00236 PIXOP(put, STORE);
00237
00238 #undef STORE
00239 #define STORE(l, b) stq(avg2(l, ldq(b)), b);
00240 PIXOP(avg, STORE);
00241
00242 mpeg2_mc_t mpeg2_mc_alpha = {
00243 { MC_put_o_16_alpha, MC_put_x_16_alpha,
00244 MC_put_y_16_alpha, MC_put_xy_16_alpha,
00245 MC_put_o_8_alpha, MC_put_x_8_alpha,
00246 MC_put_y_8_alpha, MC_put_xy_8_alpha },
00247 { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
00248 MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
00249 MC_avg_o_8_alpha, MC_avg_x_8_alpha,
00250 MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
00251 };
00252
00253 #endif