25 #include <core/macros.h>
26 #include <fvutils/color/yuvrgb.h>
27 #include <fvutils/cpu/mmx.h>
29 namespace firevision {
// NOTE(review): this chunk is a sampled/garbled extraction — original file
// line numbers are fused into the text and several statements (rest of the
// parameter list, the loads of y0..y3/u/v from YUV, the index increments and
// closing braces) are missing from view. Code lines kept byte-identical;
// comments only.
/* Convert a packed YUV 4:1:1 buffer to packed RGB, plain-C version.
 * One loop iteration appears to emit four RGB pixels (y0..y3) sharing a
 * single u/v chroma pair, matching 4:1:1 subsampling (6 bytes in, 12 out).
 * The multipliers are the standard ITU-R BT.601 YCbCr->RGB coefficients in
 * 16.16 fixed point: 76284 ~= 1.164, 104595 ~= 1.596 (V->R),
 * 25625 ~= 0.391 (U->G), 53281 ~= 0.813 (V->G), 132252 ~= 2.018 (U->B),
 * each right-shifted by 16 after the multiply.
 * clip() presumably saturates to 0..255 — TODO confirm in fvutils headers. */
47 yuv411packed_to_rgb_plainc(
const unsigned char *YUV,
52 int y0, y1, y2, y3, u, v;
// Loop bound: a 4:1:1 packed frame is 1.5 bytes per pixel.
// NOTE(review): the initialization of `i` and the reads that fill
// y0..y3/u/v are not visible in this chunk.
54 while (i < (width * height) * 3 / 2) {
// pixel 0: R, G, B
63 *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
64 *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
65 *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
// pixel 1: R, G, B (same u/v)
68 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
69 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
70 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
// pixel 2: R, G, B (same u/v)
73 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
74 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
75 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
// pixel 3: R, G, B (same u/v)
78 *RGB++ = clip((76284 * y3 + 104595 * v) >> 16);
79 *RGB++ = clip((76284 * y3 - 25625 * u - 53281 * v) >> 16);
80 *RGB++ = clip((76284 * y3 + 132252 * u) >> 16);
/* Convert a planar YUV 4:2:2 buffer to packed RGB, plain-C version.
 * Plane layout derived from the visible pointer setup: Y plane of
 * width*height bytes, then U plane of width*height/2, then V plane.
 * Each iteration converts two pixels (y1, y2) sharing one u/v pair —
 * 4:2:2 horizontal chroma subsampling.  Same BT.601 16.16 fixed-point
 * coefficients as the other converters in this file.
 * NOTE(review): extraction gaps — the loads of y1/y2/u/v from yp/up/vp
 * and the closing braces are not visible in this chunk. */
100 yuv422planar_to_rgb_plainc(
const unsigned char *planar,
106 const unsigned char *yp, *up, *vp;
// U plane starts right after the Y plane, V plane after the U plane.
110 up = planar + (width * height);
111 vp = up + (width * height / 2);
// Two pixels per iteration -> width*height/2 iterations.
113 for (i = 0; i < (width * height / 2); ++i) {
// first pixel of the pair: R, G, B
125 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
126 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
127 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
// second pixel of the pair: R, G, B (same chroma)
130 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
131 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
132 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
/* Convert a packed YUV 4:2:2 buffer to packed RGB, plain-C version.
 * Processes two pixels per loop iteration (pixel += 2), consistent with
 * one u/v chroma pair per two luma samples in 4:2:2 packed data
 * (presumably UYVY or YUYV byte order — the actual loads from YUV are
 * missing from this chunk, so the component order cannot be confirmed).
 * Same BT.601 16.16 fixed-point coefficients as the other converters. */
152 yuv422packed_to_rgb_plainc(
const unsigned char *YUV,
154 const unsigned int width,
155 const unsigned int height)
159 for (
unsigned int pixel = 0; pixel < (width * height); pixel += 2) {
// first pixel: R, G, B
166 *RGB++ = clip((76284 * y0 + 104595 * v) >> 16);
167 *RGB++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
168 *RGB++ = clip((76284 * y0 + 132252 * u) >> 16);
// second pixel: R, G, B (same chroma)
171 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
172 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
173 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
/* Convert a planar YUV 4:2:2 buffer to packed BGR, plain-C version.
 * Identical arithmetic to yuv422planar_to_rgb_plainc, but the three
 * stores per pixel are emitted in reverse channel order: B (132252*u),
 * then G, then R (104595*v).
 * NOTE(review): extraction gaps — loads of y1/y2/u/v and closing braces
 * are not visible in this chunk. */
185 yuv422planar_to_bgr_plainc(
const unsigned char *planar,
191 const unsigned char *yp, *up, *vp;
// Plane layout: Y (width*height), then U (width*height/2), then V.
195 up = planar + (width * height);
196 vp = up + (width * height / 2);
// Two pixels per iteration share one u/v pair (4:2:2).
198 for (i = 0; i < (width * height / 2); ++i) {
// first pixel of the pair: B, G, R
210 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
211 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
212 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
// second pixel of the pair: B, G, R (same chroma)
215 *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
216 *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
217 *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
/* Convert a planar YUV 4:2:2 buffer to RGB with an alpha channel.
 * Same BT.601 fixed-point math as yuv422planar_to_rgb_plainc; the
 * writes of the alpha byte itself fall in the gaps of this extraction
 * (note the skipped line between the two pixel groups), so the alpha
 * value written cannot be confirmed from here — TODO check full source. */
222 yuv422planar_to_rgb_with_alpha_plainc(
const unsigned char *planar,
228 const unsigned char *yp, *up, *vp;
// Plane layout: Y (width*height), then U (width*height/2), then V.
232 up = planar + (width * height);
233 vp = up + (width * height / 2);
// Two pixels per iteration share one u/v pair (4:2:2).
235 for (i = 0; i < (width * height / 2); ++i) {
// first pixel: R, G, B (alpha store not visible in this chunk)
247 *RGB++ = clip((76284 * y1 + 104595 * v) >> 16);
248 *RGB++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
249 *RGB++ = clip((76284 * y1 + 132252 * u) >> 16);
// second pixel: R, G, B (alpha store not visible in this chunk)
253 *RGB++ = clip((76284 * y2 + 104595 * v) >> 16);
254 *RGB++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
255 *RGB++ = clip((76284 * y2 + 132252 * u) >> 16);
/* Convert a planar YUV 4:2:2 buffer to BGR with an alpha channel.
 * Mirror of yuv422planar_to_rgb_with_alpha_plainc with the channel
 * stores reversed to B, G, R.  As with the RGB variant, the alpha
 * writes fall in the extraction gaps and are not visible here. */
261 yuv422planar_to_bgr_with_alpha_plainc(
const unsigned char *planar,
267 const unsigned char *yp, *up, *vp;
// Plane layout: Y (width*height), then U (width*height/2), then V.
271 up = planar + (width * height);
272 vp = up + (width * height / 2);
// Two pixels per iteration share one u/v pair (4:2:2).
274 for (i = 0; i < (width * height / 2); ++i) {
// first pixel: B, G, R (alpha store not visible in this chunk)
286 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
287 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
288 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
// second pixel: B, G, R (alpha store not visible in this chunk)
292 *BGR++ = clip((76284 * y2 + 132252 * u) >> 16);
293 *BGR++ = clip((76284 * y2 - 25625 * u - 53281 * v) >> 16);
294 *BGR++ = clip((76284 * y2 + 104595 * v) >> 16);
/* Convert a packed YUV 4:2:2 buffer to BGR with an alpha channel.
 * Loop bound iterates over the 2-bytes-per-pixel packed input
 * (width*height*2 bytes); two pixels are emitted per iteration sharing
 * one u/v pair.  The loads from YUV, the index increments and the alpha
 * stores are missing from this extraction — component byte order
 * (UYVY vs YUYV) cannot be confirmed from here. */
300 yuv422packed_to_bgr_with_alpha_plainc(
const unsigned char *YUV,
// `i` indexes bytes of the packed input: 2 bytes per pixel in 4:2:2.
307 while (i < (width * height * 2)) {
// first pixel: B, G, R (BT.601 16.16 fixed point)
314 *BGR++ = clip((76284 * y0 + 132252 * u) >> 16);
315 *BGR++ = clip((76284 * y0 - 25625 * u - 53281 * v) >> 16);
316 *BGR++ = clip((76284 * y0 + 104595 * v) >> 16);
// second pixel: B, G, R (same chroma)
320 *BGR++ = clip((76284 * y1 + 132252 * u) >> 16);
321 *BGR++ = clip((76284 * y1 - 25625 * u - 53281 * v) >> 16);
322 *BGR++ = clip((76284 * y1 + 104595 * v) >> 16);
// ---------------------------------------------------------------------
// x86-only MMX conversion path: 8-byte-aligned constant tables used as
// memory operands by the movq_m2r()-based code below.  Each table holds
// the same 16-bit value replicated four times (one per MMX word lane).
// NOTE(review): the definitions of FOUR, BITRES, RES and the CRV/CBU/
// CGU/CGV/YMUL/OFF coefficients are not visible in this chunk; RZ()
// apparently rescales a BITRES-bit fixed-point constant down to RES
// bits.  `volatile` presumably forces the constants to live in memory
// so the movq_m2r macros can reference them — TODO confirm.
// ---------------------------------------------------------------------
327 #if (defined __i386__ || defined __386__ || defined __X86__ || defined _M_IX86 || defined i386)
344 # define RZ(i) (i >> (BITRES - RES))
// V->R coefficient, replicated across the four word lanes.
350 __aligned(8) const volatile
unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
// U->B coefficient.
351 __aligned(8) const volatile
unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
// U->G coefficient.
352 __aligned(8) const volatile
unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
// V->G coefficient.
353 __aligned(8) const volatile
unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
// Luma multiplier.
354 __aligned(8) const volatile
unsigned short _const_ymul[4] = FOUR(RZ(YMUL));
// Chroma bias (samples are offset by 128).
355 __aligned(8) const volatile
unsigned short _const_128[4] = FOUR(128);
// Rounding/offset term added before the final pack.
356 __aligned(8) const volatile
unsigned short _const_32[4] = FOUR(RZ(OFF));
// Luma bias (Y samples are offset by 16 in BT.601 video range).
357 __aligned(8) const volatile
unsigned short _const_16[4] = FOUR(16);
// Convenience aliases: dereference so the movq_m2r/paddsw_m2r macros can
// take the tables as plain memory operands.
359 # define CONST_CRVCRV *_const_crvcrv
360 # define CONST_CBUCBU *_const_cbucbu
361 # define CONST_CGUCGU *_const_cgucgu
362 # define CONST_CGVCGV *_const_cgvcgv
363 # define CONST_YMUL *_const_ymul
364 # define CONST_128 *_const_128
365 # define CONST_32 *_const_32
366 # define CONST_16 *_const_16
/* MMX-accelerated planar YUV -> packed RGB conversion.
 * NOTE(review): this function continues past the end of the visible
 * chunk and many interior statements (register loads from yp1/up/vp,
 * several movq/psllw steps between the commented stages, the epilogue)
 * are missing from this extraction.  Code lines kept byte-identical;
 * comments describe only what the visible instructions establish.
 * Processes 8 pixels per inner iteration (xx += 8) and writes 32 bytes
 * of output per iteration via non-temporal stores. */
369 yuv411planar_to_rgb_mmx(
const unsigned char *yuv,
375 const unsigned char *yp1, *up, *vp;
// Two alternative V-plane offsets are visible; presumably they belong
// to different subsampling branches (4:1:1 vs 4:2:0 layout) whose
// if/else is lost in the extraction — TODO confirm against full source.
381 vp = up + (w * (h / 4));
387 vp = up + ((w / 2) * (h / 2));
389 for (yy = 0; yy < h; yy++) {
390 for (xx = 0; xx < w; xx += 8) {
// Widen packed chroma bytes to 16-bit words (zero-extend via mm7).
401 punpcklbw_r2r(mm7, mm2);
402 punpcklbw_r2r(mm7, mm3);
// Remove the video-range luma bias (Y - 16) on both luma registers.
404 movq_m2r(CONST_16, mm4);
405 psubsw_r2r(mm4, mm0);
406 psubsw_r2r(mm4, mm1);
// Remove the chroma bias (U/V - 128).
408 movq_m2r(CONST_128, mm5);
409 psubsw_r2r(mm5, mm2);
410 psubsw_r2r(mm5, mm3);
// Scale luma by YMUL.
412 movq_m2r(CONST_YMUL, mm4);
413 pmullw_r2r(mm4, mm0);
414 pmullw_r2r(mm4, mm1);
// Per-channel chroma products: mm7 = V*CRV (red contribution),
// mm6 = U*CBU (blue), mm5 = U*CGU and mm4 = V*CGV (green subtrahends).
416 movq_m2r(CONST_CRVCRV, mm7);
417 pmullw_r2r(mm3, mm7);
419 movq_m2r(CONST_CBUCBU, mm6);
420 pmullw_r2r(mm2, mm6);
422 movq_m2r(CONST_CGUCGU, mm5);
423 pmullw_r2r(mm2, mm5);
425 movq_m2r(CONST_CGVCGV, mm4);
426 pmullw_r2r(mm3, mm4);
// Red channel: add the V*CRV term to both luma halves, then saturate
// the 16-bit words down to unsigned bytes.
429 paddsw_r2r(mm7, mm2);
430 paddsw_r2r(mm1, mm7);
434 packuswb_r2r(mm7, mm2);
438 punpckhbw_r2r(mm7, mm2);
439 punpcklbw_r2r(mm3, mm7);
// Green channel: luma minus U*CGU minus V*CGV plus rounding offset,
// for both 4-pixel halves, then saturate-pack to bytes.
443 psubsw_r2r(mm5, mm3);
444 psubsw_r2r(mm4, mm3);
445 paddsw_m2r(CONST_32, mm3);
448 psubsw_r2r(mm5, mm7);
449 psubsw_r2r(mm4, mm7);
450 paddsw_m2r(CONST_32, mm7);
454 packuswb_r2r(mm7, mm3);
458 punpckhbw_r2r(mm7, mm3);
459 punpcklbw_r2r(mm4, mm7);
// Blue channel: luma plus U*CBU plus rounding offset, both halves.
462 movq_m2r(CONST_32, mm4);
463 paddsw_r2r(mm6, mm0);
464 paddsw_r2r(mm6, mm1);
465 paddsw_r2r(mm4, mm0);
466 paddsw_r2r(mm4, mm1);
469 packuswb_r2r(mm1, mm0);
473 punpckhbw_r2r(mm7, mm0);
474 punpcklbw_r2r(mm5, mm7);
// Interleave the per-channel byte registers into pixel order
// (byte -> word -> dword unpack cascade).
481 punpckhbw_r2r(mm3, mm2);
482 punpcklbw_r2r(mm6, mm7);
483 punpckhbw_r2r(mm1, mm0);
484 punpcklbw_r2r(mm1, mm5);
487 punpckhwd_r2r(mm5, mm7);
488 punpcklwd_r2r(mm5, mm1);
491 punpckhwd_r2r(mm0, mm2);
492 punpcklwd_r2r(mm0, mm4);
// Write 32 output bytes with non-temporal (cache-bypassing) stores;
// dp1 presumably points at the RGB output row — TODO confirm, the
// pointer setup is not visible in this chunk.
494 movntq_r2m(mm1, *(dp1));
495 movntq_r2m(mm7, *(dp1 + 8));
496 movntq_r2m(mm4, *(dp1 + 16));
497 movntq_r2m(mm2, *(dp1 + 24));