From edc80b41c6480b7c80ec5f7c835c92b2debb3774 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Thu, 22 Oct 2009 05:45:54 +0300 Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2 --- pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- 1 files changed, 32 insertions(+), 23 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 78b0ad1..b84636b 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -5300,34 +5300,43 @@ pixman_blt_sse2 (uint32_t *src_bits, { uint8_t * src_bytes; uint8_t * dst_bytes; - int byte_width; + int bpp; - if (src_bpp != dst_bpp) + if (src_bpp != dst_bpp || src_bpp & 7) return FALSE; - if (src_bpp == 16) - { - src_stride = src_stride * (int) sizeof (uint32_t) / 2; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; - src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); - byte_width = 2 * width; - src_stride *= 2; - dst_stride *= 2; - } - else if (src_bpp == 32) + bpp = src_bpp >> 3; + width *= bpp; + src_stride *= 4; + dst_stride *= 4; + src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; + dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; + + if (src_bpp != 16 && src_bpp != 32) { - src_stride = src_stride * (int) sizeof (uint32_t) / 4; - dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; - src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); - dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); - byte_width = 4 * width; - src_stride *= 4; - dst_stride *= 4; + pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, + width, height); + return TRUE; } - else + + if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) { - return FALSE; + src_bytes += src_stride * height - src_stride; + dst_bytes += dst_stride * height - dst_stride; + dst_stride = -dst_stride; + src_stride = -src_stride; + + if (src_bytes + width > dst_bytes) + { + /* TODO: reverse scanline copy using SSE2 */ + while (--height >= 0) + { + memmove (dst_bytes, src_bytes, width); + dst_bytes += dst_stride; + src_bytes += src_stride; + } + return TRUE; + } } cache_prefetch ((__m128i*)src_bytes); @@ -5340,7 +5349,7 @@ pixman_blt_sse2 (uint32_t *src_bits, uint8_t *d = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; - w = byte_width; + w = width; cache_prefetch_next ((__m128i*)s); cache_prefetch_next ((__m128i*)d); -- 1.6.2.4