Diffstat (limited to 'recipes/xorg-lib/pixman')
-rw-r--r--  recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch | 114
-rw-r--r--  recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch | 91
-rw-r--r--  recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch | 91
-rw-r--r--  recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch | 94
-rw-r--r--  recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch | 169
-rw-r--r--  recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch | 53
-rw-r--r--  recipes/xorg-lib/pixman/calloc.patch | 23
-rw-r--r--  recipes/xorg-lib/pixman/nearest-neighbour.patch | 1040
-rw-r--r--  recipes/xorg-lib/pixman/over-n-8-0565.patch | 231
-rw-r--r--  recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch | 1702
-rw-r--r--  recipes/xorg-lib/pixman/pixman-28986.patch | 32
-rw-r--r--  recipes/xorg-lib/pixman/pixman-arm.patch | 632
-rw-r--r--  recipes/xorg-lib/pixman/pixman-x888-565.patch | 68
-rw-r--r--  recipes/xorg-lib/pixman/prefetch.patch | 298
-rw-r--r--  recipes/xorg-lib/pixman/remove-broken.patch | 826
-rw-r--r--  recipes/xorg-lib/pixman/src-8888-0565.patch | 324
-rw-r--r--  recipes/xorg-lib/pixman/tls.patch | 59
17 files changed, 0 insertions(+), 5847 deletions(-)
diff --git a/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
deleted file mode 100644
index a2cda2438e..0000000000
--- a/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 8ea1a333de202018a862a7b04b94479d3109274b Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Tue, 16 Mar 2010 16:55:28 +0100
-Subject: [PATCH 1/5] Generic C implementation of pixman_blt with overlapping support
-
-Uses memcpy/memmove functions to copy pixels, can handle the
-case when both source and destination areas are in the same
-image (this is useful for scrolling).
-
-It is assumed that the copying direction only matters when
-the same image is used for both source and destination (and
-src_stride == dst_stride). The copying direction is undefined
-for images whose source and destination strides differ and
-whose areas happen to overlap (but this is an unrealistic
-case anyway).
----
- pixman/pixman-general.c | 21 ++++++++++++++++++---
- pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 61 insertions(+), 3 deletions(-)
-
-diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
-index bddf79a..f525744 100644
---- a/pixman/pixman-general.c
-+++ b/pixman/pixman-general.c
-@@ -285,9 +285,24 @@ general_blt (pixman_implementation_t *imp,
- int width,
- int height)
- {
-- /* We can't blit unless we have sse2 or mmx */
--
-- return FALSE;
-+ uint8_t *dst_bytes = (uint8_t *)dst_bits;
-+ uint8_t *src_bytes = (uint8_t *)src_bits;
-+ int bpp;
-+
-+ if (src_bpp != dst_bpp || src_bpp & 7)
-+ return FALSE;
-+
-+ bpp = src_bpp >> 3;
-+ width *= bpp;
-+ src_stride *= 4;
-+ dst_stride *= 4;
-+ pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
-+ dst_bytes + dst_y * dst_stride + dst_x * bpp,
-+ src_stride,
-+ dst_stride,
-+ width,
-+ height);
-+ return TRUE;
- }
-
- static pixman_bool_t
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index d5767af..eeb677d 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -10,6 +10,7 @@
-
- #include "pixman.h"
- #include <time.h>
-+#include <string.h>
- #include <assert.h>
- #include <stdio.h>
- #include <string.h>
-@@ -867,4 +868,46 @@ void pixman_timer_register (pixman_timer_t *timer);
-
- #endif /* PIXMAN_TIMERS */
-
-+/* a helper function: blits images one byte row at a time, with src/dst overlap support */
-+static inline void
-+pixman_blt_helper (uint8_t *src_bytes,
-+ uint8_t *dst_bytes,
-+ int src_stride,
-+ int dst_stride,
-+ int width,
-+ int height)
-+{
-+ /*
-+ * The second part of this check is not strictly needed, but it prevents
-+ * unnecessary upside-down processing of areas which belong to different
-+ * images. Upside-down processing can be slower with fixed-distance-ahead
-+ * prefetch and perceived as having more tearing.
-+ */
-+ if (src_bytes < dst_bytes + width &&
-+ src_bytes + src_stride * height > dst_bytes)
-+ {
-+ src_bytes += src_stride * height - src_stride;
-+ dst_bytes += dst_stride * height - dst_stride;
-+ dst_stride = -dst_stride;
-+ src_stride = -src_stride;
-+ /* Horizontal scrolling to the left needs memmove */
-+ if (src_bytes + width > dst_bytes)
-+ {
-+ while (--height >= 0)
-+ {
-+ memmove (dst_bytes, src_bytes, width);
-+ dst_bytes += dst_stride;
-+ src_bytes += src_stride;
-+ }
-+ return;
-+ }
-+ }
-+ while (--height >= 0)
-+ {
-+ memcpy (dst_bytes, src_bytes, width);
-+ dst_bytes += dst_stride;
-+ src_bytes += src_stride;
-+ }
-+}
-+
- #endif /* PIXMAN_PRIVATE_H */
---
-1.6.6.1
-
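
For reference, here is a minimal standalone sketch of the overlap-direction logic that pixman_blt_helper above introduces. Names are illustrative and this is not part of pixman's API; compile with: cc blt_demo.c

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Copy 'height' rows of 'width' bytes, handling overlapping areas. */
    static void
    blt_bytes (uint8_t *src, uint8_t *dst,
               int src_stride, int dst_stride,
               int width, int height)
    {
        /* Conservative overlap test: if it triggers, process the rows
         * bottom-up so each source row is read before it is clobbered. */
        if (src < dst + width && src + src_stride * height > dst)
        {
            src += src_stride * (height - 1);
            dst += dst_stride * (height - 1);
            src_stride = -src_stride;
            dst_stride = -dst_stride;
            /* Individual rows may still overlap (e.g. a horizontal
             * scroll to the left), which requires memmove. */
            if (src + width > dst)
            {
                while (--height >= 0)
                {
                    memmove (dst, src, width);
                    dst += dst_stride;
                    src += src_stride;
                }
                return;
            }
        }
        while (--height >= 0)
        {
            memcpy (dst, src, width);
            dst += dst_stride;
            src += src_stride;
        }
    }

    int
    main (void)
    {
        /* Scroll a 4x4 byte image down by one row, in place. */
        char img[] = "aaaabbbbcccc....";
        blt_bytes ((uint8_t *) img, (uint8_t *) img + 4, 4, 4, 4, 3);
        printf ("%s\n", img);    /* -> aaaaaaaabbbbcccc */
        return 0;
    }
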
diff --git a/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
deleted file mode 100644
index 003337f48d..0000000000
--- a/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 3170d9f5e927681a2516bcec52b317d1d4785e25 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 22 Oct 2009 05:45:47 +0300
-Subject: [PATCH 2/5] Support of overlapping src/dst for pixman_blt_mmx
-
----
- pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++---------------------
- 1 files changed, 32 insertions(+), 23 deletions(-)
-
-diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
-index e084e7f..6212b31 100644
---- a/pixman/pixman-mmx.c
-+++ b/pixman/pixman-mmx.c
-@@ -2994,34 +2994,43 @@ pixman_blt_mmx (uint32_t *src_bits,
- {
- uint8_t * src_bytes;
- uint8_t * dst_bytes;
-- int byte_width;
-+ int bpp;
-
-- if (src_bpp != dst_bpp)
-+ if (src_bpp != dst_bpp || src_bpp & 7)
- return FALSE;
-
-- if (src_bpp == 16)
-- {
-- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-- src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-- byte_width = 2 * width;
-- src_stride *= 2;
-- dst_stride *= 2;
-- }
-- else if (src_bpp == 32)
-+ bpp = src_bpp >> 3;
-+ width *= bpp;
-+ src_stride *= 4;
-+ dst_stride *= 4;
-+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+ if (src_bpp != 16 && src_bpp != 32)
- {
-- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-- byte_width = 4 * width;
-- src_stride *= 4;
-- dst_stride *= 4;
-+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+ width, height);
-+ return TRUE;
- }
-- else
-+
-+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
- {
-- return FALSE;
-+ src_bytes += src_stride * height - src_stride;
-+ dst_bytes += dst_stride * height - dst_stride;
-+ dst_stride = -dst_stride;
-+ src_stride = -src_stride;
-+
-+ if (src_bytes + width > dst_bytes)
-+ {
-+ /* TODO: reverse scanline copy using MMX */
-+ while (--height >= 0)
-+ {
-+ memmove (dst_bytes, src_bytes, width);
-+ dst_bytes += dst_stride;
-+ src_bytes += src_stride;
-+ }
-+ return TRUE;
-+ }
- }
-
- while (height--)
-@@ -3031,7 +3040,7 @@ pixman_blt_mmx (uint32_t *src_bits,
- uint8_t *d = dst_bytes;
- src_bytes += src_stride;
- dst_bytes += dst_stride;
-- w = byte_width;
-+ w = width;
-
- while (w >= 2 && ((unsigned long)d & 3))
- {
---
-1.6.6.1
-
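
The MMX path above folds the old separate 16bpp/32bpp cases into one set of unit conversions: pixman hands pixman_blt its strides counted in uint32_t words, so the patch converts stride and width to bytes once, up front. A small worked sketch of that arithmetic (the values are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
        int src_bpp    = 16;      /* r5g6b5 */
        int src_stride = 200;     /* in 32-bit words, as pixman passes it */
        int src_x = 10, src_y = 3;

        int  bpp         = src_bpp >> 3;      /* 2 bytes per pixel */
        int  byte_stride = src_stride * 4;    /* 800 bytes per row */
        long offset      = (long) src_y * byte_stride + (long) src_x * bpp;

        /* 3 * 800 + 10 * 2 = 2420 */
        printf ("first pixel is %ld bytes into the image\n", offset);
        return 0;
    }
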
diff --git a/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
deleted file mode 100644
index 7e8f34f6bd..0000000000
--- a/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From f07cd58c643b490dcb1ef7be2642926cfeca1e69 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 22 Oct 2009 05:45:54 +0300
-Subject: [PATCH 3/5] Support of overlapping src/dst for pixman_blt_sse2
-
----
- pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++--------------------
- 1 files changed, 32 insertions(+), 23 deletions(-)
-
-diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
-index 946e7ba..66053ae 100644
---- a/pixman/pixman-sse2.c
-+++ b/pixman/pixman-sse2.c
-@@ -5299,34 +5299,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
- {
- uint8_t * src_bytes;
- uint8_t * dst_bytes;
-- int byte_width;
-+ int bpp;
-
-- if (src_bpp != dst_bpp)
-+ if (src_bpp != dst_bpp || src_bpp & 7)
- return FALSE;
-
-- if (src_bpp == 16)
-- {
-- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-- byte_width = 2 * width;
-- src_stride *= 2;
-- dst_stride *= 2;
-- }
-- else if (src_bpp == 32)
-+ bpp = src_bpp >> 3;
-+ width *= bpp;
-+ src_stride *= 4;
-+ dst_stride *= 4;
-+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+ if (src_bpp != 16 && src_bpp != 32)
- {
-- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-- byte_width = 4 * width;
-- src_stride *= 4;
-- dst_stride *= 4;
-+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+ width, height);
-+ return TRUE;
- }
-- else
-+
-+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
- {
-- return FALSE;
-+ src_bytes += src_stride * height - src_stride;
-+ dst_bytes += dst_stride * height - dst_stride;
-+ dst_stride = -dst_stride;
-+ src_stride = -src_stride;
-+
-+ if (src_bytes + width > dst_bytes)
-+ {
-+ /* TODO: reverse scanline copy using SSE2 */
-+ while (--height >= 0)
-+ {
-+ memmove (dst_bytes, src_bytes, width);
-+ dst_bytes += dst_stride;
-+ src_bytes += src_stride;
-+ }
-+ return TRUE;
-+ }
- }
-
- cache_prefetch ((__m128i*)src_bytes);
-@@ -5339,7 +5348,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
- uint8_t *d = dst_bytes;
- src_bytes += src_stride;
- dst_bytes += dst_stride;
-- w = byte_width;
-+ w = width;
-
- cache_prefetch_next ((__m128i*)s);
- cache_prefetch_next ((__m128i*)d);
---
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
deleted file mode 100644
index 0ba5b843b5..0000000000
--- a/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From e0542866c466ad512d69292df098d4b880e35e52 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Wed, 18 Nov 2009 06:08:48 +0200
-Subject: [PATCH 4/5] Support of overlapping src/dst for pixman_blt_neon
-
----
- pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++--------
- 1 files changed, 51 insertions(+), 11 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 24ceeeb..134493d 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -360,26 +360,66 @@ pixman_blt_neon (uint32_t *src_bits,
- int width,
- int height)
- {
-- if (src_bpp != dst_bpp)
-+ uint8_t * src_bytes;
-+ uint8_t * dst_bytes;
-+ int bpp;
-+
-+ if (src_bpp != dst_bpp || src_bpp & 7)
- return FALSE;
-
-+ bpp = src_bpp >> 3;
-+ width *= bpp;
-+ src_stride *= 4;
-+ dst_stride *= 4;
-+ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+ if (src_bpp != 16 && src_bpp != 32)
-+ {
-+ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+ width, height);
-+ return TRUE;
-+ }
-+
-+ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
-+ {
-+ src_bytes += src_stride * height - src_stride;
-+ dst_bytes += dst_stride * height - dst_stride;
-+ dst_stride = -dst_stride;
-+ src_stride = -src_stride;
-+
-+ if (src_bytes + width > dst_bytes)
-+ {
-+ /* TODO: reverse scanline copy using NEON */
-+ while (--height >= 0)
-+ {
-+ memmove (dst_bytes, src_bytes, width);
-+ dst_bytes += dst_stride;
-+ src_bytes += src_stride;
-+ }
-+ return TRUE;
-+ }
-+ }
-+
- switch (src_bpp)
- {
- case 16:
- pixman_composite_src_0565_0565_asm_neon (
-- width, height,
-- (uint16_t *)(((char *) dst_bits) +
-- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
-- (uint16_t *)(((char *) src_bits) +
-- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
-+ width >> 1,
-+ height,
-+ (uint16_t *) dst_bytes,
-+ dst_stride >> 1,
-+ (uint16_t *) src_bytes,
-+ src_stride >> 1);
- return TRUE;
- case 32:
- pixman_composite_src_8888_8888_asm_neon (
-- width, height,
-- (uint32_t *)(((char *) dst_bits) +
-- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
-- (uint32_t *)(((char *) src_bits) +
-- src_y * src_stride * 4 + src_x * 4), src_stride);
-+ width >> 2,
-+ height,
-+ (uint32_t *) dst_bytes,
-+ dst_stride >> 2,
-+ (uint32_t *) src_bytes,
-+ src_stride >> 2);
- return TRUE;
- default:
- return FALSE;
---
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch b/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
deleted file mode 100644
index 769ed2e7d0..0000000000
--- a/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
+++ /dev/null
@@ -1,169 +0,0 @@
-From d51b10a2750d99543a0c92ca44802aa7a4d70e54 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 5/5] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
-
----
- pixman/pixman-access.c | 23 ++++++++++++++++++++++-
- pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++
- pixman/pixman-arm-neon.c | 41 +++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-private.h | 5 +++++
- 4 files changed, 88 insertions(+), 1 deletions(-)
-
-diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
-index fa0a267..5bb3e09 100644
---- a/pixman/pixman-access.c
-+++ b/pixman/pixman-access.c
-@@ -2748,7 +2748,7 @@ typedef struct
- store_scanline_ ## format, store_scanline_generic_64 \
- }
-
--static const format_info_t accessors[] =
-+static format_info_t accessors[] =
- {
- /* 32 bpp formats */
- FORMAT_INFO (a8r8g8b8),
-@@ -2891,6 +2891,27 @@ _pixman_bits_image_setup_raw_accessors (bits_image_t *image)
- setup_accessors (image);
- }
-
-+void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+ fetch_scanline_t fetch_func,
-+ store_scanline_t store_func)
-+{
-+ format_info_t *info = accessors;
-+
-+ while (info->format != PIXMAN_null)
-+ {
-+ if (info->format == format)
-+ {
-+ if (fetch_func)
-+ info->fetch_scanline_raw_32 = fetch_func;
-+ if (store_func)
-+ info->store_scanline_raw_32 = store_func;
-+ return;
-+ }
-+ info++;
-+ }
-+}
-+
- #else
-
- void
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index eb8cc4c..6ab3301 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -454,6 +454,16 @@ generate_composite_function \
- pixman_composite_src_8888_0565_process_pixblock_tail, \
- pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-+generate_composite_function_single_scanline \
-+ pixman_store_scanline_r5g6b5_asm_neon, 32, 0, 16, \
-+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-+ 8, /* number of pixels, processed in a single block */ \
-+ default_init, \
-+ default_cleanup, \
-+ pixman_composite_src_8888_0565_process_pixblock_head, \
-+ pixman_composite_src_8888_0565_process_pixblock_tail, \
-+ pixman_composite_src_8888_0565_process_pixblock_tail_head
-+
- /******************************************************************************/
-
- .macro pixman_composite_src_0565_8888_process_pixblock_head
-@@ -489,6 +499,16 @@ generate_composite_function \
- pixman_composite_src_0565_8888_process_pixblock_tail, \
- pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-+generate_composite_function_single_scanline \
-+ pixman_fetch_scanline_r5g6b5_asm_neon, 16, 0, 32, \
-+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-+ 8, /* number of pixels, processed in a single block */ \
-+ default_init, \
-+ default_cleanup, \
-+ pixman_composite_src_0565_8888_process_pixblock_head, \
-+ pixman_composite_src_0565_8888_process_pixblock_tail, \
-+ pixman_composite_src_0565_8888_process_pixblock_tail_head
-+
- /******************************************************************************/
-
- .macro pixman_composite_add_8000_8000_process_pixblock_head
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 134493d..2245b52 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -567,6 +567,43 @@ neon_combine_##name##_u (pixman_implementation_t *imp, \
- BIND_COMBINE_U (over)
- BIND_COMBINE_U (add)
-
-+void
-+pixman_fetch_scanline_r5g6b5_asm_neon (int width,
-+ uint32_t *buffer,
-+ const uint16_t *pixel);
-+void
-+pixman_store_scanline_r5g6b5_asm_neon (int width,
-+ uint16_t *pixel,
-+ const uint32_t *values);
-+
-+static void
-+neon_fetch_scanline_r5g6b5 (pixman_image_t *image,
-+ int x,
-+ int y,
-+ int width,
-+ uint32_t * buffer,
-+ const uint32_t *mask,
-+ uint32_t mask_bits)
-+{
-+ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-+ const uint16_t *pixel = (const uint16_t *)bits + x;
-+
-+ pixman_fetch_scanline_r5g6b5_asm_neon (width, buffer, pixel);
-+}
-+
-+static void
-+neon_store_scanline_r5g6b5 (bits_image_t * image,
-+ int x,
-+ int y,
-+ int width,
-+ const uint32_t *values)
-+{
-+ uint32_t *bits = image->bits + image->rowstride * y;
-+ uint16_t *pixel = ((uint16_t *) bits) + x;
-+
-+ pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
-+}
-+
- pixman_implementation_t *
- _pixman_implementation_create_arm_neon (void)
- {
-@@ -577,6 +614,10 @@ _pixman_implementation_create_arm_neon (void)
- imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
- imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
-
-+ _pixman_bits_override_accessors (PIXMAN_r5g6b5,
-+ neon_fetch_scanline_r5g6b5,
-+ neon_store_scanline_r5g6b5);
-+
- imp->blt = arm_neon_blt;
- imp->fill = arm_neon_fill;
-
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index eeb677d..ba2d401 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -220,6 +220,11 @@ void
- _pixman_bits_image_setup_raw_accessors (bits_image_t *image);
-
- void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+ fetch_scanline_t fetch_func,
-+ store_scanline_t store_func);
-+
-+void
- _pixman_image_get_scanline_generic_64 (pixman_image_t *image,
- int x,
- int y,
---
-1.6.6.1
-
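
The accessor override added by patch 0005 is a table-driven function-pointer swap: walk the accessors table until the format matches, then replace the fetch/store pointers. A tiny self-contained sketch of the pattern, using illustrative types rather than pixman's actual ones:

    #include <stdio.h>

    typedef void (*fetch_fn) (void);

    typedef struct { int format; fetch_fn fetch; } entry_t;

    static void generic_fetch (void) { puts ("generic"); }
    static void neon_fetch    (void) { puts ("neon"); }

    static entry_t table[] = {
        { 0x0565, generic_fetch },
        { 0x8888, generic_fetch },
        { 0,      0             },   /* sentinel, like PIXMAN_null */
    };

    static void
    override_fetch (int format, fetch_fn fn)
    {
        for (entry_t *e = table; e->format; e++)
            if (e->format == format)
                e->fetch = fn;
    }

    int
    main (void)
    {
        override_fetch (0x0565, neon_fetch);
        table[0].fetch ();           /* -> neon */
        return 0;
    }
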
diff --git a/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch b/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch
deleted file mode 100644
index 3d8d4e8292..0000000000
--- a/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 7f0adaef68c5b0bb1c5eb9f5db5792b71b8b8beb Mon Sep 17 00:00:00 2001
-From: Koen Kooi <koen@dominion.thruhere.net>
-Date: Fri, 19 Mar 2010 10:44:09 +0100
-Subject: [PATCH 6/6] Revert "ARM: SIMD: Try without any CFLAGS before forcing -mcpu="
-
-This forces -marm, which results in a runtime SIGILL on Thumb userspace.
-
-This reverts commit 18f0de452dc7e12e4cb544d761a626d5c6031663.
----
- configure.ac | 20 +++++---------------
- 1 files changed, 5 insertions(+), 15 deletions(-)
-
-diff --git a/configure.ac b/configure.ac
-index fc3ee24..f84a4dc 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -363,28 +363,18 @@ AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-
- dnl ===========================================================================
- dnl Check for ARM SIMD instructions
--ARM_SIMD_CFLAGS=""
-+ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
-
- have_arm_simd=no
- AC_MSG_CHECKING(whether to use ARM SIMD assembler)
--# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
--}], have_arm_simd=yes,
-- # check again with an explicit -mcpu= in case the toolchain defaults to an
-- # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
-- # so we force ARM mode
-- ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
-- xserver_save_CFLAGS=$CFLAGS
-- CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
-- AC_COMPILE_IFELSE([
-- int main () {
-- asm("uqadd8 r1, r1, r2");
-- return 0;
-- }], have_arm_simd=yes)
-- CFLAGS=$xserver_save_CFLAGS)
-+}], have_arm_simd=yes)
-+CFLAGS=$xserver_save_CFLAGS
-
- AC_ARG_ENABLE(arm-simd,
- [AC_HELP_STRING([--disable-arm-simd],
---
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/calloc.patch b/recipes/xorg-lib/pixman/calloc.patch
deleted file mode 100644
index 4a60d7ef9a..0000000000
--- a/recipes/xorg-lib/pixman/calloc.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From 634ba33b5b1fcfd5a0e7910f9991b4ed4f674549 Mon Sep 17 00:00:00 2001
-From: Søren Sandmann Pedersen <ssp@redhat.com>
-Date: Wed, 07 Apr 2010 05:39:14 +0000
-Subject: Fix uninitialized cache when pthreads are used
-
-The thread local cache is allocated with malloc(), but we rely on it
-being initialized to zero, so allocate it with calloc() instead.
----
-diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
-index a4e3f88..cdac0d8 100644
---- a/pixman/pixman-compiler.h
-+++ b/pixman/pixman-compiler.h
-@@ -101,7 +101,7 @@
- static type * \
- tls_ ## name ## _alloc (key) \
- { \
-- type *value = malloc (sizeof (type)); \
-+ type *value = calloc (1, sizeof (type)); \
- if (value) \
- pthread_setspecific (key, value); \
- return value; \
---
-cgit v0.8.3-6-g21f6
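
The one-word calloc fix above is easy to misread without context, so here is a minimal sketch of the lazy TLS-cache pattern it repairs (illustrative struct and names, not pixman's actual types; compile with -pthread):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int n_cached; void *entries[8]; } cache_t;

    static pthread_key_t  cache_key;
    static pthread_once_t once = PTHREAD_ONCE_INIT;

    static void make_key (void) { pthread_key_create (&cache_key, free); }

    static cache_t *
    get_cache (void)
    {
        cache_t *c;

        pthread_once (&once, make_key);
        c = pthread_getspecific (cache_key);
        if (!c)
        {
            /* malloc() here left n_cached as garbage; callers that
             * trusted the zeroed initial state then read junk pointers. */
            c = calloc (1, sizeof (cache_t));
            if (c)
                pthread_setspecific (cache_key, c);
        }
        return c;
    }

    int
    main (void)
    {
        cache_t *c = get_cache ();
        printf ("fresh cache has %d entries\n", c ? c->n_cached : -1);
        return 0;
    }
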
diff --git a/recipes/xorg-lib/pixman/nearest-neighbour.patch b/recipes/xorg-lib/pixman/nearest-neighbour.patch
deleted file mode 100644
index 29b140faf9..0000000000
--- a/recipes/xorg-lib/pixman/nearest-neighbour.patch
+++ /dev/null
@@ -1,1040 +0,0 @@
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 17 Jul 2009 10:22:23 +0000 (+0300)
-Subject: Fastpath for nearest neighbour scaled compositing operations.
-X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=247531c6978725a88fd3706129b9d3e339026f54
-
-Fastpath for nearest neighbour scaled compositing operations.
-
-OVER 8888x8888, OVER 8888x0565, SRC 8888x8888, SRC 8888x0565
-and SRC 0565x0565 cases are supported.
----
-
-diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
-index 7f80578..7f3a6ad 100644
---- a/pixman/pixman-fast-path.c
-+++ b/pixman/pixman-fast-path.c
-@@ -1261,6 +1261,993 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
- }
- }
-
-+/*
-+ * Functions that implement the core inner loops for the nearest neighbour
-+ * scaled fastpath compositing operations. They do not need to do clipping
-+ * checks; also, the loops are unrolled to process two pixels per iteration
-+ * for better performance on most CPU architectures (superscalar processors
-+ * can issue several operations simultaneously, other processors can hide
-+ * instruction latencies by pipelining operations). Unrolling further
-+ * does not make much sense because the compiler would soon start running
-+ * out of spare registers.
-+ */
-+
-+#undef READ
-+#undef WRITE
-+#define READ(img,x) (*(x))
-+#define WRITE(img,ptr,v) ((*(ptr)) = (v))
-+
-+#define UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(x, a, y) do { \
-+ UN8x4_MUL_UN8_ADD_UN8x4(x, a, y); \
-+ x = CONVERT_8888_TO_0565(x); \
-+ } while (0)
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t d;
-+ uint32_t s1, s2;
-+ uint8_t a1, a2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+
-+ uint32_t *src;
-+ uint16_t *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+
-+ if ((y < 0) || (y >= pSrc->bits.height)) {
-+ continue;
-+ }
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ s2 = READ(pSrc, src + x2);
-+
-+ a1 = s1 >> 24;
-+ a2 = s2 >> 24;
-+
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ else if (s1) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+
-+ if (a2 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+ else if (s2) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a2 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ a1 = s1 >> 24;
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ else if (s1) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t d;
-+ uint32_t s1, s2;
-+ uint8_t a1, a2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+ int32_t max_vx, max_vy;
-+
-+ uint32_t *src;
-+ uint16_t *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ max_vx = pSrc->bits.width << 16;
-+ max_vy = pSrc->bits.height << 16;
-+
-+ while (orig_vx < 0) orig_vx += max_vx;
-+ while (vy < 0) vy += max_vy;
-+ while (orig_vx >= max_vx) orig_vx -= max_vx;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s2 = READ(pSrc, src + x2);
-+
-+ a1 = s1 >> 24;
-+ a2 = s2 >> 24;
-+
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ else if (s1) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+
-+ if (a2 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+ else if (s2) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a2 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ a1 = s1 >> 24;
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ else if (s1) {
-+ d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint32_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t d;
-+ uint32_t s1, s2;
-+ uint8_t a1, a2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+
-+ uint32_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+
-+ if ((y < 0) || (y >= pSrc->bits.height)) {
-+ continue;
-+ }
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ s2 = READ(pSrc, src + x2);
-+
-+ a1 = s1 >> 24;
-+ a2 = s2 >> 24;
-+
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, s1);
-+ else if (s1) {
-+ d = READ(pDst, dst);
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+
-+ if (a2 == 0xff)
-+ WRITE(pDst, dst, s2);
-+ else if (s2) {
-+ d = READ(pDst, dst);
-+ a2 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ a1 = s1 >> 24;
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, s1);
-+ else if (s1) {
-+ d = READ(pDst, dst);
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint32_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t d;
-+ uint32_t s1, s2;
-+ uint8_t a1, a2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+ int32_t max_vx, max_vy;
-+
-+ uint32_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ max_vx = pSrc->bits.width << 16;
-+ max_vy = pSrc->bits.height << 16;
-+
-+ while (orig_vx < 0) orig_vx += max_vx;
-+ while (vy < 0) vy += max_vy;
-+ while (orig_vx >= max_vx) orig_vx -= max_vx;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s2 = READ(pSrc, src + x2);
-+
-+ a1 = s1 >> 24;
-+ a2 = s2 >> 24;
-+
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, s1);
-+ else if (s1) {
-+ d = READ(pDst, dst);
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+
-+ if (a2 == 0xff)
-+ WRITE(pDst, dst, s2);
-+ else if (s2) {
-+ d = READ(pDst, dst);
-+ a2 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ a1 = s1 >> 24;
-+ if (a1 == 0xff)
-+ WRITE(pDst, dst, s1);
-+ else if (s1) {
-+ d = READ(pDst, dst);
-+ a1 ^= 0xff;
-+ UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+ WRITE(pDst, dst, d);
-+ }
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint32_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+
-+ uint32_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+
-+ if ((y < 0) || (y >= pSrc->bits.height)) {
-+ memset(dst, 0, width * sizeof(*dst));
-+ continue;
-+ }
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ WRITE(pDst, dst, s2);
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint32_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+ int32_t max_vx, max_vy;
-+
-+ uint32_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ max_vx = pSrc->bits.width << 16;
-+ max_vy = pSrc->bits.height << 16;
-+
-+ while (orig_vx < 0) orig_vx += max_vx;
-+ while (vy < 0) vy += max_vy;
-+ while (orig_vx >= max_vx) orig_vx -= max_vx;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ WRITE(pDst, dst, s2);
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint16_t *srcFirstLine;
-+ uint16_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+
-+ uint16_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+
-+ if ((y < 0) || (y >= pSrc->bits.height)) {
-+ memset(dst, 0, width * sizeof(*dst));
-+ continue;
-+ }
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ WRITE(pDst, dst, s2);
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint16_t *srcFirstLine;
-+ uint16_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+ int32_t max_vx, max_vy;
-+
-+ uint16_t *src, *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
-+
-+ max_vx = pSrc->bits.width << 16;
-+ max_vy = pSrc->bits.height << 16;
-+
-+ while (orig_vx < 0) orig_vx += max_vx;
-+ while (vy < 0) vy += max_vy;
-+ while (orig_vx >= max_vx) orig_vx -= max_vx;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ WRITE(pDst, dst, s2);
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ WRITE(pDst, dst, s1);
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+
-+ uint32_t *src;
-+ uint16_t *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+
-+ if ((y < 0) || (y >= pSrc->bits.height)) {
-+ memset(dst, 0, width * sizeof(*dst));
-+ continue;
-+ }
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ dst++;
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ s1 = READ(pSrc, src + x1);
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ dst++;
-+ }
-+ }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565 (
-+ pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+ int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+ uint16_t *dstLine;
-+ uint32_t *srcFirstLine;
-+ uint32_t s1, s2;
-+ int w;
-+ int x1, x2, y;
-+ int32_t orig_vx = vx;
-+ int32_t max_vx, max_vy;
-+
-+ uint32_t *src;
-+ uint16_t *dst;
-+ int srcStride, dstStride;
-+ PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+ * transformed from destination space to source space */
-+ PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+ max_vx = pSrc->bits.width << 16;
-+ max_vy = pSrc->bits.height << 16;
-+
-+ while (orig_vx < 0) orig_vx += max_vx;
-+ while (vy < 0) vy += max_vy;
-+ while (orig_vx >= max_vx) orig_vx -= max_vx;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ while (--height >= 0)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+
-+ y = vy >> 16;
-+ vy += unit_y;
-+ while (vy >= max_vy) vy -= max_vy;
-+
-+ src = srcFirstLine + srcStride * y;
-+
-+ w = width;
-+ vx = orig_vx;
-+ while ((w -= 2) >= 0)
-+ {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ x2 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s2 = READ(pSrc, src + x2);
-+
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ dst++;
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+ dst++;
-+ }
-+ if (w & 1) {
-+ x1 = vx >> 16;
-+ vx += unit_x;
-+ while (vx >= max_vx) vx -= max_vx;
-+ s1 = READ(pSrc, src + x1);
-+
-+ WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+ dst++;
-+ }
-+ }
-+}
-+
-+/*
-+ * Check if the clipping boundary is crossed on horizontal scaling
-+ */
-+static inline pixman_bool_t
-+fbTransformVerifyHorizontalClipping(pixman_image_t *pict, int width, int32_t vx, int32_t unit_x)
-+{
-+ while (--width >= 0) {
-+ int x = vx >> 16;
-+ if ((x < 0) || (x >= pict->bits.width)) return 1;
-+ vx += unit_x;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Check if the clipping boundary is crossed on vertical scaling
-+ */
-+static inline pixman_bool_t
-+fbTransformVerifyVerticalClipping(pixman_image_t *pict, int height, int32_t vy, int32_t unit_y)
-+{
-+ while (--height >= 0) {
-+ int y = vy >> 16;
-+ if ((y < 0) || (y >= pict->bits.height)) return 1;
-+ vy += unit_y;
-+ }
-+ return 0;
-+}
-+
-+/*
-+ * Easy case of transform without rotation or complex clipping
-+ * Returns 1 if it was able to handle this operation, and 0 otherwise
-+ */
-+static pixman_bool_t
-+fbCompositeTransformNonrotatedAffineTrivialclip (
-+ pixman_op_t op,
-+ pixman_image_t *pSrc,
-+ pixman_image_t *pMask,
-+ pixman_image_t *pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ pixman_vector_t v, unit;
-+ int skipdst_x = 0, skipdst_y = 0;
-+
-+ /* Handle destination clipping */
-+ if (xDst < pDst->common.clip_region.extents.x1) {
-+ skipdst_x = pDst->common.clip_region.extents.x1 - xDst;
-+ if (skipdst_x >= (int)width)
-+ return 1;
-+ xDst = pDst->common.clip_region.extents.x1;
-+ width -= skipdst_x;
-+ }
-+
-+ if (yDst < pDst->common.clip_region.extents.y1) {
-+ skipdst_y = pDst->common.clip_region.extents.y1 - yDst;
-+ if (skipdst_y >= (int)height)
-+ return 1;
-+ yDst = pDst->common.clip_region.extents.y1;
-+ height -= skipdst_y;
-+ }
-+
-+ if (xDst >= pDst->common.clip_region.extents.x2 ||
-+ yDst >= pDst->common.clip_region.extents.y2)
-+ {
-+ return 1;
-+ }
-+
-+ if (xDst + width > pDst->common.clip_region.extents.x2)
-+ width = pDst->common.clip_region.extents.x2 - xDst;
-+ if (yDst + height > pDst->common.clip_region.extents.y2)
-+ height = pDst->common.clip_region.extents.y2 - yDst;
-+
-+ /* reference point is the center of the pixel */
-+ v.vector[0] = pixman_int_to_fixed(xSrc) + pixman_fixed_1 / 2;
-+ v.vector[1] = pixman_int_to_fixed(ySrc) + pixman_fixed_1 / 2;
-+ v.vector[2] = pixman_fixed_1;
-+
-+ if (!pixman_transform_point_3d (pSrc->common.transform, &v))
-+ return 0;
-+
-+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-+ v.vector[0] -= pixman_fixed_e;
-+ v.vector[1] -= pixman_fixed_e;
-+
-+ unit.vector[0] = pSrc->common.transform->matrix[0][0];
-+ unit.vector[1] = pSrc->common.transform->matrix[1][1];
-+
-+ v.vector[0] += unit.vector[0] * skipdst_x;
-+ v.vector[1] += unit.vector[1] * skipdst_y;
-+
-+ /* Check for possible fixed point arithmetics problems/overflows */
-+ if (unit.vector[0] <= 0 || unit.vector[1] <= 0)
-+ return 0;
-+ if (width == 0 || height == 0)
-+ return 0;
-+ if ((uint32_t)width + (unit.vector[0] >> 16) >= 0x7FFF)
-+ return 0;
-+ if ((uint32_t)height + (unit.vector[1] >> 16) >= 0x7FFF)
-+ return 0;
-+
-+ /* Horizontal source clipping is only supported for NORMAL repeat */
-+ if (pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
-+ && fbTransformVerifyHorizontalClipping(pSrc, width, v.vector[0], unit.vector[0])) {
-+ return 0;
-+ }
-+
-+ /* Vertical source clipping is only supported for NONE and NORMAL repeat */
-+ if (pSrc->common.repeat != PIXMAN_REPEAT_NONE && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
-+ && fbTransformVerifyVerticalClipping(pSrc, height, v.vector[1], unit.vector[1])) {
-+ return 0;
-+ }
-+
-+ if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8
-+ && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == PIXMAN_a8r8g8b8))
-+ {
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ }
-+
-+ if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
-+ && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == pSrc->bits.format))
-+ {
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ }
-+
-+ if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8 && pDst->bits.format == PIXMAN_r5g6b5)
-+ {
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ }
-+
-+ if (op == PIXMAN_OP_SRC && pSrc->bits.format == PIXMAN_r5g6b5 && pDst->bits.format == PIXMAN_r5g6b5)
-+ {
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ }
-+
-+ if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
-+ && pDst->bits.format == PIXMAN_r5g6b5)
-+ {
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+ fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565(
-+ pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+ v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+ return 1;
-+ }
-+ }
-+
-+ /* No fastpath scaling implemented for this case */
-+ return 0;
-+}
-+
- static void
- fast_path_composite (pixman_implementation_t *imp,
- pixman_op_t op,
-@@ -1279,6 +2266,30 @@ fast_path_composite (pixman_implementation_t *imp,
- if (src->type == BITS
- && src->common.transform
- && !mask
-+ && !src->common.alpha_map && !dest->common.alpha_map
-+ && (src->common.filter == PIXMAN_FILTER_NEAREST)
-+ && !src->bits.read_func && !src->bits.write_func
-+ && !dest->bits.read_func && !dest->bits.write_func)
-+ {
-+ /* ensure that the transform matrix only has a scale */
-+ if (src->common.transform->matrix[0][1] == 0 &&
-+ src->common.transform->matrix[1][0] == 0 &&
-+ src->common.transform->matrix[2][0] == 0 &&
-+ src->common.transform->matrix[2][1] == 0 &&
-+ src->common.transform->matrix[2][2] == pixman_fixed_1 &&
-+ dest->common.clip_region.data == NULL)
-+ {
-+ if (fbCompositeTransformNonrotatedAffineTrivialclip (op, src, mask, dest,
-+ src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
-+ {
-+ return;
-+ }
-+ }
-+ }
-+
-+ if (src->type == BITS
-+ && src->common.transform
-+ && !mask
- && op == PIXMAN_OP_SRC
- && !src->common.alpha_map && !dest->common.alpha_map
- && (src->common.filter == PIXMAN_FILTER_NEAREST)
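
All of the fast paths in this patch share the same 16.16 fixed-point stepping: vx/vy carry the source coordinate, and one addition per pixel replaces a per-pixel multiply. A stripped-down sketch with made-up sizes (starting at the centre of the first destination pixel, as the patch's pixman_fixed_1 / 2 setup does):

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
        int src_width = 4, dst_width = 10;

        /* unit_x = src_width / dst_width in 16.16 fixed point */
        int32_t unit_x = (int32_t) (((int64_t) src_width << 16) / dst_width);
        int32_t vx     = unit_x / 2;   /* centre of first dest pixel */

        for (int i = 0; i < dst_width; i++)
        {
            int x = vx >> 16;          /* integer source column */
            printf ("dst[%d] <- src[%d]\n", i, x);
            vx += unit_x;
        }
        return 0;
    }
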
diff --git a/recipes/xorg-lib/pixman/over-n-8-0565.patch b/recipes/xorg-lib/pixman/over-n-8-0565.patch
deleted file mode 100644
index 3911068d94..0000000000
--- a/recipes/xorg-lib/pixman/over-n-8-0565.patch
+++ /dev/null
@@ -1,231 +0,0 @@
-From de2221a32d0b6628116565563f7b4ccd0a44e8b6 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 04 Mar 2010 23:20:25 +0000
-Subject: ARM: added 'armv6_composite_over_n_8_0565' fast path
-
-Provides ~3x performance improvement when working with
-data in L1 cache and memory. This fast path is important
-for fonts rendering when using 16bpp desktop.
-
-Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
-
-before:
-
- over_n_8_0565 = L1: 2.99 M: 2.86
-
-after:
-
- over_n_8_0565 = L1: 9.07 M: 8.05
----
-diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
-index 09a2888..c375c01 100644
---- a/pixman/pixman-arm-simd.c
-+++ b/pixman/pixman-arm-simd.c
-@@ -419,6 +419,193 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
- }
- }
-
-+#if defined(__ARM_EABI__) && defined(__linux__)
-+/*
-+ * ARMv6 assembly-optimized version of 'composite_over_n_8_0565'. It is
-+ * a bare-metal 'naked' function which uses all the available CPU registers
-+ * and is compatible with the ARM EABI. It might (or might not) break when
-+ * used with a different ABI, so it is better to be safe than sorry.
-+ */
-+static void __attribute__((naked)) armv6_composite_over_n_8_0565_asm (
-+ uint16_t *dst, uint8_t *mask, uint32_t src, int w,
-+ int dst_stride_delta, int mask_stride_delta, int h)
-+{
-+ asm volatile (
-+ ".macro composite_internal_armv6_asm opaque_flag\n"
-+ /* save all registers (8 words) to stack */
-+ "stmdb sp!, {r4-r11, ip, lr}\n"
-+ /* some register aliases for better readability */
-+ "DST .req r0\n"
-+ "MASK .req r1\n"
-+ "S .req r2\n"
-+ "W .req r3\n"
-+ "A .req r8\n"
-+ "D .req r10\n"
-+ "C0000FF .req r11\n"
-+ "C00001F .req r9\n"
-+ "C800080 .req ip\n"
-+ "CE000E0 .req lr\n"
-+ /* precalculate some stuff and put it on stack */
-+ "mov r6, #0xF8\n"
-+ "mov r7, #0xFC\n"
-+
-+ "str W, [sp, #-8]!\n"
-+
-+ ".if \\opaque_flag\n"
-+ /* precalculate and save it to stack for later use:
-+ * ((src >> 3) & 0x001F) |
-+ * ((src >> 5) & 0x07E0) |
-+ * ((src >> 8) & 0xF800)
-+ */
-+ "mov A, #0x1F\n"
-+ "and D, A, S, lsr #3\n"
-+ "and r4, S, #0xF80000\n"
-+ "and r5, S, #0xFC00\n"
-+ "orr D, r4, lsr #8\n"
-+ "orr D, r5, lsr #5\n"
-+ "str D, [sp, #4]\n"
-+ ".endif\n"
-+
-+ "ldr D, [sp, #(8 + 10*4 + 8)]\n" /* h */
-+ "ldr A, =0xFF00FF\n"
-+ "ldr C800080, =0x800080\n"
-+ "ldr CE000E0, =0xE000E0\n"
-+ "ldr C0000FF, =0xFF\n"
-+ "ldr C00001F, =0x1F\n"
-+ "and r4, A, S\n" /* r4 = src & 0x00FF00FF */
-+ "and r5, A, S, lsr #8\n" /* r5 = (src >> 8) & 0x00FF00FF */
-+ "stmdb sp!, {r4, r5, r6, r7}\n"
-+ "0:\n"
-+ "subs D, D, #1\n"
-+ "blt 6f\n"
-+ "1:\n"
-+ "subs W, W, #1\n"
-+ "blt 5f\n"
-+ "2:\n"
-+ "ldrb A, [MASK], #1\n"
-+ "ldmia sp, {r4, r5, r6, r7}\n" /* load constants from stack */
-+ "add DST, DST, #2\n"
-+ "cmp A, #0\n"
-+ "beq 1b\n"
-+
-+ ".if \\opaque_flag\n"
-+ "cmp A, #0xFF\n"
-+ "bne 3f\n"
-+ "ldr D, [sp, #(4*4 + 4)]\n" /* load precalculated value */
-+ "subs W, #1\n"
-+ "strh D, [DST, #-2]\n"
-+ "bge 2b\n"
-+ ".endif\n"
-+
-+ "3:\n"
-+ "ldrh D, [DST, #-2]\n"
-+ "mla r4, A, r4, C800080\n"
-+ "mla r5, A, r5, C800080\n"
-+ "and r6, r6, D, lsl #3\n" /* & 0xF8 */
-+ "and r7, r7, D, lsr #3\n" /* & 0xFC */
-+ "and D, D, #0xF800\n"
-+ "bic S, r4, #0xFF0000\n"
-+ "bic A, r5, #0xFF0000\n"
-+ "add r4, r4, S, lsr #8\n"
-+ "add r5, r5, A, lsr #8\n"
-+
-+ "and S, r7, #0xC0\n"
-+ "orr r6, r6, D, lsl #8\n"
-+ "and D, r6, CE000E0\n"
-+ "eor A, C0000FF, r5, lsr #24\n"
-+ "orr r6, D, lsr #5\n"
-+ "orr r7, S, lsr #6\n"
-+
-+ "mla r6, A, r6, C800080\n"
-+ "mla r7, A, r7, C800080\n"
-+ "subs W, #1\n"
-+ "bic D, r6, #0xFF0000\n"
-+ "bic A, r7, #0xFF0000\n"
-+ "add r6, r6, D, lsr #8\n"
-+ "uqadd8 r4, r4, r6\n"
-+ "add r7, r7, A, lsr #8\n"
-+ "uqadd8 r5, r5, r7\n"
-+ "and D, C00001F, r4, lsr #11\n"
-+ "and r4, r4, #0xF8000000\n"
-+ "and r5, r5, #0xFC00\n"
-+ "orr D, r4, lsr #16\n"
-+ "orr D, r5, lsr #5\n"
-+ "strh D, [DST, #-2]\n"
-+ "bge 2b\n"
-+ "5:\n"
-+ "ldr r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
-+ "ldr r4, [sp, #(4*4 + 8 + 10*4 + 4)]\n" /* mask stride */
-+ "ldr r5, [sp, #(4*4 + 8 + 10*4 + 0)]\n" /* dst stride */
-+ "ldr W, [sp, #(4*4)]\n"
-+ "subs r6, r6, #1\n" /* h */
-+ "str r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
-+ "add MASK, MASK, r4\n"
-+ "add DST, DST, r5, lsl #1\n"
-+ "bgt 1b\n"
-+ "6:\n"
-+ "add sp, sp, #(4*4 + 8)\n"
-+ /* restore all registers and return */
-+ "ldmia sp!, {r4-r11, ip, pc}\n"
-+ ".unreq DST\n"
-+ ".unreq MASK\n"
-+ ".unreq S\n"
-+ ".unreq W\n"
-+ ".unreq A\n"
-+ ".unreq D\n"
-+ ".unreq C0000FF\n"
-+ ".unreq C00001F\n"
-+ ".unreq C800080\n"
-+ ".unreq CE000E0\n"
-+ ".endm\n"
-+
-+ "mov ip, r2, lsr #24\n"
-+ "cmp ip, #0xFF\n"
-+ "beq 9f\n"
-+ "composite_internal_armv6_asm 0\n"
-+ "9:\n"
-+ "composite_internal_armv6_asm 1\n"
-+ ".ltorg\n"
-+ ".purgem composite_internal_armv6_asm\n"
-+ );
-+}
-+
-+static void
-+armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
-+ pixman_op_t op,
-+ pixman_image_t * src_image,
-+ pixman_image_t * mask_image,
-+ pixman_image_t * dst_image,
-+ int32_t src_x,
-+ int32_t src_y,
-+ int32_t mask_x,
-+ int32_t mask_y,
-+ int32_t dest_x,
-+ int32_t dest_y,
-+ int32_t width,
-+ int32_t height)
-+{
-+ uint32_t src;
-+ uint16_t *dst;
-+ uint8_t *mask;
-+ int dst_stride, mask_stride;
-+
-+ src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-+
-+ /* bail out if fully transparent */
-+ if (src == 0)
-+ return;
-+
-+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
-+ dst_stride, dst, 1);
-+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t,
-+ mask_stride, mask, 1);
-+
-+ armv6_composite_over_n_8_0565_asm (dst, mask, src, width,
-+ dst_stride - width, mask_stride - width, height);
-+}
-+
-+#endif
-+
- static const pixman_fast_path_t arm_simd_fast_paths[] =
- {
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
-@@ -434,7 +621,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
--
-+#if defined(__ARM_EABI__) && defined(__linux__)
-+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
-+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
-+#endif
- { PIXMAN_OP_NONE },
- };
-
---
-cgit v0.8.3-6-g21f6
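
For readers skimming past the raw ARMv6 assembly above: per pixel it computes an ordinary OVER of a solid premultiplied source, scaled by an a8 mask byte, onto an r5g6b5 destination. A scalar C sketch of the same arithmetic follows; the helper and function names are illustrative, not part of the patch, and the packed-register tricks are unrolled into per-channel code.

    #include <stdint.h>

    /* round(x * a / 255): the same rounding the asm gets from
     * mla with the 0x800080 bias plus the high-byte add */
    static uint8_t mul_255 (uint8_t x, uint8_t a)
    {
        uint16_t t = x * a + 0x80;
        return (t + (t >> 8)) >> 8;
    }

    static uint16_t over_n_8_0565_pixel (uint32_t src, uint8_t m, uint16_t d)
    {
        /* expand the 565 destination to 888 by bit replication */
        uint8_t dr = (d >> 11) << 3;         dr |= dr >> 5;
        uint8_t dg = ((d >> 5) & 0x3f) << 2; dg |= dg >> 6;
        uint8_t db = (d & 0x1f) << 3;        db |= db >> 5;

        /* scale the premultiplied a8r8g8b8 source by the mask byte */
        uint8_t sa = mul_255 (src >> 24, m);
        uint8_t sr = mul_255 ((src >> 16) & 0xff, m);
        uint8_t sg = mul_255 ((src >> 8) & 0xff, m);
        uint8_t sb = mul_255 (src & 0xff, m);

        /* OVER: dest = src + dest * (255 - src.alpha); the asm uses a
         * saturating add (uqadd8), which never clips for premultiplied data */
        uint8_t ia = 255 - sa;
        dr = sr + mul_255 (dr, ia);
        dg = sg + mul_255 (dg, ia);
        db = sb + mul_255 (db, ia);

        return ((dr >> 3) << 11) | ((dg >> 2) << 5) | (db >> 3);
    }

The same per-channel math also covers the b5g6r5 fast path registered above, since nothing in it depends on which channel is which.
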
diff --git a/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch b/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch
deleted file mode 100644
index b3bb762415..0000000000
--- a/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch
+++ /dev/null
@@ -1,1702 +0,0 @@
-diff --git a/configure.ac b/configure.ac
-index 063f6eb..bada55c 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -278,11 +278,12 @@ AC_SUBST(VMX_CFLAGS)
- AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-
- dnl Check for ARM SIMD instructions
-+ARM_SIMD_CFLAGS=""
-
- have_arm_simd=no
- AC_MSG_CHECKING(whether to use ARM SIMD assembler)
- xserver_save_CFLAGS=$CFLAGS
--CFLAGS="$CFLAGS $ARM_CFLAGS"
-+CFLAGS="$CFLAGS $ARM_SIMD_CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
- asm("uqadd8 r1, r1, r2");
-@@ -302,7 +303,7 @@ fi
- if test $have_arm_simd = yes ; then
- AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
- else
-- ARM_CFLAGS=
-+ ARM_SIMD_CFLAGS=
- fi
-
- AC_MSG_RESULT($have_arm_simd)
-@@ -310,9 +311,48 @@ if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
- AC_MSG_ERROR([ARM SIMD intrinsics not detected])
- fi
-
--AC_SUBST(ARM_CFLAGS)
-+dnl Check for ARM NEON instructions
-+ARM_NEON_CFLAGS="-mcpu=cortex-a8 -mfpu=neon"
-+
-+have_arm_neon=no
-+AC_MSG_CHECKING(whether to use ARM NEON)
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$CFLAGS $ARM_NEON_CFLAGS"
-+AC_COMPILE_IFELSE([
-+#include <arm_neon.h>
-+int main () {
-+ uint8x8_t neon_test=vmov_n_u8(0);
-+ return 0;
-+}], have_arm_neon=yes)
-+CFLAGS=$xserver_save_CFLAGS
-+
-+AC_ARG_ENABLE(arm-neon,
-+ [AC_HELP_STRING([--disable-arm-neon],
-+ [disable ARM NEON fast paths])],
-+ [enable_arm_neon=$enableval], [enable_arm_neon=auto])
-+
-+if test $enable_arm_neon = no ; then
-+ have_arm_neon=disabled
-+fi
-+
-+if test $have_arm_neon = yes ; then
-+ AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON compiler intrinsics])
-+else
-+ ARM_NEON_CFLAGS=
-+fi
-+
-+AC_MSG_RESULT($have_arm_neon)
-+if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
-+ AC_MSG_ERROR([ARM NEON intrinsics not detected])
-+fi
-+
-+
-+AC_SUBST(ARM_SIMD_CFLAGS)
-+AC_SUBST(ARM_NEON_CFLAGS)
-
- AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-+AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
-+
-
-
- AC_ARG_ENABLE(gtk,
-diff --git a/pixman/Makefile.am b/pixman/Makefile.am
-index c4612ea..4c1ec6b 100644
---- a/pixman/Makefile.am
-+++ b/pixman/Makefile.am
-@@ -80,15 +80,26 @@ libpixman_sse2_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-sse2.la
- endif
-
--# arm code
-+# arm simd code
- if USE_ARM_SIMD
- noinst_LTLIBRARIES += libpixman-arm-simd.la
- libpixman_arm_simd_la_SOURCES = \
- pixman-arm-simd.c \
- pixman-arm-simd.h
--libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
-+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
- libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-arm-simd.la
- endif
-
-+# arm neon code
-+if USE_ARM_NEON
-+noinst_LTLIBRARIES += libpixman-arm-neon.la
-+libpixman_arm_neon_la_SOURCES = \
-+ pixman-arm-neon.c \
-+ pixman-arm-neon.h
-+libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS)
-+libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
-+libpixman_1_la_LIBADD += libpixman-arm-neon.la
-+endif
-+
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-new file mode 100644
-index 0000000..10050e4
---- /dev/null
-+++ b/pixman/pixman-arm-neon.c
-@@ -0,0 +1,1387 @@
-+/*
-+ * Copyright © 2009 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission. Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose. It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author: Ian Rickards (ian.rickards@arm.com)
-+ *
-+ */
-+
-+#ifdef HAVE_CONFIG_H
-+#include <config.h>
-+#endif
-+
-+#include "pixman-arm-neon.h"
-+
-+#include <arm_neon.h>
-+
-+
-+#if !defined(__ARMCC_VERSION) && !defined(FORCE_NO_NEON_INLINE_ASM)
-+// [both armcc & gcc set __GNUC__]
-+// Use GNU style inline asm on gcc, for best performance
-+// Use intrinsics on armcc
-+// This switch determines if any GNU style inline asm is allowed
-+#define USE_NEON_INLINE_ASM
-+#endif
-+
-+
-+static force_inline uint8x8x4_t unpack0565(uint16x8_t rgb)
-+{
-+ uint16x8_t gb, b;
-+ uint8x8x4_t res;
-+
-+ res.val[3] = vdup_n_u8(0);
-+ gb = vshrq_n_u16(rgb, 5);
-+ b = vshrq_n_u16(rgb, 5+6);
-+ res.val[0] = vmovn_u16(rgb); // get low 5 bits
-+ res.val[1] = vmovn_u16(gb); // get mid 6 bits
-+ res.val[2] = vmovn_u16(b); // get top 5 bits
-+
-+ res.val[0] = vshl_n_u8(res.val[0], 3); // shift to top
-+ res.val[1] = vshl_n_u8(res.val[1], 2); // shift to top
-+ res.val[2] = vshl_n_u8(res.val[2], 3); // shift to top
-+
-+ res.val[0] = vsri_n_u8(res.val[0], res.val[0], 5);
-+ res.val[1] = vsri_n_u8(res.val[1], res.val[1], 6);
-+ res.val[2] = vsri_n_u8(res.val[2], res.val[2], 5);
-+
-+ return res;
-+}
-+
-+static force_inline uint16x8_t pack0565(uint8x8x4_t s)
-+{
-+ uint16x8_t rgb, val_g, val_r;
-+
-+ rgb = vshll_n_u8(s.val[2],8);
-+ val_g = vshll_n_u8(s.val[1],8);
-+ val_r = vshll_n_u8(s.val[0],8);
-+ rgb = vsriq_n_u16(rgb, val_g, 5);
-+ rgb = vsriq_n_u16(rgb, val_r, 5+6);
-+
-+ return rgb;
-+}
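
A note on the vshl/vsri pairs in unpack0565 above: replicating the top bits of each 5- or 6-bit field into the freed low bits makes 0x1F expand to 0xFF (and 0 stay 0), so full white survives a 565 -> 888 -> 565 round trip. The scalar equivalent of each pair is simply:

    /* illustrative scalar equivalents of the vshl_n_u8 + vsri_n_u8 pairs */
    static uint8_t expand5 (uint8_t x) { return (x << 3) | (x >> 2); } /* 0x1f -> 0xff */
    static uint8_t expand6 (uint8_t x) { return (x << 2) | (x >> 4); } /* 0x3f -> 0xff */
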
-+
-+static force_inline uint8x8_t neon2mul(uint8x8_t x, uint8x8_t alpha)
-+{
-+ uint16x8_t tmp,tmp2;
-+ uint8x8_t res;
-+
-+ tmp = vmull_u8(x,alpha);
-+ tmp2 = vrshrq_n_u16(tmp,8);
-+ res = vraddhn_u16(tmp,tmp2);
-+
-+ return res;
-+}
-+
-+static force_inline uint8x8x4_t neon8mul(uint8x8x4_t x, uint8x8_t alpha)
-+{
-+ uint16x8x4_t tmp;
-+ uint8x8x4_t res;
-+ uint16x8_t qtmp1,qtmp2;
-+
-+ tmp.val[0] = vmull_u8(x.val[0],alpha);
-+ tmp.val[1] = vmull_u8(x.val[1],alpha);
-+ tmp.val[2] = vmull_u8(x.val[2],alpha);
-+ tmp.val[3] = vmull_u8(x.val[3],alpha);
-+
-+ qtmp1 = vrshrq_n_u16(tmp.val[0],8);
-+ qtmp2 = vrshrq_n_u16(tmp.val[1],8);
-+ res.val[0] = vraddhn_u16(tmp.val[0],qtmp1);
-+ qtmp1 = vrshrq_n_u16(tmp.val[2],8);
-+ res.val[1] = vraddhn_u16(tmp.val[1],qtmp2);
-+ qtmp2 = vrshrq_n_u16(tmp.val[3],8);
-+ res.val[2] = vraddhn_u16(tmp.val[2],qtmp1);
-+ res.val[3] = vraddhn_u16(tmp.val[3],qtmp2);
-+
-+ return res;
-+}
-+
-+static force_inline uint8x8x4_t neon8qadd(uint8x8x4_t x, uint8x8x4_t y)
-+{
-+ uint8x8x4_t res;
-+
-+ res.val[0] = vqadd_u8(x.val[0],y.val[0]);
-+ res.val[1] = vqadd_u8(x.val[1],y.val[1]);
-+ res.val[2] = vqadd_u8(x.val[2],y.val[2]);
-+ res.val[3] = vqadd_u8(x.val[3],y.val[3]);
-+
-+ return res;
-+}
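
The vmull/vrshr/vraddhn triple in neon2mul and neon8mul above is the standard exact divide-by-255 with rounding, not a truncating approximation. A scalar model of what the three instructions compute (name illustrative):

    #include <stdint.h>

    static uint8_t scalar_neon2mul (uint8_t x, uint8_t a)
    {
        uint16_t t = x * a;                      /* vmull.u8: full 16-bit product   */
        uint16_t r = (t + 0x80) >> 8;            /* vrshr.u16 #8: rounding shift    */
        return (uint8_t) ((t + r + 0x80) >> 8);  /* vraddhn.u16: rounding high half */
    }

For example, x = a = 255 gives t = 65025 and a result of 255, and t = 128 rounds up to 1, matching round(x*a/255).
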
-+
-+
-+void
-+fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint8_t *dstLine, *dst;
-+ uint8_t *srcLine, *src;
-+ int dstStride, srcStride;
-+ uint16_t w;
-+
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
-+ fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ uint8_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8_t sval,dval,temp;
-+
-+ sval = vld1_u8((void*)src);
-+ dval = vld1_u8((void*)dst);
-+ keep_dst = dst;
-+
-+ temp = vqadd_u8(dval,sval);
-+
-+ src += (w & 7);
-+ dst += (w & 7);
-+ w -= (w & 7);
-+
-+ while (w)
-+ {
-+ sval = vld1_u8((void*)src);
-+ dval = vld1_u8((void*)dst);
-+
-+ vst1_u8((void*)keep_dst,temp);
-+ keep_dst = dst;
-+
-+ temp = vqadd_u8(dval,sval);
-+
-+ src+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ vst1_u8((void*)keep_dst,temp);
-+#else
-+ asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+ "vld1.8 {d0}, [%[src]]\n\t"
-+ "vld1.8 {d4}, [%[dst]]\n\t"
-+ "mov %[keep_dst], %[dst]\n\t"
-+
-+ "and ip, %[w], #7\n\t"
-+ "add %[src], %[src], ip\n\t"
-+ "add %[dst], %[dst], ip\n\t"
-+ "subs %[w], %[w], ip\n\t"
-+ "b 9f\n\t"
-+// LOOP
-+ "2:\n\t"
-+ "vld1.8 {d0}, [%[src]]!\n\t"
-+ "vld1.8 {d4}, [%[dst]]!\n\t"
-+ "vst1.8 {d20}, [%[keep_dst]]\n\t"
-+ "sub %[keep_dst], %[dst], #8\n\t"
-+ "subs %[w], %[w], #8\n\t"
-+ "9:\n\t"
-+ "vqadd.u8 d20, d0, d4\n\t"
-+
-+ "bne 2b\n\t"
-+
-+ "1:\n\t"
-+ "vst1.8 {d20}, [%[keep_dst]]\n\t"
-+
-+ : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+ :
-+ : "ip", "cc", "memory", "d0","d4",
-+ "d20"
-+ );
-+#endif
-+ }
-+ }
-+ else
-+ {
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+ uint8x8_t sval, dval;
-+ uint8_t *dst4, *dst2;
-+
-+ if (w&4)
-+ {
-+ sval = vreinterpret_u8_u32(vld1_lane_u32((void*)src,vreinterpret_u32_u8(sval),1));
-+ dval = vreinterpret_u8_u32(vld1_lane_u32((void*)dst,vreinterpret_u32_u8(dval),1));
-+ dst4=dst;
-+ src+=4;
-+ dst+=4;
-+ }
-+ if (w&2)
-+ {
-+ sval = vreinterpret_u8_u16(vld1_lane_u16((void*)src,vreinterpret_u16_u8(sval),1));
-+ dval = vreinterpret_u8_u16(vld1_lane_u16((void*)dst,vreinterpret_u16_u8(dval),1));
-+ dst2=dst;
-+ src+=2;
-+ dst+=2;
-+ }
-+ if (w&1)
-+ {
-+ sval = vld1_lane_u8((void*)src,sval,1);
-+ dval = vld1_lane_u8((void*)dst,dval,1);
-+ }
-+
-+ dval = vqadd_u8(dval,sval);
-+
-+ if (w&1)
-+ vst1_lane_u8((void*)dst,dval,1);
-+ if (w&2)
-+ vst1_lane_u16((void*)dst2,vreinterpret_u16_u8(dval),1);
-+ if (w&4)
-+ vst1_lane_u32((void*)dst4,vreinterpret_u32_u8(dval),1);
-+ }
-+ }
-+}
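
The "overlapping 8-pixel method" used by the wide paths in this file deserves a note: a row whose width is not a multiple of 8 is handled by letting the first, partial group overlap the following aligned group, and each store is deferred one iteration (keep_dst) so the overlapped bytes are loaded before they are rewritten. A scalar model of the pattern for the ADD case above; all names are illustrative:

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t b[8]; } vec8;   /* stands in for a NEON D register */

    static vec8 load8 (const uint8_t *p)    { vec8 v; memcpy (v.b, p, 8); return v; }
    static void store8 (uint8_t *p, vec8 v) { memcpy (p, v.b, 8); }

    static vec8 qadd8 (vec8 x, vec8 y)       /* models vqadd.u8 */
    {
        vec8 r;
        for (int i = 0; i < 8; i++) {
            int t = x.b[i] + y.b[i];
            r.b[i] = t > 255 ? 255 : t;
        }
        return r;
    }

    static void add_row_overlap8 (uint8_t *dst, const uint8_t *src, int w) /* w >= 8 */
    {
        uint8_t *pending = dst;                       /* keep_dst in the patch   */
        vec8 acc = qadd8 (load8 (src), load8 (dst));  /* first, possibly partial */

        src += w & 7;  dst += w & 7;  w -= w & 7;
        while (w) {
            vec8 s = load8 (src), d = load8 (dst);    /* load before the store   */
            store8 (pending, acc);
            pending = dst;
            acc = qadd8 (s, d);
            src += 8;  dst += 8;  w -= 8;
        }
        store8 (pending, acc);                        /* flush the deferred group */
    }

The head pixels are therefore written twice with identical values, which is safe because both groups were loaded before either store.
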
-+
-+
-+void
-+fbCompositeSrc_8888x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t *dstLine, *dst;
-+ uint32_t *srcLine, *src;
-+ int dstStride, srcStride;
-+ uint32_t w;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ uint32_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8x4_t sval,dval,temp;
-+
-+ sval = vld4_u8((void*)src);
-+ dval = vld4_u8((void*)dst);
-+ keep_dst = dst;
-+
-+ temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+ temp = neon8qadd(sval,temp);
-+
-+ src += (w & 7);
-+ dst += (w & 7);
-+ w -= (w & 7);
-+
-+ while (w)
-+ {
-+ sval = vld4_u8((void*)src);
-+ dval = vld4_u8((void*)dst);
-+
-+ vst4_u8((void*)keep_dst,temp);
-+ keep_dst = dst;
-+
-+ temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+ temp = neon8qadd(sval,temp);
-+
-+ src+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ vst4_u8((void*)keep_dst,temp);
-+#else
-+ asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+ "vld4.8 {d0-d3}, [%[src]]\n\t"
-+ "vld4.8 {d4-d7}, [%[dst]]\n\t"
-+ "mov %[keep_dst], %[dst]\n\t"
-+
-+ "and ip, %[w], #7\n\t"
-+ "add %[src], %[src], ip, LSL#2\n\t"
-+ "add %[dst], %[dst], ip, LSL#2\n\t"
-+ "subs %[w], %[w], ip\n\t"
-+ "b 9f\n\t"
-+// LOOP
-+ "2:\n\t"
-+ "vld4.8 {d0-d3}, [%[src]]!\n\t"
-+ "vld4.8 {d4-d7}, [%[dst]]!\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+ "sub %[keep_dst], %[dst], #8*4\n\t"
-+ "subs %[w], %[w], #8\n\t"
-+ "9:\n\t"
-+ "vmvn.8 d31, d3\n\t"
-+ "vmull.u8 q10, d31, d4\n\t"
-+ "vmull.u8 q11, d31, d5\n\t"
-+ "vmull.u8 q12, d31, d6\n\t"
-+ "vmull.u8 q13, d31, d7\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d20, q10, q8\n\t"
-+ "vraddhn.u16 d21, q11, q9\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vraddhn.u16 d22, q12, q8\n\t"
-+ "vraddhn.u16 d23, q13, q9\n\t"
-+// result in d20-d23
-+ "vqadd.u8 d20, d0, d20\n\t"
-+ "vqadd.u8 d21, d1, d21\n\t"
-+ "vqadd.u8 d22, d2, d22\n\t"
-+ "vqadd.u8 d23, d3, d23\n\t"
-+
-+ "bne 2b\n\t"
-+
-+ "1:\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+
-+ : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+ :
-+ : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+ "d16","d17","d18","d19","d20","d21","d22","d23"
-+ );
-+#endif
-+ }
-+ }
-+ else
-+ {
-+ uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+ // Handle width<8
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ while (w>=2)
-+ {
-+ uint8x8_t sval,dval;
-+
-+ /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-+ sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-+ dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
-+ dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-+ vst1_u8((void*)dst,vqadd_u8(sval,dval));
-+
-+ src+=2;
-+ dst+=2;
-+ w-=2;
-+ }
-+
-+ if (w)
-+ {
-+ uint8x8_t sval,dval;
-+
-+ /* single 32-bit pixel in lane 0 */
-+ sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src)); // only interested in lane 0
-+ dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst)); // only interested in lane 0
-+ dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-+ vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-+ }
-+ }
-+ }
-+}
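
In the narrow path above, alpha_selector (0x0707070703030303, read little-endian as byte indices 3,3,3,3,7,7,7,7) is a vtbl1_u8 table that broadcasts each packed pixel's alpha byte, here taken from the inverted source, across that pixel's four channel bytes, so two pixels are each scaled by their own (255 - alpha) in a single operation. A scalar sketch of the table lookup (illustrative only):

    #include <stdint.h>

    /* scalar model of vtbl1_u8(src, selector): out[i] = src[idx[i]] */
    static void broadcast_alpha (const uint8_t s[8], uint8_t out[8])
    {
        static const uint8_t idx[8] = { 3, 3, 3, 3, 7, 7, 7, 7 };
        for (int i = 0; i < 8; i++)
            out[i] = s[idx[i]];   /* bytes 3 and 7 are the two alpha bytes */
    }
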
-+
-+
-+
-+void
-+fbCompositeSrc_x888x0565neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint16_t *dstLine, *dst;
-+ uint32_t *srcLine, *src;
-+ int dstStride, srcStride;
-+ uint32_t w;
-+
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+ fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+
-+ if (width>=8)
-+ {
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ do {
-+ while (w>=8)
-+ {
-+#ifndef USE_NEON_INLINE_ASM
-+ vst1q_u16(dst, pack0565(vld4_u8((void*)src)));
-+#else
-+ asm volatile (
-+ "vld4.8 {d4-d7}, [%[src]]\n\t"
-+ "vshll.u8 q0, d6, #8\n\t"
-+ "vshll.u8 q1, d5, #8\n\t"
-+                        "vsri.u16 q0, q1, #5\t\n"
-+                        "vshll.u8 q1, d4, #8\n\t"
-+                        "vsri.u16 q0, q1, #11\t\n"
-+ "vst1.16 {q0}, [%[dst]]\n\t"
-+ :
-+ : [dst] "r" (dst), [src] "r" (src)
-+ : "memory", "d0","d1","d2","d3","d4","d5","d6","d7"
-+ );
-+#endif
-+ src+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ if (w != 0)
-+ {
-+ src -= (8-w);
-+ dst -= (8-w);
-+ w = 8; // do another vector
-+ }
-+ } while (w!=0);
-+ }
-+ }
-+ else
-+ {
-+ // Handle width<8
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ while (w>=2)
-+ {
-+ uint32x2_t sval, rgb, g, b;
-+ sval = vld1_u32(src);
-+                rgb = vshr_n_u32(sval,8-5);  // top 5 bits of the low channel into bits 0-4
-+                g = vshr_n_u32(sval,8+8-6);  // top 6 bits of the mid channel to the bottom byte
-+                rgb = vsli_n_u32(rgb, g, 5);
-+                b = vshr_n_u32(sval,8+8+8-5);  // top 5 bits of the high channel to the bottom byte
-+ rgb = vsli_n_u32(rgb, b, 11);
-+ vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0);
-+ vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),2);
-+ src+=2;
-+ w-=2;
-+ }
-+ if (w)
-+ {
-+ uint32x2_t sval, rgb, g, b;
-+ sval = vld1_dup_u32(src);
-+                rgb = vshr_n_u32(sval,8-5);  // top 5 bits of the low channel into bits 0-4
-+                g = vshr_n_u32(sval,8+8-6);  // top 6 bits of the mid channel to the bottom byte
-+                rgb = vsli_n_u32(rgb, g, 5);
-+                b = vshr_n_u32(sval,8+8+8-5);  // top 5 bits of the high channel to the bottom byte
-+ rgb = vsli_n_u32(rgb, b, 11);
-+ vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0);
-+ }
-+ }
-+ }
-+}
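
One more loop-shape trick in the function above: the tail is not handled with scalar code. Instead the pointers back up by 8 - w pixels and one more full vector group is converted, redoing a few already-converted pixels. That is only valid because SRC x888 -> 0565 is a pure per-pixel conversion, so converting a pixel twice is harmless; an OVER loop that reads the destination could not replay pixels this way. A scalar model, with to565 as an illustrative helper:

    #include <stdint.h>

    static uint16_t to565 (uint32_t p)   /* x8r8g8b8 -> r5g6b5 */
    {
        return ((p >> 8) & 0xf800) | ((p >> 5) & 0x07e0) | ((p >> 3) & 0x001f);
    }

    static void row_x888_to_0565 (uint16_t *dst, const uint32_t *src, int w) /* w >= 8 */
    {
        do {
            while (w >= 8) {
                for (int i = 0; i < 8; i++)   /* stands in for the 8-wide vector body */
                    dst[i] = to565 (src[i]);
                src += 8;  dst += 8;  w -= 8;
            }
            if (w != 0) {
                src -= 8 - w;   /* back up and redo one full, overlapping group */
                dst -= 8 - w;
                w = 8;
            }
        } while (w != 0);
    }
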
-+
-+
-+void
-+fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t *dstLine, *dst;
-+ uint32_t *srcLine, *src;
-+ uint32_t mask;
-+ int dstStride, srcStride;
-+ uint32_t w;
-+ uint8x8_t mask_alpha;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+ fbComposeGetSolid (pMask, mask, pDst->bits.format);
-+ mask_alpha = vdup_n_u8((mask) >> 24);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ uint32_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8x4_t sval,dval,temp;
-+
-+ sval = vld4_u8((void*)src);
-+ dval = vld4_u8((void*)dst);
-+ keep_dst = dst;
-+
-+ sval = neon8mul(sval,mask_alpha);
-+ temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+ temp = neon8qadd(sval,temp);
-+
-+ src += (w & 7);
-+ dst += (w & 7);
-+ w -= (w & 7);
-+
-+ while (w)
-+ {
-+ sval = vld4_u8((void*)src);
-+ dval = vld4_u8((void*)dst);
-+
-+ vst4_u8((void*)keep_dst,temp);
-+ keep_dst = dst;
-+
-+ sval = neon8mul(sval,mask_alpha);
-+ temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+ temp = neon8qadd(sval,temp);
-+
-+ src+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ vst4_u8((void*)keep_dst,temp);
-+#else
-+ asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+ "vdup.32 d30, %[mask]\n\t"
-+ "vdup.8 d30, d30[3]\n\t"
-+
-+ "vld4.8 {d0-d3}, [%[src]]\n\t"
-+ "vld4.8 {d4-d7}, [%[dst]]\n\t"
-+ "mov %[keep_dst], %[dst]\n\t"
-+
-+ "and ip, %[w], #7\n\t"
-+ "add %[src], %[src], ip, LSL#2\n\t"
-+ "add %[dst], %[dst], ip, LSL#2\n\t"
-+ "subs %[w], %[w], ip\n\t"
-+ "b 9f\n\t"
-+// LOOP
-+ "2:\n\t"
-+ "vld4.8 {d0-d3}, [%[src]]!\n\t"
-+ "vld4.8 {d4-d7}, [%[dst]]!\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+ "sub %[keep_dst], %[dst], #8*4\n\t"
-+ "subs %[w], %[w], #8\n\t"
-+
-+ "9:\n\t"
-+ "vmull.u8 q10, d30, d0\n\t"
-+ "vmull.u8 q11, d30, d1\n\t"
-+ "vmull.u8 q12, d30, d2\n\t"
-+ "vmull.u8 q13, d30, d3\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d0, q10, q8\n\t"
-+ "vraddhn.u16 d1, q11, q9\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d3, q13, q9\n\t"
-+ "vraddhn.u16 d2, q12, q8\n\t"
-+
-+ "vmvn.8 d31, d3\n\t"
-+ "vmull.u8 q10, d31, d4\n\t"
-+ "vmull.u8 q11, d31, d5\n\t"
-+ "vmull.u8 q12, d31, d6\n\t"
-+ "vmull.u8 q13, d31, d7\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d20, q10, q8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d21, q11, q9\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vraddhn.u16 d22, q12, q8\n\t"
-+ "vraddhn.u16 d23, q13, q9\n\t"
-+// result in d20-d23
-+ "vqadd.u8 d20, d0, d20\n\t"
-+ "vqadd.u8 d21, d1, d21\n\t"
-+ "vqadd.u8 d22, d2, d22\n\t"
-+ "vqadd.u8 d23, d3, d23\n\t"
-+
-+ "bne 2b\n\t"
-+
-+ "1:\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+
-+ : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+ : [mask] "r" (mask)
-+ : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+ "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27",
-+ "d30","d31"
-+ );
-+#endif
-+ }
-+ }
-+ else
-+ {
-+ uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+ // Handle width<8
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ while (w>=2)
-+ {
-+ uint8x8_t sval,dval;
-+
-+ sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-+ dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
-+
-+ /* sval * const alpha_mul */
-+ sval = neon2mul(sval,mask_alpha);
-+
-+ /* dval * 255-(src alpha) */
-+ dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
-+
-+ vst1_u8((void*)dst,vqadd_u8(sval,dval));
-+
-+ src+=2;
-+ dst+=2;
-+ w-=2;
-+ }
-+
-+ if (w)
-+ {
-+ uint8x8_t sval,dval;
-+
-+ sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src));
-+ dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
-+
-+ /* sval * const alpha_mul */
-+ sval = neon2mul(sval,mask_alpha);
-+
-+ /* dval * 255-(src alpha) */
-+ dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
-+
-+ vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-+ }
-+ }
-+ }
-+}
-+
-+
-+
-+void
-+fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t src, srca;
-+ uint16_t *dstLine, *dst;
-+ uint8_t *maskLine, *mask;
-+ int dstStride, maskStride;
-+ uint32_t w;
-+ uint8x8_t sval2;
-+ uint8x8x4_t sval8;
-+
-+ fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+ srca = src >> 24;
-+ if (src == 0)
-+ return;
-+
-+ sval2=vreinterpret_u8_u32(vdup_n_u32(src));
-+ sval8.val[0]=vdup_lane_u8(sval2,0);
-+ sval8.val[1]=vdup_lane_u8(sval2,1);
-+ sval8.val[2]=vdup_lane_u8(sval2,2);
-+ sval8.val[3]=vdup_lane_u8(sval2,3);
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+ while (height--)
-+ {
-+ uint16_t *keep_dst;
-+
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8_t alpha;
-+ uint16x8_t dval, temp;
-+ uint8x8x4_t sval8temp;
-+
-+ alpha = vld1_u8((void*)mask);
-+ dval = vld1q_u16((void*)dst);
-+ keep_dst = dst;
-+
-+ sval8temp = neon8mul(sval8,alpha);
-+ temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+ mask += (w & 7);
-+ dst += (w & 7);
-+ w -= (w & 7);
-+
-+ while (w)
-+ {
-+ dval = vld1q_u16((void*)dst);
-+ alpha = vld1_u8((void*)mask);
-+
-+ vst1q_u16((void*)keep_dst,temp);
-+ keep_dst = dst;
-+
-+ sval8temp = neon8mul(sval8,alpha);
-+ temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+ mask+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ vst1q_u16((void*)keep_dst,temp);
-+#else
-+ asm volatile (
-+ "vdup.32 d0, %[src]\n\t"
-+ "vdup.8 d1, d0[1]\n\t"
-+ "vdup.8 d2, d0[2]\n\t"
-+ "vdup.8 d3, d0[3]\n\t"
-+ "vdup.8 d0, d0[0]\n\t"
-+
-+ "vld1.8 {q12}, [%[dst]]\n\t"
-+ "vld1.8 {d31}, [%[mask]]\n\t"
-+ "mov %[keep_dst], %[dst]\n\t"
-+
-+ "and ip, %[w], #7\n\t"
-+ "add %[mask], %[mask], ip\n\t"
-+ "add %[dst], %[dst], ip, LSL#1\n\t"
-+ "subs %[w], %[w], ip\n\t"
-+ "b 9f\n\t"
-+// LOOP
-+ "2:\n\t"
-+
-+ "vld1.16 {q12}, [%[dst]]!\n\t"
-+ "vld1.8 {d31}, [%[mask]]!\n\t"
-+ "vst1.16 {q10}, [%[keep_dst]]\n\t"
-+ "sub %[keep_dst], %[dst], #8*2\n\t"
-+ "subs %[w], %[w], #8\n\t"
-+ "9:\n\t"
-+// expand 0565 q12 to 8888 {d4-d7}
-+ "vmovn.u16 d4, q12\t\n"
-+ "vshr.u16 q11, q12, #5\t\n"
-+ "vshr.u16 q10, q12, #6+5\t\n"
-+ "vmovn.u16 d5, q11\t\n"
-+ "vmovn.u16 d6, q10\t\n"
-+ "vshl.u8 d4, d4, #3\t\n"
-+ "vshl.u8 d5, d5, #2\t\n"
-+ "vshl.u8 d6, d6, #3\t\n"
-+ "vsri.u8 d4, d4, #5\t\n"
-+ "vsri.u8 d5, d5, #6\t\n"
-+ "vsri.u8 d6, d6, #5\t\n"
-+
-+ "vmull.u8 q10, d31, d0\n\t"
-+ "vmull.u8 q11, d31, d1\n\t"
-+ "vmull.u8 q12, d31, d2\n\t"
-+ "vmull.u8 q13, d31, d3\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d20, q10, q8\n\t"
-+ "vraddhn.u16 d21, q11, q9\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d23, q13, q9\n\t"
-+ "vraddhn.u16 d22, q12, q8\n\t"
-+
-+// this block is duplicated in the 4/2/1-pixel version below
-+ "vmvn.8 d30, d23\n\t"
-+ "vmull.u8 q14, d30, d6\n\t"
-+ "vmull.u8 q13, d30, d5\n\t"
-+ "vmull.u8 q12, d30, d4\n\t"
-+ "vrshr.u16 q8, q14, #8\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vraddhn.u16 d6, q14, q8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d5, q13, q9\n\t"
-+ "vqadd.u8 d6, d6, d22\n\t" // moved up
-+ "vraddhn.u16 d4, q12, q8\n\t"
-+// intentionally don't calculate alpha
-+// result in d4-d6
-+
-+// "vqadd.u8 d6, d6, d22\n\t" ** moved up
-+ "vqadd.u8 d5, d5, d21\n\t"
-+ "vqadd.u8 d4, d4, d20\n\t"
-+
-+// pack 8888 {d20-d23} to 0565 q10
-+ "vshll.u8 q10, d6, #8\n\t"
-+ "vshll.u8 q3, d5, #8\n\t"
-+ "vshll.u8 q2, d4, #8\n\t"
-+ "vsri.u16 q10, q3, #5\t\n"
-+ "vsri.u16 q10, q2, #11\t\n"
-+
-+ "bne 2b\n\t"
-+
-+ "1:\n\t"
-+ "vst1.16 {q10}, [%[keep_dst]]\n\t"
-+
-+ : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "+r" (keep_dst)
-+ : [src] "r" (src)
-+ : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+ "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+ "d30","d31"
-+ );
-+#endif
-+ }
-+ }
-+ else
-+ {
-+ while (height--)
-+ {
-+ void *dst4, *dst2;
-+
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8_t alpha;
-+ uint16x8_t dval, temp;
-+ uint8x8x4_t sval8temp;
-+
-+ if (w&4)
-+ {
-+ alpha = vreinterpret_u8_u32(vld1_lane_u32((void*)mask,vreinterpret_u32_u8(alpha),1));
-+ dval = vreinterpretq_u16_u64(vld1q_lane_u64((void*)dst,vreinterpretq_u64_u16(dval),1));
-+ dst4=dst;
-+ mask+=4;
-+ dst+=4;
-+ }
-+ if (w&2)
-+ {
-+ alpha = vreinterpret_u8_u16(vld1_lane_u16((void*)mask,vreinterpret_u16_u8(alpha),1));
-+ dval = vreinterpretq_u16_u32(vld1q_lane_u32((void*)dst,vreinterpretq_u32_u16(dval),1));
-+ dst2=dst;
-+ mask+=2;
-+ dst+=2;
-+ }
-+ if (w&1)
-+ {
-+ alpha = vld1_lane_u8((void*)mask,alpha,1);
-+ dval = vld1q_lane_u16((void*)dst,dval,1);
-+ }
-+
-+ sval8temp = neon8mul(sval8,alpha);
-+ temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+ if (w&1)
-+ vst1q_lane_u16((void*)dst,temp,1);
-+ if (w&2)
-+ vst1q_lane_u32((void*)dst2,vreinterpretq_u32_u16(temp),1);
-+ if (w&4)
-+ vst1q_lane_u64((void*)dst4,vreinterpretq_u64_u16(temp),1);
-+#else
-+ asm volatile (
-+ "vdup.32 d0, %[src]\n\t"
-+ "vdup.8 d1, d0[1]\n\t"
-+ "vdup.8 d2, d0[2]\n\t"
-+ "vdup.8 d3, d0[3]\n\t"
-+ "vdup.8 d0, d0[0]\n\t"
-+
-+ "tst %[w], #4\t\n"
-+ "beq skip_load4\t\n"
-+
-+ "vld1.64 {d25}, [%[dst]]\n\t"
-+ "vld1.32 {d31[1]}, [%[mask]]\n\t"
-+ "mov %[dst4], %[dst]\t\n"
-+ "add %[mask], %[mask], #4\t\n"
-+ "add %[dst], %[dst], #4*2\t\n"
-+
-+ "skip_load4:\t\n"
-+ "tst %[w], #2\t\n"
-+ "beq skip_load2\t\n"
-+ "vld1.32 {d24[1]}, [%[dst]]\n\t"
-+ "vld1.16 {d31[1]}, [%[mask]]\n\t"
-+ "mov %[dst2], %[dst]\t\n"
-+ "add %[mask], %[mask], #2\t\n"
-+ "add %[dst], %[dst], #2*2\t\n"
-+
-+ "skip_load2:\t\n"
-+ "tst %[w], #1\t\n"
-+ "beq skip_load1\t\n"
-+ "vld1.16 {d24[1]}, [%[dst]]\n\t"
-+ "vld1.8 {d31[1]}, [%[mask]]\n\t"
-+
-+ "skip_load1:\t\n"
-+// expand 0565 q12 to 8888 {d4-d7}
-+ "vmovn.u16 d4, q12\t\n"
-+ "vshr.u16 q11, q12, #5\t\n"
-+ "vshr.u16 q10, q12, #6+5\t\n"
-+ "vmovn.u16 d5, q11\t\n"
-+ "vmovn.u16 d6, q10\t\n"
-+ "vshl.u8 d4, d4, #3\t\n"
-+ "vshl.u8 d5, d5, #2\t\n"
-+ "vshl.u8 d6, d6, #3\t\n"
-+ "vsri.u8 d4, d4, #5\t\n"
-+ "vsri.u8 d5, d5, #6\t\n"
-+ "vsri.u8 d6, d6, #5\t\n"
-+
-+ "vmull.u8 q10, d31, d0\n\t"
-+ "vmull.u8 q11, d31, d1\n\t"
-+ "vmull.u8 q12, d31, d2\n\t"
-+ "vmull.u8 q13, d31, d3\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d20, q10, q8\n\t"
-+ "vraddhn.u16 d21, q11, q9\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d23, q13, q9\n\t"
-+ "vraddhn.u16 d22, q12, q8\n\t"
-+
-+// this block duplicates the maths of the 8-pixel version above
-+ "vmvn.8 d30, d23\n\t"
-+ "vmull.u8 q14, d30, d6\n\t"
-+ "vmull.u8 q13, d30, d5\n\t"
-+ "vmull.u8 q12, d30, d4\n\t"
-+ "vrshr.u16 q8, q14, #8\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vraddhn.u16 d6, q14, q8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d5, q13, q9\n\t"
-+ "vqadd.u8 d6, d6, d22\n\t" // moved up
-+ "vraddhn.u16 d4, q12, q8\n\t"
-+// intentionally don't calculate alpha
-+// result in d4-d6
-+
-+// "vqadd.u8 d6, d6, d22\n\t" ** moved up
-+ "vqadd.u8 d5, d5, d21\n\t"
-+ "vqadd.u8 d4, d4, d20\n\t"
-+
-+// pack 8888 {d20-d23} to 0565 q10
-+ "vshll.u8 q10, d6, #8\n\t"
-+ "vshll.u8 q3, d5, #8\n\t"
-+ "vshll.u8 q2, d4, #8\n\t"
-+ "vsri.u16 q10, q3, #5\t\n"
-+ "vsri.u16 q10, q2, #11\t\n"
-+
-+ "tst %[w], #1\n\t"
-+ "beq skip_store1\t\n"
-+ "vst1.16 {d20[1]}, [%[dst]]\t\n"
-+ "skip_store1:\t\n"
-+ "tst %[w], #2\n\t"
-+ "beq skip_store2\t\n"
-+ "vst1.32 {d20[1]}, [%[dst2]]\t\n"
-+ "skip_store2:\t\n"
-+ "tst %[w], #4\n\t"
-+ "beq skip_store4\t\n"
-+ "vst1.16 {d21}, [%[dst4]]\t\n"
-+ "skip_store4:\t\n"
-+
-+ : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [dst4] "+r" (dst4), [dst2] "+r" (dst2)
-+ : [src] "r" (src)
-+ : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+ "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+ "d30","d31"
-+ );
-+#endif
-+ }
-+ }
-+}
-+
-+
-+void
-+fbCompositeSolidMask_nx8x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t src, srca;
-+ uint32_t *dstLine, *dst;
-+ uint8_t *maskLine, *mask;
-+ int dstStride, maskStride;
-+ uint32_t w;
-+ uint8x8_t sval2;
-+ uint8x8x4_t sval8;
-+ uint8x8_t mask_selector=vreinterpret_u8_u64(vcreate_u64(0x0101010100000000ULL));
-+ uint8x8_t alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+ fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+ srca = src >> 24;
-+ if (src == 0)
-+ return;
-+
-+ sval2=vreinterpret_u8_u32(vdup_n_u32(src));
-+ sval8.val[0]=vdup_lane_u8(sval2,0);
-+ sval8.val[1]=vdup_lane_u8(sval2,1);
-+ sval8.val[2]=vdup_lane_u8(sval2,2);
-+ sval8.val[3]=vdup_lane_u8(sval2,3);
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+ while (height--)
-+ {
-+ uint32_t *keep_dst;
-+
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+ uint8x8_t alpha;
-+ uint8x8x4_t dval, temp;
-+
-+ alpha = vld1_u8((void*)mask);
-+ dval = vld4_u8((void*)dst);
-+ keep_dst = dst;
-+
-+ temp = neon8mul(sval8,alpha);
-+ dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-+ temp = neon8qadd(temp,dval);
-+
-+ mask += (w & 7);
-+ dst += (w & 7);
-+ w -= (w & 7);
-+
-+ while (w)
-+ {
-+ alpha = vld1_u8((void*)mask);
-+ dval = vld4_u8((void*)dst);
-+
-+ vst4_u8((void*)keep_dst,temp);
-+ keep_dst = dst;
-+
-+ temp = neon8mul(sval8,alpha);
-+ dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-+ temp = neon8qadd(temp,dval);
-+
-+ mask+=8;
-+ dst+=8;
-+ w-=8;
-+ }
-+ vst4_u8((void*)keep_dst,temp);
-+#else
-+ asm volatile (
-+ "vdup.32 d0, %[src]\n\t"
-+ "vdup.8 d1, d0[1]\n\t"
-+ "vdup.8 d2, d0[2]\n\t"
-+ "vdup.8 d3, d0[3]\n\t"
-+ "vdup.8 d0, d0[0]\n\t"
-+
-+ "vld4.8 {d4-d7}, [%[dst]]\n\t"
-+ "vld1.8 {d31}, [%[mask]]\n\t"
-+ "mov %[keep_dst], %[dst]\n\t"
-+
-+ "and ip, %[w], #7\n\t"
-+ "add %[mask], %[mask], ip\n\t"
-+ "add %[dst], %[dst], ip, LSL#2\n\t"
-+ "subs %[w], %[w], ip\n\t"
-+ "b 9f\n\t"
-+// LOOP
-+ "2:\n\t"
-+ "vld4.8 {d4-d7}, [%[dst]]!\n\t"
-+ "vld1.8 {d31}, [%[mask]]!\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+ "sub %[keep_dst], %[dst], #8*4\n\t"
-+ "subs %[w], %[w], #8\n\t"
-+ "9:\n\t"
-+
-+ "vmull.u8 q10, d31, d0\n\t"
-+ "vmull.u8 q11, d31, d1\n\t"
-+ "vmull.u8 q12, d31, d2\n\t"
-+ "vmull.u8 q13, d31, d3\n\t"
-+ "vrshr.u16 q8, q10, #8\n\t"
-+ "vrshr.u16 q9, q11, #8\n\t"
-+ "vraddhn.u16 d20, q10, q8\n\t"
-+ "vraddhn.u16 d21, q11, q9\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vraddhn.u16 d23, q13, q9\n\t"
-+ "vraddhn.u16 d22, q12, q8\n\t"
-+
-+ "vmvn.8 d30, d23\n\t"
-+ "vmull.u8 q12, d30, d4\n\t"
-+ "vmull.u8 q13, d30, d5\n\t"
-+ "vmull.u8 q14, d30, d6\n\t"
-+ "vmull.u8 q15, d30, d7\n\t"
-+
-+ "vrshr.u16 q8, q12, #8\n\t"
-+ "vrshr.u16 q9, q13, #8\n\t"
-+ "vraddhn.u16 d4, q12, q8\n\t"
-+ "vrshr.u16 q8, q14, #8\n\t"
-+ "vraddhn.u16 d5, q13, q9\n\t"
-+ "vrshr.u16 q9, q15, #8\n\t"
-+ "vraddhn.u16 d6, q14, q8\n\t"
-+ "vraddhn.u16 d7, q15, q9\n\t"
-+// result in d4-d7
-+
-+ "vqadd.u8 d20, d4, d20\n\t"
-+ "vqadd.u8 d21, d5, d21\n\t"
-+ "vqadd.u8 d22, d6, d22\n\t"
-+ "vqadd.u8 d23, d7, d23\n\t"
-+
-+ "bne 2b\n\t"
-+
-+ "1:\n\t"
-+ "vst4.8 {d20-d23}, [%[keep_dst]]\n\t"
-+
-+ : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "+r" (keep_dst)
-+ : [src] "r" (src)
-+ : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+ "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+ "d30","d31"
-+ );
-+#endif
-+ }
-+ }
-+ else
-+ {
-+ while (height--)
-+ {
-+ uint8x8_t alpha;
-+
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+ while (w>=2)
-+ {
-+ uint8x8_t dval, temp, res;
-+
-+ alpha = vtbl1_u8(vreinterpret_u8_u16(vld1_dup_u16((void*)mask)), mask_selector);
-+ dval = vld1_u8((void*)dst);
-+
-+ temp = neon2mul(sval2,alpha);
-+ res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
-+
-+ vst1_u8((void*)dst,res);
-+
-+ mask+=2;
-+ dst+=2;
-+ w-=2;
-+ }
-+ if (w)
-+ {
-+ uint8x8_t dval, temp, res;
-+
-+ alpha = vtbl1_u8(vld1_dup_u8((void*)mask), mask_selector);
-+ dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
-+
-+ temp = neon2mul(sval2,alpha);
-+ res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
-+
-+ vst1_lane_u32((void*)dst,vreinterpret_u32_u8(res),0);
-+ }
-+ }
-+ }
-+}
-+
-+
-+void
-+fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint8_t *dstLine, *dst;
-+ uint8_t *maskLine, *mask;
-+ int dstStride, maskStride;
-+ uint32_t w;
-+ uint32_t src;
-+ uint8x8_t sa;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+ fbComposeGetSolid (pSrc, src, pDst->bits.format);
-+ sa = vdup_n_u8((src) >> 24);
-+
-+ if (width>=8)
-+ {
-+ // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+ uint8x8_t mval, dval, res;
-+ uint8_t *keep_dst;
-+
-+ mval = vld1_u8((void *)mask);
-+ dval = vld1_u8((void *)dst);
-+ keep_dst = dst;
-+
-+ res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+ mask += (w & 7);
-+ dst += (w & 7);
-+ w -= w & 7;
-+
-+ while (w)
-+ {
-+ mval = vld1_u8((void *)mask);
-+ dval = vld1_u8((void *)dst);
-+ vst1_u8((void *)keep_dst, res);
-+ keep_dst = dst;
-+
-+ res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+ mask += 8;
-+ dst += 8;
-+ w -= 8;
-+ }
-+ vst1_u8((void *)keep_dst, res);
-+ }
-+ }
-+ else
-+ {
-+ // Use 4/2/1 load/store method to handle 1-7 pixels
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+ uint8x8_t mval, dval, res;
-+ uint8_t *dst4, *dst2;
-+
-+ if (w&4)
-+ {
-+ mval = vreinterpret_u8_u32(vld1_lane_u32((void *)mask, vreinterpret_u32_u8(mval), 1));
-+ dval = vreinterpret_u8_u32(vld1_lane_u32((void *)dst, vreinterpret_u32_u8(dval), 1));
-+
-+ dst4 = dst;
-+ mask += 4;
-+ dst += 4;
-+ }
-+ if (w&2)
-+ {
-+ mval = vreinterpret_u8_u16(vld1_lane_u16((void *)mask, vreinterpret_u16_u8(mval), 1));
-+ dval = vreinterpret_u8_u16(vld1_lane_u16((void *)dst, vreinterpret_u16_u8(dval), 1));
-+ dst2 = dst;
-+ mask += 2;
-+ dst += 2;
-+ }
-+ if (w&1)
-+ {
-+ mval = vld1_lane_u8((void *)mask, mval, 1);
-+ dval = vld1_lane_u8((void *)dst, dval, 1);
-+ }
-+
-+ res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+ if (w&1)
-+ vst1_lane_u8((void *)dst, res, 1);
-+ if (w&2)
-+ vst1_lane_u16((void *)dst2, vreinterpret_u16_u8(res), 1);
-+ if (w&4)
-+ vst1_lane_u32((void *)dst4, vreinterpret_u32_u8(res), 1);
-+ }
-+ }
-+}
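
The narrow path above packs up to seven pixels into disjoint lanes of a single vector: four bytes into bytes 4-7 (vld1_lane_u32 ..., 1), two into bytes 2-3 and one into byte 1, runs the combine once, then scatters the same lanes back. A scalar model of the lane bookkeeping for this ADD path; names are illustrative:

    #include <stdint.h>
    #include <string.h>

    static uint8_t div255 (uint16_t t) { t += 0x80; return (t + (t >> 8)) >> 8; }

    static void add_tail_421 (uint8_t *dst, const uint8_t *mask, int w, uint8_t sa)
    {
        uint8_t mv[8] = { 0 }, dv[8] = { 0 }, res[8];
        uint8_t *d4 = NULL, *d2 = NULL, *d1 = NULL;   /* w < 8 */

        if (w & 4) { memcpy (mv + 4, mask, 4); memcpy (dv + 4, dst, 4);
                     d4 = dst;  mask += 4;  dst += 4; }
        if (w & 2) { memcpy (mv + 2, mask, 2); memcpy (dv + 2, dst, 2);
                     d2 = dst;  mask += 2;  dst += 2; }
        if (w & 1) { mv[1] = *mask;  dv[1] = *dst;  d1 = dst; }

        for (int i = 0; i < 8; i++) {          /* one "vector" op covers every lane */
            int t = dv[i] + div255 (mv[i] * sa);
            res[i] = t > 255 ? 255 : t;        /* vqadd.u8 */
        }

        if (w & 1) *d1 = res[1];
        if (w & 2) memcpy (d2, res + 2, 2);
        if (w & 4) memcpy (d4, res + 4, 4);
    }

Because the three sub-loads land in non-overlapping lanes, any combination of the w&4 / w&2 / w&1 cases can share one multiply-and-add.
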
-+
-diff --git a/pixman/pixman-arm-neon.h b/pixman/pixman-arm-neon.h
-new file mode 100644
-index 0000000..bab4dee
---- /dev/null
-+++ b/pixman/pixman-arm-neon.h
-@@ -0,0 +1,137 @@
-+/*
-+ * Copyright © 2009 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission. Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose. It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author: Ian Rickards (ian.rickards@arm.com)
-+ *
-+ */
-+
-+#include "pixman-private.h"
-+
-+#ifdef USE_ARM_NEON
-+
-+static inline pixman_bool_t pixman_have_arm_neon(void) { return TRUE; }
-+
-+#else
-+#define pixman_have_arm_neon() FALSE
-+#endif
-+
-+#ifdef USE_ARM_NEON
-+
-+void
-+fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSrc_8888x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSolidMask_nx8x8888neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSrc_x888x0565neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+#endif /* USE_ARM_NEON */
-diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
-index 1388517..b13947a 100644
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -34,6 +34,7 @@
- #include "pixman-mmx.h"
- #include "pixman-vmx.h"
- #include "pixman-sse2.h"
-+#include "pixman-arm-neon.h"
- #include "pixman-arm-simd.h"
- #include "pixman-combine32.h"
-
-@@ -1518,6 +1519,31 @@ static const FastPathInfo vmx_fast_paths[] =
- };
- #endif
-
-+#ifdef USE_ARM_NEON
-+static const FastPathInfo arm_neon_fast_paths[] =
-+{
-+ { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8neon, 0 },
-+ { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000neon, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
-+ { PIXMAN_OP_NONE },
-+};
-+#endif
-+
- #ifdef USE_ARM_SIMD
- static const FastPathInfo arm_simd_fast_paths[] =
- {
-@@ -1893,6 +1919,11 @@ pixman_image_composite (pixman_op_t op,
- info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
- #endif
-
-+#ifdef USE_ARM_NEON
-+ if (!info && pixman_have_arm_neon())
-+ info = get_fast_path (arm_neon_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-+#endif
-+
- #ifdef USE_ARM_SIMD
- if (!info && pixman_have_arm_simd())
- info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf);
diff --git a/recipes/xorg-lib/pixman/pixman-28986.patch b/recipes/xorg-lib/pixman/pixman-28986.patch
deleted file mode 100644
index f5ba4c302e..0000000000
--- a/recipes/xorg-lib/pixman/pixman-28986.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 7b7860d61fb1526acdf010dd8fd644bbf1396b9e Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 28 Aug 2009 22:34:21 +0300
-Subject: [PATCH] ARM: workaround for gcc bug in vshll_n_u8 intrinsic
-
-Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject a
-shift operand with a value >= 8, claiming that it is out of
-range, so inline assembly is used as a workaround.
----
- pixman/pixman-arm-neon.c | 6 ++++++
- 1 files changed, 6 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 4125d1b..3e7f566 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -64,6 +64,12 @@ unpack0565 (uint16x8_t rgb)
- return res;
- }
-
-+#ifdef USE_GCC_INLINE_ASM
-+/* Some versions of gcc have problems with vshll_n_u8 intrinsic (Bug 23576) */
-+#define vshll_n_u8(a, n) ({ uint16x8_t r; \
-+ asm ("vshll.u8 %q0, %P1, %2\n" : "=w" (r) : "w" (a), "i" (n)); r; })
-+#endif
-+
- static force_inline uint16x8_t
- pack0565 (uint8x8x4_t s)
- {
---
-1.5.4.3
-
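
For context on the patch above: pack0565 widens each channel with vshll_n_u8(x, 8), and a shift equal to the element width is architecturally valid (it selects a dedicated VSHLL encoding), but the affected compilers rejected the constant at compile time. A minimal reproducer, assuming a toolchain hit by the bug:

    #include <arm_neon.h>

    uint16x8_t widen_times_256 (uint8x8_t x)
    {
        /* valid NEON, but cs2009q1 / gcc 4.4.1 report the constant as
         * out of range; the patch's asm macro sidesteps the check */
        return vshll_n_u8 (x, 8);
    }
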
diff --git a/recipes/xorg-lib/pixman/pixman-arm.patch b/recipes/xorg-lib/pixman/pixman-arm.patch
deleted file mode 100644
index 91dda03b7c..0000000000
--- a/recipes/xorg-lib/pixman/pixman-arm.patch
+++ /dev/null
@@ -1,632 +0,0 @@
-From: Jeff Muizelaar <jmuizelaar@mozilla.com>
-Date: Wed, 17 Sep 2008 19:53:20 +0000 (-0400)
-Subject: Add support for ARMv6 SIMD fastpaths.
-X-Git-Url: http://gitweb.freedesktop.org/?p=pixman.git;a=commitdiff;h=d0b181f347ef4720d130beee3f03196afbd28aba
-
-Add support for ARMv6 SIMD fastpaths.
----
-
---- a/configure.ac
-+++ b/configure.ac
-@@ -277,6 +277,44 @@ AC_SUBST(VMX_CFLAGS)
-
- AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-
-+dnl Check for ARM
-+
-+have_armv6_simd=no
-+AC_MSG_CHECKING(whether to use ARM assembler)
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$CFLAGS $ARM_CFLAGS"
-+AC_COMPILE_IFELSE([
-+int main () {
-+ asm("uqadd8 r1, r1, r2");
-+ return 0;
-+}], have_armv6_simd=yes)
-+CFLAGS=$xserver_save_CFLAGS
-+
-+AC_ARG_ENABLE(arm,
-+ [AC_HELP_STRING([--disable-arm],
-+ [disable ARM fast paths])],
-+ [enable_arm=$enableval], [enable_arm=auto])
-+
-+if test $enable_arm = no ; then
-+ have_armv6_simd=disabled
-+fi
-+
-+if test $have_armv6_simd = yes ; then
-+ AC_DEFINE(USE_ARM, 1, [use ARM compiler intrinsics])
-+else
-+ ARM_CFLAGS=
-+fi
-+
-+AC_MSG_RESULT($have_armv6_simd)
-+if test $enable_arm = yes && test $have_armv6_simd = no ; then
-+ AC_MSG_ERROR([ARM intrinsics not detected])
-+fi
-+
-+AC_SUBST(ARM_CFLAGS)
-+
-+AM_CONDITIONAL(USE_ARM, test $have_armv6_simd = yes)
-+
-+
- AC_ARG_ENABLE(gtk,
- [AC_HELP_STRING([--enable-gtk],
- [enable tests using GTK+ [default=auto]])],
---- a/pixman/Makefile.am
-+++ b/pixman/Makefile.am
-@@ -79,3 +79,15 @@ libpixman_sse2_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-sse2.la
- endif
-
-+# arm code
-+if USE_ARM
-+noinst_LTLIBRARIES += libpixman-arm.la
-+libpixman_arm_la_SOURCES = \
-+ pixman-arm.c \
-+ pixman-arm.h
-+libpixman_arm_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
-+libpixman_arm_la_LIBADD = $(DEP_LIBS)
-+libpixman_1_la_LIBADD += libpixman-arm.la
-+endif
-+
-+
---- /dev/null
-+++ b/pixman/pixman-arm.c
-@@ -0,0 +1,409 @@
-+/*
-+ * Copyright © 2008 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission. Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose. It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author: Jeff Muizelaar (jeff@infidigm.net)
-+ *
-+ */
-+#ifdef HAVE_CONFIG_H
-+#include <config.h>
-+#endif
-+
-+#include "pixman-arm.h"
-+
-+void
-+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint8_t *dstLine, *dst;
-+ uint8_t *srcLine, *src;
-+ int dstStride, srcStride;
-+ uint16_t w;
-+ uint8_t s, d;
-+
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
-+ fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ while (w && (unsigned long)dst & 3)
-+ {
-+ s = *src;
-+ d = *dst;
-+ asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-+ *dst = d;
-+
-+ dst++;
-+ src++;
-+ w--;
-+ }
-+
-+ while (w >= 4)
-+ {
-+ asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
-+ dst += 4;
-+ src += 4;
-+ w -= 4;
-+ }
-+
-+ while (w)
-+ {
-+ s = *src;
-+ d = *dst;
-+ asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-+ *dst = d;
-+
-+ dst++;
-+ src++;
-+ w--;
-+ }
-+ }
-+
-+}
-+
-+void
-+fbCompositeSrc_8888x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t *dstLine, *dst;
-+ uint32_t *srcLine, *src;
-+ int dstStride, srcStride;
-+ uint16_t w;
-+ uint32_t component_half = 0x800080;
-+ uint32_t upper_component_mask = 0xff00ff00;
-+ uint32_t alpha_mask = 0xff;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+//#define inner_branch
-+ asm volatile (
-+ "cmp %[w], #0\n\t"
-+ "beq 2f\n\t"
-+ "1:\n\t"
-+ /* load dest */
-+ "ldr r5, [%[src]], #4\n\t"
-+#ifdef inner_branch
-+ /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+	 * The 0x0 case also allows us to avoid doing an unnecessary data
-+	 * write, which is more valuable, so we only check for that */
-+ "cmp r5, #0x1000000\n\t"
-+ "blt 3f\n\t"
-+
-+ /* = 255 - alpha */
-+ "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+
-+ "ldr r4, [%[dest]] \n\t"
-+
-+#else
-+ "ldr r4, [%[dest]] \n\t"
-+
-+ /* = 255 - alpha */
-+ "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+#endif
-+ "uxtb16 r6, r4\n\t"
-+ "uxtb16 r7, r4, ror #8\n\t"
-+
-+ /* multiply by 257 and divide by 65536 */
-+ "mla r6, r6, r8, %[component_half]\n\t"
-+ "mla r7, r7, r8, %[component_half]\n\t"
-+
-+ "uxtab16 r6, r6, r6, ror #8\n\t"
-+ "uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+ /* recombine the 0xff00ff00 bytes of r6 and r7 */
-+ "and r7, %[upper_component_mask]\n\t"
-+ "uxtab16 r6, r7, r6, ror #8\n\t"
-+
-+ "uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+ "3:\n\t"
-+
-+#endif
-+ "str r5, [%[dest]], #4\n\t"
-+ /* increment counter and jmp to top */
-+ "subs %[w], %[w], #1\n\t"
-+ "bne 1b\n\t"
-+ "2:\n\t"
-+ : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-+ : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
-+ [alpha_mask] "r" (alpha_mask)
-+ : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-+ );
-+ }
-+}
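
The uxtb16 / mla / uxtab16 sequence above is the ARMv6 form of pixman's packed two-channel multiply: each register holds two colour components as 0x00XX00YY, and "multiply by 257 and divide by 65536" is the exact rounding division by 255. A C model of one register's worth (function name illustrative; in the asm the 0x800080 constant lives in a register, not an immediate):

    #include <stdint.h>

    /* two components packed as 0x00XX00YY, each scaled by an 8-bit alpha:
     *   mla     rt, rt, alpha, r_half   ; x*a + 0x80 per halfword (r_half = 0x800080)
     *   uxtab16 rt, rt, rt, ror #8      ; add each halfword's high byte
     *   uxtb16  rt, rt, ror #8          ; keep the rounded high bytes
     */
    static uint32_t mul_packed2 (uint32_t two, uint32_t alpha)
    {
        uint32_t t = two * alpha + 0x800080;
        t += (t >> 8) & 0x00ff00ff;
        return (t >> 8) & 0x00ff00ff;
    }

Running the even bytes (B and R) through one such register and the odd bytes (G and A) through another, then recombining, gives the full four-channel multiply the loops above perform.
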
-+
-+void
-+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t *dstLine, *dst;
-+ uint32_t *srcLine, *src;
-+ uint32_t mask;
-+ int dstStride, srcStride;
-+ uint16_t w;
-+ uint32_t component_half = 0x800080;
-+ uint32_t alpha_mask = 0xff;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+ fbComposeGetSolid (pMask, mask, pDst->bits.format);
-+ mask = (mask) >> 24;
-+
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+//#define inner_branch
-+ asm volatile (
-+ "cmp %[w], #0\n\t"
-+ "beq 2f\n\t"
-+ "1:\n\t"
-+ /* load dest */
-+ "ldr r5, [%[src]], #4\n\t"
-+#ifdef inner_branch
-+ /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+	 * The 0x0 case also allows us to avoid doing an unnecessary data
-+	 * write, which is more valuable, so we only check for that */
-+ "cmp r5, #0x1000000\n\t"
-+ "blt 3f\n\t"
-+
-+#endif
-+ "ldr r4, [%[dest]] \n\t"
-+
-+ "uxtb16 r6, r5\n\t"
-+ "uxtb16 r7, r5, ror #8\n\t"
-+
-+	/* multiply by the mask alpha then by 257 and divide by 65536 */
-+ "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
-+ "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
-+
-+ "uxtab16 r6, r6, r6, ror #8\n\t"
-+ "uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+ "uxtb16 r6, r6, ror #8\n\t"
-+ "uxtb16 r7, r7, ror #8\n\t"
-+
-+ /* recombine */
-+ "orr r5, r6, r7, lsl #8\n\t"
-+
-+ "uxtb16 r6, r4\n\t"
-+ "uxtb16 r7, r4, ror #8\n\t"
-+
-+ /* 255 - alpha */
-+ "sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+
-+ /* multiply by alpha (r8) then by 257 and divide by 65536 */
-+ "mla r6, r6, r8, %[component_half]\n\t"
-+ "mla r7, r7, r8, %[component_half]\n\t"
-+
-+ "uxtab16 r6, r6, r6, ror #8\n\t"
-+ "uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+ "uxtb16 r6, r6, ror #8\n\t"
-+ "uxtb16 r7, r7, ror #8\n\t"
-+
-+ /* recombine */
-+ "orr r6, r6, r7, lsl #8\n\t"
-+
-+ "uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+ "3:\n\t"
-+
-+#endif
-+ "str r5, [%[dest]], #4\n\t"
-+ /* increment counter and jmp to top */
-+ "subs %[w], %[w], #1\n\t"
-+ "bne 1b\n\t"
-+ "2:\n\t"
-+ : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-+ : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
-+ [alpha_mask] "r" (alpha_mask)
-+ : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
-+ );
-+ }
-+}
-+
-+void
-+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint32_t src, srca;
-+ uint32_t *dstLine, *dst;
-+ uint8_t *maskLine, *mask;
-+ int dstStride, maskStride;
-+ uint16_t w;
-+
-+ fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+ srca = src >> 24;
-+ if (src == 0)
-+ return;
-+
-+ uint32_t component_mask = 0xff00ff;
-+ uint32_t component_half = 0x800080;
-+
-+ uint32_t src_hi = (src >> 8) & component_mask;
-+ uint32_t src_lo = src & component_mask;
-+
-+ fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+ fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ mask = maskLine;
-+ maskLine += maskStride;
-+ w = width;
-+
-+//#define inner_branch
-+ asm volatile (
-+ "cmp %[w], #0\n\t"
-+ "beq 2f\n\t"
-+ "1:\n\t"
-+ /* load mask */
-+ "ldrb r5, [%[mask]], #1\n\t"
-+#ifdef inner_branch
-+ /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+ * The 0x0 case also allows us to avoid doing an unnecessary data
-+ * write, which is more valuable, so we only check for that. */
-+ /* 0x1000000 is the least value that contains alpha; all values
-+ * less than it have a 0 alpha value */
-+ "cmp r5, #0x0\n\t"
-+ "beq 3f\n\t"
-+
-+#endif
-+ "ldr r4, [%[dest]] \n\t"
-+
-+ /* multiply by mask alpha (r5) then by 257 and divide by 65536 */
-+ "mla r6, %[src_lo], r5, %[component_half]\n\t"
-+ "mla r7, %[src_hi], r5, %[component_half]\n\t"
-+
-+ "uxtab16 r6, r6, r6, ror #8\n\t"
-+ "uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+ "uxtb16 r6, r6, ror #8\n\t"
-+ "uxtb16 r7, r7, ror #8\n\t"
-+
-+ /* recombine */
-+ "orr r5, r6, r7, lsl #8\n\t"
-+
-+ "uxtb16 r6, r4\n\t"
-+ "uxtb16 r7, r4, ror #8\n\t"
-+
-+ /* we could simplify this to use 'sub' if we were
-+ * willing to give up a register for alpha_mask */
-+ "mvn r8, r5\n\t"
-+ "mov r8, r8, lsr #24\n\t"
-+
-+ /* multiply by alpha (r8) then by 257 and divide by 65536 */
-+ "mla r6, r6, r8, %[component_half]\n\t"
-+ "mla r7, r7, r8, %[component_half]\n\t"
-+
-+ "uxtab16 r6, r6, r6, ror #8\n\t"
-+ "uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+ "uxtb16 r6, r6, ror #8\n\t"
-+ "uxtb16 r7, r7, ror #8\n\t"
-+
-+ /* recombine */
-+ "orr r6, r6, r7, lsl #8\n\t"
-+
-+ "uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+ "3:\n\t"
-+
-+#endif
-+ "str r5, [%[dest]], #4\n\t"
-+ /* decrement counter and jmp to top */
-+ "subs %[w], %[w], #1\n\t"
-+ "bne 1b\n\t"
-+ "2:\n\t"
-+ : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
-+ : [component_half] "r" (component_half),
-+ [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
-+ : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-+ );
-+ }
-+}
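
Both loops lean on the same uxtb16 byte de-interleave, which lets one multiply cover two channels at once; a scalar sketch with hypothetical helper names:

    #include <stdint.h>

    /* uxtb16 rd, pix          -> even bytes, 0x00RR00BB */
    static inline uint32_t even_bytes (uint32_t pix) { return pix & 0x00FF00FFu; }

    /* uxtb16 rd, pix, ror #8  -> odd bytes, 0x00AA00GG */
    static inline uint32_t odd_bytes (uint32_t pix) { return (pix >> 8) & 0x00FF00FFu; }

    /* orr rd, lo, hi, lsl #8  -> repacked 0xAARRGGBB */
    static inline uint32_t recombine (uint32_t lo, uint32_t hi) { return lo | (hi << 8); }
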
---- /dev/null
-+++ b/pixman/pixman-arm.h
-@@ -0,0 +1,94 @@
-+/*
-+ * Copyright © 2008 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission. Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose. It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author: Jeff Muizelaar (jeff@infidigm.net)
-+ *
-+ */
-+
-+#include "pixman-private.h"
-+
-+#ifdef USE_ARM
-+
-+static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
-+
-+#else
-+#define pixman_have_arm() FALSE
-+#endif
-+
-+#ifdef USE_ARM
-+
-+void
-+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+void
-+fbCompositeSrc_8888x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+void
-+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+void
-+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height);
-+
-+
-+#endif /* USE_ARM */
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -34,6 +34,7 @@
- #include "pixman-mmx.h"
- #include "pixman-vmx.h"
- #include "pixman-sse2.h"
-+#include "pixman-arm.h"
- #include "pixman-combine32.h"
-
- #ifdef __GNUC__
-@@ -1479,6 +1480,26 @@ static const FastPathInfo vmx_fast_paths
- };
- #endif
-
-+#ifdef USE_ARM
-+static const FastPathInfo arm_fast_paths[] =
-+{
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK },
-+ { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm, NEED_SOLID_MASK },
-+
-+ { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000arm, 0 },
-+
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },
-+ { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm, 0 },
-+
-+ { PIXMAN_OP_NONE },
-+};
-+#endif
-
- static const FastPathInfo c_fast_paths[] =
- {
-@@ -1829,6 +1850,12 @@ pixman_image_composite (pixman_op_t
- if (!info && pixman_have_vmx())
- info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
- #endif
-+
-+#ifdef USE_ARM
-+ if (!info && pixman_have_arm())
-+ info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-+#endif
-+
- if (!info)
- info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-
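
For reference, get_fast_path walks such a table until the first entry whose operator and formats match; a hedged sketch (field names assumed, not taken from the patch):

    static const FastPathInfo *
    find_fast_path_sketch (const FastPathInfo *info, pixman_op_t op,
                           pixman_format_code_t src, pixman_format_code_t mask,
                           pixman_format_code_t dst)
    {
        /* the tables above end with a { PIXMAN_OP_NONE } sentinel */
        for (; info->op != PIXMAN_OP_NONE; info++)
        {
            if (info->op == op && info->src_format == src &&
                info->mask_format == mask && info->dest_format == dst)
                return info;
        }
        return NULL;
    }
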
diff --git a/recipes/xorg-lib/pixman/pixman-x888-565.patch b/recipes/xorg-lib/pixman/pixman-x888-565.patch
deleted file mode 100644
index a3fa331710..0000000000
--- a/recipes/xorg-lib/pixman/pixman-x888-565.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From: Vladimir Vukicevic <vladimir@slide.(none)>
-Date: Wed, 17 Sep 2008 20:01:31 +0000 (-0400)
-Subject: Add SRC x888x0565 C fast path
-X-Git-Url: http://gitweb.freedesktop.org/?p=pixman.git;a=commitdiff;h=7180230d4d87c55dfef1e17a0cc3b125d45aa3a0
-
-Add SRC x888x0565 C fast path
----
-
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -759,6 +759,46 @@ fbCompositeSrc_8888x0565 (pixman_op_t op
- }
- }
-
-+
-+void
-+fbCompositeSrc_x888x0565 (pixman_op_t op,
-+ pixman_image_t * pSrc,
-+ pixman_image_t * pMask,
-+ pixman_image_t * pDst,
-+ int16_t xSrc,
-+ int16_t ySrc,
-+ int16_t xMask,
-+ int16_t yMask,
-+ int16_t xDst,
-+ int16_t yDst,
-+ uint16_t width,
-+ uint16_t height)
-+{
-+ uint16_t *dstLine, *dst;
-+ uint32_t *srcLine, *src, s;
-+ int dstStride, srcStride;
-+ uint16_t w;
-+
-+ fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+ fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+
-+ while (height--)
-+ {
-+ dst = dstLine;
-+ dstLine += dstStride;
-+ src = srcLine;
-+ srcLine += srcStride;
-+ w = width;
-+
-+ while (w--)
-+ {
-+ s = READ(pSrc, src++);
-+ WRITE(pDst, dst, cvt8888to0565(s));
-+ dst++;
-+ }
-+ }
-+}
-+
- void
- fbCompositeSrcAdd_8000x8000 (pixman_op_t op,
- pixman_image_t * pSrc,
-@@ -1568,6 +1608,10 @@ static const FastPathInfo c_fast_paths[]
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrcSrc_nxn, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrcSrc_nxn, 0 },
- #endif
-+ { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
-+ { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565, 0 },
- { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcIn_8x8, 0 },
- { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSolidMaskIn_nx8x8, 0 },
- { PIXMAN_OP_NONE },
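
The cvt8888to0565 macro this fast path calls is defined elsewhere in pixman; assuming the usual packing, it keeps the top 5/6/5 bits of each channel, as in this sketch:

    #include <stdint.h>

    static inline uint16_t
    cvt8888to0565_sketch (uint32_t s)
    {
        return (uint16_t) (((s >> 8) & 0xF800) |   /* r: bits 23..19 -> 15..11 */
                           ((s >> 5) & 0x07E0) |   /* g: bits 15..10 -> 10..5  */
                           ((s >> 3) & 0x001F));   /* b: bits  7..3  ->  4..0  */
    }
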
diff --git a/recipes/xorg-lib/pixman/prefetch.patch b/recipes/xorg-lib/pixman/prefetch.patch
deleted file mode 100644
index c2e856ec25..0000000000
--- a/recipes/xorg-lib/pixman/prefetch.patch
+++ /dev/null
@@ -1,298 +0,0 @@
-From d0044bfbd596f22ed1560579ea6537b39f3dc1af Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 29 Oct 2009 19:06:42 +0000
-Subject: ARM: Don't emit prefetch code if prefetch distance is set to 0
-
-Also it is now possible to disable prefetch globally with
-a configuration macro
----
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index bca499a..35e6a7e 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -219,33 +219,33 @@
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vshll.u8 q14, d16, #8
-- add PF_X, PF_X, #8
-+ PF add PF_X, PF_X, #8
- vshll.u8 q8, d19, #8
-- tst PF_CTL, #0xF
-+ PF tst PF_CTL, #0xF
- vsri.u8 d6, d6, #5
-- addne PF_X, PF_X, #8
-+ PF addne PF_X, PF_X, #8
- vmvn.8 d3, d3
-- subne PF_CTL, PF_CTL, #1
-+ PF subne PF_CTL, PF_CTL, #1
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- vmull.u8 q10, d3, d6
-- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
-- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vsri.u16 q14, q8, #5
-- cmp PF_X, ORIG_W
-+ PF cmp PF_X, ORIG_W
- vshll.u8 q9, d18, #8
- vrshr.u16 q13, q10, #8
-- subge PF_X, PF_X, ORIG_W
-+ PF subge PF_X, PF_X, ORIG_W
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
-- subges PF_CTL, PF_CTL, #0x10
-+ PF subges PF_CTL, PF_CTL, #0x10
- vsri.u16 q14, q9, #11
-- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
-- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vraddhn.u16 d22, q12, q15
- vst1.16 {d28, d29}, [DST_W, :128]!
- .endm
-@@ -323,20 +323,20 @@ generate_composite_function \
-
- .macro pixman_composite_src_8888_0565_process_pixblock_tail_head
- vsri.u16 q14, q8, #5
-- add PF_X, PF_X, #8
-- tst PF_CTL, #0xF
-+ PF add PF_X, PF_X, #8
-+ PF tst PF_CTL, #0xF
- vld4.8 {d0, d1, d2, d3}, [SRC]!
-- addne PF_X, PF_X, #8
-- subne PF_CTL, PF_CTL, #1
-+ PF addne PF_X, PF_X, #8
-+ PF subne PF_CTL, PF_CTL, #1
- vsri.u16 q14, q9, #11
-- cmp PF_X, ORIG_W
-- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+ PF cmp PF_X, ORIG_W
-+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vshll.u8 q8, d1, #8
- vst1.16 {d28, d29}, [DST_W, :128]!
-- subge PF_X, PF_X, ORIG_W
-- subges PF_CTL, PF_CTL, #0x10
-+ PF subge PF_X, PF_X, ORIG_W
-+ PF subges PF_CTL, PF_CTL, #0x10
- vshll.u8 q14, d2, #8
-- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vshll.u8 q9, d0, #8
- .endm
-
-@@ -363,20 +363,20 @@ generate_composite_function \
-
- .macro pixman_composite_add_8000_8000_process_pixblock_tail_head
- vld1.8 {d0, d1, d2, d3}, [SRC]!
-- add PF_X, PF_X, #32
-- tst PF_CTL, #0xF
-+ PF add PF_X, PF_X, #32
-+ PF tst PF_CTL, #0xF
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
-- addne PF_X, PF_X, #32
-- subne PF_CTL, PF_CTL, #1
-+ PF addne PF_X, PF_X, #32
-+ PF subne PF_CTL, PF_CTL, #1
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
-- cmp PF_X, ORIG_W
-- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-- subge PF_X, PF_X, ORIG_W
-- subges PF_CTL, PF_CTL, #0x10
-+ PF cmp PF_X, ORIG_W
-+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-+ PF subge PF_X, PF_X, ORIG_W
-+ PF subges PF_CTL, PF_CTL, #0x10
- vqadd.u8 q14, q0, q2
-- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vqadd.u8 q15, q1, q3
- .endm
-
-@@ -418,32 +418,32 @@ generate_composite_function \
- .macro pixman_composite_over_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q14, q8, #8
-- add PF_X, PF_X, #8
-- tst PF_CTL, #0xF
-+ PF add PF_X, PF_X, #8
-+ PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
-- addne PF_X, PF_X, #8
-- subne PF_CTL, PF_CTL, #1
-+ PF addne PF_X, PF_X, #8
-+ PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
-- cmp PF_X, ORIG_W
-+ PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- vld4.8 {d0, d1, d2, d3}, [SRC]!
-- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmvn.8 d22, d3
-- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-- subge PF_X, PF_X, ORIG_W
-+ PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
-- subges PF_CTL, PF_CTL, #0x10
-+ PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
-- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vmull.u8 q10, d22, d6
-- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
- .endm
-
-diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
-index d276ab9..a2941ae 100644
---- a/pixman/pixman-arm-neon-asm.h
-+++ b/pixman/pixman-arm-neon-asm.h
-@@ -58,6 +58,11 @@
- #define RESPECT_STRICT_ALIGNMENT 1
-
- /*
-+ * If set to nonzero value, prefetch is globally disabled
-+ */
-+#define PREFETCH_GLOBALLY_DISABLED 0
-+
-+/*
- * Definitions of supplementary pixld/pixst macros (for partial load/store of
- * pixel data)
- */
-@@ -218,37 +223,43 @@
- * pixels processing like simple copy. Anyway, having prefetch is a must
- * when working with graphics data.
- */
-+.macro PF a, x:vararg
-+.if (ADVANCED_PREFETCH_ENABLED != 0) && (PREFETCH_GLOBALLY_DISABLED == 0)
-+ a x
-+.endif
-+.endm
-+
- .macro cache_preload std_increment, boost_increment
- .if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
- .if regs_shortage
-- ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
-+ PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
- .endif
- .if std_increment != 0
-- add PF_X, PF_X, #std_increment
-+ PF add PF_X, PF_X, #std_increment
- .endif
-- tst PF_CTL, #0xF
-- addne PF_X, PF_X, #boost_increment
-- subne PF_CTL, PF_CTL, #1
-- cmp PF_X, ORIG_W
-+ PF tst PF_CTL, #0xF
-+ PF addne PF_X, PF_X, #boost_increment
-+ PF subne PF_CTL, PF_CTL, #1
-+ PF cmp PF_X, ORIG_W
- .if src_bpp_shift >= 0
-- pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- .endif
- .if dst_r_bpp != 0
-- pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- .endif
- .if mask_bpp_shift >= 0
-- pld [PF_MASK, PF_X, lsl #mask_bpp_shift]
-+ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
- .endif
-- subge PF_X, PF_X, ORIG_W
-- subges PF_CTL, PF_CTL, #0x10
-+ PF subge PF_X, PF_X, ORIG_W
-+ PF subges PF_CTL, PF_CTL, #0x10
- .if src_bpp_shift >= 0
-- ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- .endif
- .if dst_r_bpp != 0
-- ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- .endif
- .if mask_bpp_shift >= 0
-- ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-+ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
- .endif
- .endif
- .endm
-@@ -297,6 +308,12 @@ fname:
- PF_DST .req r12
- PF_MASK .req r14
-
-+.if prefetch_distance == 0
-+ .set ADVANCED_PREFETCH_ENABLED, 0
-+.else
-+ .set ADVANCED_PREFETCH_ENABLED, 1
-+.endif
-+
- .if mask_bpp == 0
- ORIG_W .req r7 /* saved original width */
- DUMMY .req r8 /* temporary register */
-@@ -374,12 +391,12 @@ fname:
- ldr MASK_STRIDE, [sp, #52]
- .endif
- mov DST_R, DST_W
-- mov PF_SRC, SRC
-- mov PF_DST, DST_R
-- mov PF_MASK, MASK
-- mov PF_CTL, H, lsl #4
-- /* pf_ctl = 10 | ((h - 1) << 4) */
-- add PF_CTL, #(prefetch_distance - 0x10)
-+ PF mov PF_SRC, SRC
-+ PF mov PF_DST, DST_R
-+ PF mov PF_MASK, MASK
-+ /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
-+ PF mov PF_CTL, H, lsl #4
-+ PF add PF_CTL, #(prefetch_distance - 0x10)
-
- init
- .if regs_shortage
-@@ -412,7 +429,7 @@ fname:
- .else
- add DST_R, DST_R, #lowbit
- .endif
-- add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
-+ PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
- sub W, W, #(lowbit * 8 / dst_w_bpp)
- 1:
- .endif
-@@ -444,7 +461,7 @@ fname:
- (src_basereg - pixblock_size * src_bpp / 64), SRC
- pixld pixblock_size, mask_bpp, \
- (mask_basereg - pixblock_size * mask_bpp / 64), MASK
-- add PF_X, PF_X, #pixblock_size
-+ PF add PF_X, PF_X, #pixblock_size
- process_pixblock_head
- cache_preload 0, pixblock_size
- subs W, W, #(pixblock_size * 2)
-@@ -468,7 +485,7 @@ fname:
- pixld chunk_size, src_bpp, src_basereg, SRC
- pixld chunk_size, mask_bpp, mask_basereg, MASK
- pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
-- add PF_X, PF_X, #chunk_size
-+ PF add PF_X, PF_X, #chunk_size
- 1:
- .endif
- .endr
---
-cgit v0.8.2
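
The PF macro introduced by this patch wraps every prefetch-bookkeeping instruction so that nothing is emitted when prefetch is off; in C-preprocessor terms it behaves like this sketch:

    /* analogue of ".macro PF a, x:vararg / .if enabled / a x / .endif / .endm" */
    #if (ADVANCED_PREFETCH_ENABLED != 0) && (PREFETCH_GLOBALLY_DISABLED == 0)
    #define PF(...) __VA_ARGS__
    #else
    #define PF(...) /* prefetch disabled: emit nothing */
    #endif
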
diff --git a/recipes/xorg-lib/pixman/remove-broken.patch b/recipes/xorg-lib/pixman/remove-broken.patch
deleted file mode 100644
index fd025b4bbd..0000000000
--- a/recipes/xorg-lib/pixman/remove-broken.patch
+++ /dev/null
@@ -1,826 +0,0 @@
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Sun, 26 Jul 2009 22:21:26 +0000 (+0300)
-Subject: ARM: Removal of unused/broken NEON code
-X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=7ef2322eefcccc28a2d45c0da22c0fee88b8f464
-
-ARM: Removal of unused/broken NEON code
----
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 4125d1b..9404c70 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -1895,710 +1895,6 @@ pixman_fill_neon (uint32_t *bits,
- #endif
- }
-
--/* TODO: is a more generic way of doing this being introduced? */
--#define NEON_SCANLINE_BUFFER_PIXELS (1024)
--
--static inline void
--neon_quadword_copy (void * dst,
-- void * src,
-- uint32_t count, /* of quadwords */
-- uint32_t trailer_count /* of bytes */)
--{
-- uint8_t *t_dst = dst, *t_src = src;
--
-- /* Uses aligned multi-register loads to maximise read bandwidth
-- * on uncached memory such as framebuffers.
-- * The accesses do not have the aligned qualifiers, so that the copy
-- * may convert between aligned-uncached and unaligned-cached memory.
-- * It is assumed that the CPU can infer alignedness from the address.
-- */
--
--#ifdef USE_GCC_INLINE_ASM
--
-- asm volatile (
-- " cmp %[count], #8 \n"
-- " blt 1f @ skip oversized fragments \n"
-- "0: @ start with eight quadwords at a time \n"
-- " sub %[count], %[count], #8 \n"
-- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
-- " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
-- " vld1.8 {d24, d25, d26, d27}, [%[src]]! \n"
-- " vld1.8 {d28, d29, d30, d31}, [%[src]]! \n"
-- " cmp %[count], #8 \n"
-- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
-- " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
-- " vst1.8 {d24, d25, d26, d27}, [%[dst]]! \n"
-- " vst1.8 {d28, d29, d30, d31}, [%[dst]]! \n"
-- " bge 0b \n"
-- "1: @ four quadwords \n"
-- " tst %[count], #4 \n"
-- " beq 2f @ skip oversized fragment \n"
-- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
-- " vld1.8 {d20, d21, d22, d23}, [%[src]]! \n"
-- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
-- " vst1.8 {d20, d21, d22, d23}, [%[dst]]! \n"
-- "2: @ two quadwords \n"
-- " tst %[count], #2 \n"
-- " beq 3f @ skip oversized fragment \n"
-- " vld1.8 {d16, d17, d18, d19}, [%[src]]! \n"
-- " vst1.8 {d16, d17, d18, d19}, [%[dst]]! \n"
-- "3: @ one quadword \n"
-- " tst %[count], #1 \n"
-- " beq 4f @ skip oversized fragment \n"
-- " vld1.8 {d16, d17}, [%[src]]! \n"
-- " vst1.8 {d16, d17}, [%[dst]]! \n"
-- "4: @ end \n"
--
-- /* Clobbered input registers marked as input/outputs */
-- : [dst] "+r" (t_dst), [src] "+r" (t_src), [count] "+r" (count)
--
-- /* No unclobbered inputs */
-- :
--
-- /* Clobbered vector registers */
-- : "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
-- "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory");
--
--#else
--
-- while (count >= 8)
-- {
-- uint8x16x4_t t1 = vld4q_u8 (t_src);
-- uint8x16x4_t t2 = vld4q_u8 (t_src + sizeof(uint8x16x4_t));
--
-- t_src += sizeof(uint8x16x4_t) * 2;
-- vst4q_u8 (t_dst, t1);
-- vst4q_u8 (t_dst + sizeof(uint8x16x4_t), t2);
-- t_dst += sizeof(uint8x16x4_t) * 2;
-- count -= 8;
-- }
--
-- if (count & 4)
-- {
-- uint8x16x4_t t1 = vld4q_u8 (t_src);
--
-- t_src += sizeof(uint8x16x4_t);
-- vst4q_u8 (t_dst, t1);
-- t_dst += sizeof(uint8x16x4_t);
-- }
--
-- if (count & 2)
-- {
-- uint8x8x4_t t1 = vld4_u8 (t_src);
--
-- t_src += sizeof(uint8x8x4_t);
-- vst4_u8 (t_dst, t1);
-- t_dst += sizeof(uint8x8x4_t);
-- }
--
-- if (count & 1)
-- {
-- uint8x16_t t1 = vld1q_u8 (t_src);
--
-- t_src += sizeof(uint8x16_t);
-- vst1q_u8 (t_dst, t1);
-- t_dst += sizeof(uint8x16_t);
-- }
--
--#endif /* !USE_GCC_INLINE_ASM */
--
-- if (trailer_count)
-- {
-- if (trailer_count & 8)
-- {
-- uint8x8_t t1 = vld1_u8 (t_src);
--
-- t_src += sizeof(uint8x8_t);
-- vst1_u8 (t_dst, t1);
-- t_dst += sizeof(uint8x8_t);
-- }
--
-- if (trailer_count & 4)
-- {
-- *((uint32_t*) t_dst) = *((uint32_t*) t_src);
--
-- t_dst += 4;
-- t_src += 4;
-- }
--
-- if (trailer_count & 2)
-- {
-- *((uint16_t*) t_dst) = *((uint16_t*) t_src);
--
-- t_dst += 2;
-- t_src += 2;
-- }
--
-- if (trailer_count & 1)
-- {
-- *t_dst++ = *t_src++;
-- }
-- }
--}
--
--static inline void
--solid_over_565_8_pix_neon (uint32_t glyph_colour,
-- uint16_t *dest,
-- uint8_t * in_mask,
-- uint32_t dest_stride, /* bytes, not elements */
-- uint32_t mask_stride,
-- uint32_t count /* 8-pixel groups */)
--{
-- /* Inner loop of glyph blitter (solid colour, alpha mask) */
--
--#ifdef USE_GCC_INLINE_ASM
--
-- asm volatile (
-- " vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]] @ splat solid colour components \n"
-- "0: @ loop \n"
-- " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
-- " vld1.8 {d17}, [%[in_mask]] @ load alpha mask of glyph \n"
-- " vmull.u8 q9, d17, d23 @ apply glyph colour alpha to mask \n"
-- " vshrn.u16 d17, q9, #8 @ reformat it to match original mask \n"
-- " vmvn d18, d17 @ we need the inverse mask for the background \n"
-- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
-- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
-- " vshrn.u16 d4, q0, #3 @ unpack green \n"
-- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
-- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
-- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
-- " vmull.u8 q1, d2, d18 @ apply inverse mask to background red... \n"
-- " vmull.u8 q2, d4, d18 @ ...green... \n"
-- " vmull.u8 q3, d6, d18 @ ...blue \n"
-- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
-- " vmlal.u8 q1, d17, d22 @ add masked foreground red... \n"
-- " vmlal.u8 q2, d17, d21 @ ...green... \n"
-- " vmlal.u8 q3, d17, d20 @ ...blue \n"
-- " add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait \n"
-- " vsri.16 q1, q2, #5 @ pack green behind red \n"
-- " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
-- " vst1.16 {d2, d3}, [%[dest]] @ store composited pixels \n"
-- " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
-- " bne 0b @ next please \n"
--
-- /* Clobbered registers marked as input/outputs */
-- : [dest] "+r" (dest), [in_mask] "+r" (in_mask), [count] "+r" (count)
--
-- /* Inputs */
-- : [dest_stride] "r" (dest_stride), [mask_stride] "r" (mask_stride), [glyph_colour] "r" (&glyph_colour)
--
-- /* Clobbers, including the inputs we modify, and potentially lots of memory */
-- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d19",
-- "d20", "d21", "d22", "d23", "d24", "d25", "cc", "memory"
-- );
--
--#else
--
-- uint8x8x4_t solid_colour = vld4_dup_u8 ((uint8_t*) &glyph_colour);
--
-- while (count--)
-- {
-- uint16x8_t pixels = vld1q_u16 (dest);
-- uint8x8_t mask = vshrn_n_u16 (vmull_u8 (solid_colour.val[3], vld1_u8 (in_mask)), 8);
-- uint8x8_t mask_image = vmvn_u8 (mask);
--
-- uint8x8_t t_red = vshrn_n_u16 (pixels, 8);
-- uint8x8_t t_green = vshrn_n_u16 (pixels, 3);
-- uint8x8_t t_blue = vshrn_n_u16 (vsli_n_u8 (pixels, pixels, 5), 2);
--
-- uint16x8_t s_red = vmull_u8 (vsri_n_u8 (t_red, t_red, 5), mask_image);
-- uint16x8_t s_green = vmull_u8 (vsri_n_u8 (t_green, t_green, 6), mask_image);
-- uint16x8_t s_blue = vmull_u8 (t_blue, mask_image);
--
-- s_red = vmlal (s_red, mask, solid_colour.val[2]);
-- s_green = vmlal (s_green, mask, solid_colour.val[1]);
-- s_blue = vmlal (s_blue, mask, solid_colour.val[0]);
--
-- pixels = vsri_n_u16 (s_red, s_green, 5);
-- pixels = vsri_n_u16 (pixels, s_blue, 11);
-- vst1q_u16 (dest, pixels);
--
-- dest += dest_stride;
-- mask += mask_stride;
-- }
--
--#endif
--}
--
--#if 0 /* this is broken currently */
--static void
--neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
-- pixman_op_t op,
-- pixman_image_t * src_image,
-- pixman_image_t * mask_image,
-- pixman_image_t * dst_image,
-- int32_t src_x,
-- int32_t src_y,
-- int32_t mask_x,
-- int32_t mask_y,
-- int32_t dest_x,
-- int32_t dest_y,
-- int32_t width,
-- int32_t height)
--{
-- uint32_t src, srca;
-- uint16_t *dst_line, *aligned_line;
-- uint8_t *mask_line;
-- uint32_t dst_stride, mask_stride;
-- uint32_t kernel_count, copy_count, copy_tail;
-- uint8_t kernel_offset, copy_offset;
--
-- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
--
-- /* bail out if fully transparent or degenerate */
-- srca = src >> 24;
-- if (src == 0)
-- return;
--
-- if (width == 0 || height == 0)
-- return;
--
-- if (width > NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- /* split the blit, so we can use a fixed-size scanline buffer
-- * TODO: there must be a more elegant way of doing this.
-- */
-- int x;
-- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- neon_composite_over_n_8_0565 (
-- impl, op,
-- src_image, mask_image, dst_image,
-- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
-- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
-- }
--
-- return;
-- }
--
-- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
--
-- /* keep within minimum number of aligned quadwords on width
-- * while also keeping the minimum number of columns to process
-- */
-- {
-- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
-- /* the fast copy should be quadword aligned */
-- copy_offset = dst_line - ((uint16_t*) aligned_left);
-- aligned_line = dst_line - copy_offset;
-- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-- copy_tail = 0;
--
-- if (aligned_right - aligned_left > ceiling_length)
-- {
-- /* unaligned routine is tightest */
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- kernel_offset = copy_offset;
-- }
-- else
-- {
-- /* aligned routine is equally tight, so it is safer to align */
-- kernel_count = copy_count;
-- kernel_offset = 0;
-- }
--
-- /* We should avoid reading beyond scanline ends for safety */
-- if (aligned_line < (dst_line - dest_x) ||
-- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-- {
-- /* switch to precise read */
-- copy_offset = kernel_offset = 0;
-- aligned_line = dst_line;
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- copy_count = (width * sizeof(*dst_line)) >> 4;
-- copy_tail = (width * sizeof(*dst_line)) & 0xF;
-- }
-- }
--
-- {
-- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
-- uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8];
-- int y = height;
--
-- /* row-major order */
-- /* left edge, middle block, right edge */
-- for ( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride)
-- {
-- /* We don't want to overrun the edges of the glyph,
-- * so realign the edge data into known buffers
-- */
-- neon_quadword_copy (glyph_line + copy_offset, mask_line, width >> 4, width & 0xF);
--
-- /* Uncached framebuffer access is really, really slow
-- * if we do it piecemeal. It should be much faster if we
-- * grab it all at once. One scanline should easily fit in
-- * L1 cache, so this should not waste RAM bandwidth.
-- */
-- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
-- /* Apply the actual filter */
-- solid_over_565_8_pix_neon (
-- src, scan_line + kernel_offset,
-- glyph_line + kernel_offset, 8 * sizeof(*dst_line),
-- 8, kernel_count);
--
-- /* Copy the modified scanline back */
-- neon_quadword_copy (dst_line, scan_line + copy_offset,
-- width >> 3, (width & 7) * 2);
-- }
-- }
--}
--#endif
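
All of the removed fast paths stage work through an on-stack scanline buffer so that uncached framebuffer memory is only touched in bursts; the shape of that pattern, as a sketch with a placeholder per-pixel operation:

    #include <stdint.h>
    #include <string.h>

    #define BUF_PIXELS 1024   /* mirrors NEON_SCANLINE_BUFFER_PIXELS */

    /* assumes width <= BUF_PIXELS; the removed code split wider blits */
    static void
    stage_scanlines (uint16_t *dst, int dst_stride, int width, int height)
    {
        uint16_t line[BUF_PIXELS];   /* fits in L1 cache */

        while (height--)
        {
            memcpy (line, dst, width * sizeof *line);   /* burst read  */
            for (int i = 0; i < width; i++)             /* cached work */
                line[i] = (line[i] >> 1) & 0x7BEF;      /* halve r,g,b */
            memcpy (dst, line, width * sizeof *line);   /* burst write */
            dst += dst_stride;
        }
    }
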
--
--#ifdef USE_GCC_INLINE_ASM
--
--static inline void
--plain_over_565_8_pix_neon (uint32_t colour,
-- uint16_t *dest,
-- uint32_t dest_stride, /* bytes, not elements */
-- uint32_t count /* 8-pixel groups */)
--{
-- /* Inner loop for plain translucent rects
-- * (solid colour without alpha mask)
-- */
-- asm volatile (
-- " vld4.8 {d20[], d21[], d22[], d23[]}, [%[colour]] @ solid colour load/splat \n"
-- " vmull.u8 q12, d23, d22 @ premultiply alpha red \n"
-- " vmull.u8 q13, d23, d21 @ premultiply alpha green \n"
-- " vmull.u8 q14, d23, d20 @ premultiply alpha blue \n"
-- " vmvn d18, d23 @ inverse alpha for background \n"
-- "0: @ loop\n"
-- " vld1.16 {d0, d1}, [%[dest]] @ load first pixels from framebuffer \n"
-- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
-- " vshrn.u16 d4, q0, #3 @ unpack green \n"
-- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
-- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
-- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
-- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
-- " vmov q0, q12 @ retrieve foreground red \n"
-- " vmlal.u8 q0, d2, d18 @ blend red - my kingdom for a four-operand MLA \n"
-- " vmov q1, q13 @ retrieve foreground green \n"
-- " vmlal.u8 q1, d4, d18 @ blend green \n"
-- " vmov q2, q14 @ retrieve foreground blue \n"
-- " vmlal.u8 q2, d6, d18 @ blend blue \n"
-- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
-- " vsri.16 q0, q1, #5 @ pack green behind red \n"
-- " vsri.16 q0, q2, #11 @ pack blue into pixels \n"
-- " vst1.16 {d0, d1}, [%[dest]] @ store composited pixels \n"
-- " add %[dest], %[dest], %[dest_stride] @ advance framebuffer pointer \n"
-- " bne 0b @ next please \n"
--
-- /* Clobbered registers marked as input/outputs */
-- : [dest] "+r" (dest), [count] "+r" (count)
--
-- /* Inputs */
-- : [dest_stride] "r" (dest_stride), [colour] "r" (&colour)
--
-- /* Clobbers, including the inputs we modify, and
-- * potentially lots of memory
-- */
-- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d18", "d19",
-- "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
-- "cc", "memory"
-- );
--}
--
--static void
--neon_composite_over_n_0565 (pixman_implementation_t * impl,
-- pixman_op_t op,
-- pixman_image_t * src_image,
-- pixman_image_t * mask_image,
-- pixman_image_t * dst_image,
-- int32_t src_x,
-- int32_t src_y,
-- int32_t mask_x,
-- int32_t mask_y,
-- int32_t dest_x,
-- int32_t dest_y,
-- int32_t width,
-- int32_t height)
--{
-- uint32_t src, srca;
-- uint16_t *dst_line, *aligned_line;
-- uint32_t dst_stride;
-- uint32_t kernel_count, copy_count, copy_tail;
-- uint8_t kernel_offset, copy_offset;
--
-- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
--
-- /* bail out if fully transparent */
-- srca = src >> 24;
-- if (src == 0)
-- return;
--
-- if (width == 0 || height == 0)
-- return;
--
-- if (width > NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- /* split the blit, so we can use a fixed-size scanline buffer
-- * TODO: there must be a more elegant way of doing this.
-- */
-- int x;
--
-- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- neon_composite_over_n_0565 (
-- impl, op,
-- src_image, mask_image, dst_image,
-- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
-- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
-- }
-- return;
-- }
--
-- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
--
-- /* keep within minimum number of aligned quadwords on width
-- * while also keeping the minimum number of columns to process
-- */
-- {
-- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
-- /* the fast copy should be quadword aligned */
-- copy_offset = dst_line - ((uint16_t*) aligned_left);
-- aligned_line = dst_line - copy_offset;
-- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-- copy_tail = 0;
--
-- if (aligned_right - aligned_left > ceiling_length)
-- {
-- /* unaligned routine is tightest */
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- kernel_offset = copy_offset;
-- }
-- else
-- {
-- /* aligned routine is equally tight, so it is safer to align */
-- kernel_count = copy_count;
-- kernel_offset = 0;
-- }
--
-- /* We should avoid reading beyond scanline ends for safety */
-- if (aligned_line < (dst_line - dest_x) ||
-- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-- {
-- /* switch to precise read */
-- copy_offset = kernel_offset = 0;
-- aligned_line = dst_line;
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- copy_count = (width * sizeof(*dst_line)) >> 4;
-- copy_tail = (width * sizeof(*dst_line)) & 0xF;
-- }
-- }
--
-- {
-- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
--
-- /* row-major order */
-- /* left edge, middle block, right edge */
-- for ( ; height--; aligned_line += dst_stride, dst_line += dst_stride)
-- {
-- /* Uncached framebuffer access is really, really slow if we do it piecemeal.
-- * It should be much faster if we grab it all at once.
-- * One scanline should easily fit in L1 cache, so this should
-- * not waste RAM bandwidth.
-- */
-- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
-- /* Apply the actual filter */
-- plain_over_565_8_pix_neon (
-- src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count);
--
-- /* Copy the modified scanline back */
-- neon_quadword_copy (
-- dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
-- }
-- }
--}
--
--static inline void
--ARGB8_over_565_8_pix_neon (uint32_t *src,
-- uint16_t *dest,
-- uint32_t src_stride, /* bytes, not elements */
-- uint32_t count /* 8-pixel groups */)
--{
-- asm volatile (
-- "0: @ loop\n"
-- " pld [%[src], %[src_stride]] @ preload from next scanline \n"
-- " vld1.16 {d0, d1}, [%[dest]] @ load pixels from framebuffer \n"
-- " vld4.8 {d20, d21, d22, d23},[%[src]]! @ load source image pixels \n"
-- " vsli.u16 q3, q0, #5 @ duplicate framebuffer blue bits \n"
-- " vshrn.u16 d2, q0, #8 @ unpack red from framebuffer pixels \n"
-- " vshrn.u16 d4, q0, #3 @ unpack green \n"
-- " vmvn d18, d23 @ we need the inverse alpha for the background \n"
-- " vsri.u8 d2, d2, #5 @ duplicate red bits (extend 5 to 8) \n"
-- " vshrn.u16 d6, q3, #2 @ unpack extended blue (truncate 10 to 8) \n"
-- " vsri.u8 d4, d4, #6 @ duplicate green bits (extend 6 to 8) \n"
-- " vmull.u8 q1, d2, d18 @ apply inverse alpha to background red... \n"
-- " vmull.u8 q2, d4, d18 @ ...green... \n"
-- " vmull.u8 q3, d6, d18 @ ...blue \n"
-- " subs %[count], %[count], #1 @ decrement/test loop counter \n"
-- " vmlal.u8 q1, d23, d22 @ add blended foreground red... \n"
-- " vmlal.u8 q2, d23, d21 @ ...green... \n"
-- " vmlal.u8 q3, d23, d20 @ ...blue \n"
-- " vsri.16 q1, q2, #5 @ pack green behind red \n"
-- " vsri.16 q1, q3, #11 @ pack blue into pixels \n"
-- " vst1.16 {d2, d3}, [%[dest]]! @ store composited pixels \n"
-- " bne 0b @ next please \n"
--
-- /* Clobbered registers marked as input/outputs */
-- : [dest] "+r" (dest), [src] "+r" (src), [count] "+r" (count)
--
-- /* Inputs */
-- : [src_stride] "r" (src_stride)
--
-- /* Clobbers, including the inputs we modify, and potentially lots of memory */
-- : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d20",
-- "d21", "d22", "d23", "cc", "memory"
-- );
--}
--
--static void
--neon_composite_over_8888_0565 (pixman_implementation_t * impl,
-- pixman_op_t op,
-- pixman_image_t * src_image,
-- pixman_image_t * mask_image,
-- pixman_image_t * dst_image,
-- int32_t src_x,
-- int32_t src_y,
-- int32_t mask_x,
-- int32_t mask_y,
-- int32_t dest_x,
-- int32_t dest_y,
-- int32_t width,
-- int32_t height)
--{
-- uint32_t *src_line;
-- uint16_t *dst_line, *aligned_line;
-- uint32_t dst_stride, src_stride;
-- uint32_t kernel_count, copy_count, copy_tail;
-- uint8_t kernel_offset, copy_offset;
--
-- /* we assume mask is opaque
-- * so the only alpha to deal with is embedded in src
-- */
-- if (width > NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- /* split the blit, so we can use a fixed-size scanline buffer */
-- int x;
-- for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
-- {
-- neon_composite_over_8888_0565 (
-- impl, op,
-- src_image, mask_image, dst_image,
-- src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
-- (x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
-- }
-- return;
-- }
--
-- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
--
-- /* keep within minimum number of aligned quadwords on width
-- * while also keeping the minimum number of columns to process
-- */
-- {
-- unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-- unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-- unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
-- /* the fast copy should be quadword aligned */
-- copy_offset = dst_line - ((uint16_t*) aligned_left);
-- aligned_line = dst_line - copy_offset;
-- copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-- copy_tail = 0;
--
-- if (aligned_right - aligned_left > ceiling_length)
-- {
-- /* unaligned routine is tightest */
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- kernel_offset = copy_offset;
-- }
-- else
-- {
-- /* aligned routine is equally tight, so it is safer to align */
-- kernel_count = copy_count;
-- kernel_offset = 0;
-- }
--
-- /* We should avoid reading beyond scanline ends for safety */
-- if (aligned_line < (dst_line - dest_x) ||
-- (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-- {
-- /* switch to precise read */
-- copy_offset = kernel_offset = 0;
-- aligned_line = dst_line;
-- kernel_count = (uint32_t) (ceiling_length >> 4);
-- copy_count = (width * sizeof(*dst_line)) >> 4;
-- copy_tail = (width * sizeof(*dst_line)) & 0xF;
-- }
-- }
--
-- /* Preload the first input scanline */
-- {
-- uint8_t *src_ptr = (uint8_t*) src_line;
-- uint32_t count = (width + 15) / 16;
--
--#ifdef USE_GCC_INLINE_ASM
-- asm volatile (
-- "0: @ loop \n"
-- " subs %[count], %[count], #1 \n"
-- " pld [%[src]] \n"
-- " add %[src], %[src], #64 \n"
-- " bgt 0b \n"
--
-- /* Clobbered input registers marked as input/outputs */
-- : [src] "+r" (src_ptr), [count] "+r" (count)
-- : /* no unclobbered inputs */
-- : "cc"
-- );
--#else
-- do
-- {
-- __pld (src_ptr);
-- src_ptr += 64;
-- }
-- while (--count);
--#endif
-- }
--
-- {
-- uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
--
-- /* row-major order */
-- /* left edge, middle block, right edge */
-- for ( ; height--; src_line += src_stride, aligned_line += dst_stride)
-- {
-- /* Uncached framebuffer access is really, really slow if we do
-- * it piecemeal. It should be much faster if we grab it all at
-- * once. One scanline should easily fit in L1 cache, so this
-- * should not waste RAM bandwidth.
-- */
-- neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
-- /* Apply the actual filter */
-- ARGB8_over_565_8_pix_neon (
-- src_line, scan_line + kernel_offset,
-- src_stride * sizeof(*src_line), kernel_count);
--
-- /* Copy the modified scanline back */
-- neon_quadword_copy (dst_line,
-- scan_line + copy_offset,
-- width >> 3, (width & 7) * 2);
-- }
-- }
--}
--
--#endif /* USE_GCC_INLINE_ASM */
--
- static const pixman_fast_path_t arm_neon_fast_path_array[] =
- {
- { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8888_8_8, 0 },
-@@ -2612,12 +1908,6 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
- #ifdef USE_GCC_INLINE_ASM
- { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_16_16, 0 },
- { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_src_16_16, 0 },
--#if 0 /* this code has some bugs */
-- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565, 0 },
-- { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_n_0565, 0 },
-- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_8888_0565, 0 },
-- { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, neon_composite_over_8888_0565, 0 },
--#endif
- #endif
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_8888_8888, 0 },
- { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_8888_8888, 0 },
-@@ -2668,79 +1958,6 @@ arm_neon_composite (pixman_implementation_t *imp,
- }
-
- static pixman_bool_t
--pixman_blt_neon (void *src_bits,
-- void *dst_bits,
-- int src_stride,
-- int dst_stride,
-- int src_bpp,
-- int dst_bpp,
-- int src_x,
-- int src_y,
-- int dst_x,
-- int dst_y,
-- int width,
-- int height)
--{
-- if (!width || !height)
-- return TRUE;
--
-- /* accelerate only straight copies involving complete bytes */
-- if (src_bpp != dst_bpp || (src_bpp & 7))
-- return FALSE;
--
-- {
-- uint32_t bytes_per_pixel = src_bpp >> 3;
-- uint32_t byte_width = width * bytes_per_pixel;
-- /* parameter is in words for some reason */
-- int32_t src_stride_bytes = src_stride * 4;
-- int32_t dst_stride_bytes = dst_stride * 4;
-- uint8_t *src_bytes = ((uint8_t*) src_bits) +
-- src_y * src_stride_bytes + src_x * bytes_per_pixel;
-- uint8_t *dst_bytes = ((uint8_t*) dst_bits) +
-- dst_y * dst_stride_bytes + dst_x * bytes_per_pixel;
-- uint32_t quadword_count = byte_width / 16;
-- uint32_t offset = byte_width % 16;
--
-- while (height--)
-- {
-- neon_quadword_copy (dst_bytes, src_bytes, quadword_count, offset);
-- src_bytes += src_stride_bytes;
-- dst_bytes += dst_stride_bytes;
-- }
-- }
--
-- return TRUE;
--}
--
--static pixman_bool_t
--arm_neon_blt (pixman_implementation_t *imp,
-- uint32_t * src_bits,
-- uint32_t * dst_bits,
-- int src_stride,
-- int dst_stride,
-- int src_bpp,
-- int dst_bpp,
-- int src_x,
-- int src_y,
-- int dst_x,
-- int dst_y,
-- int width,
-- int height)
--{
-- if (pixman_blt_neon (
-- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-- src_x, src_y, dst_x, dst_y, width, height))
-- {
-- return TRUE;
-- }
--
-- return _pixman_implementation_blt (
-- imp->delegate,
-- src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-- src_x, src_y, dst_x, dst_y, width, height);
--}
--
--static pixman_bool_t
- arm_neon_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
-@@ -2765,9 +1982,6 @@ _pixman_implementation_create_arm_neon (void)
- pixman_implementation_t *imp = _pixman_implementation_create (simd);
-
- imp->composite = arm_neon_composite;
--#if 0 /* this code has some bugs */
-- imp->blt = arm_neon_blt;
--#endif
- imp->fill = arm_neon_fill;
-
- return imp;
diff --git a/recipes/xorg-lib/pixman/src-8888-0565.patch b/recipes/xorg-lib/pixman/src-8888-0565.patch
deleted file mode 100644
index c544225f65..0000000000
--- a/recipes/xorg-lib/pixman/src-8888-0565.patch
+++ /dev/null
@@ -1,324 +0,0 @@
-From 6494f9ae8820078d0e6109bf8f294156f7a5da4c Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 05 Mar 2010 00:40:34 +0000
-Subject: ARM: added 'armv6_composite_src_8888_0565' fast path
-
-Provides ~3x performance improvement when working with
-data in L1 cache, and ~80% performance improvement when working
-with memory. This fast path is important for 32bpp -> 16bpp
-color format conversion and is commonly used with 16bpp desktop.
-
-Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
-
-before:
-
- src_8888_0565 = L1: 21.54 M: 15.62
-
-after (armv4):
-
- src_8888_0565 = L1: 45.26 M: 23.29
-
-after (armv6):
-
- src_8888_0565 = L1: 60.62 M: 28.37
----
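
Unlike a plain per-channel repack, the scalar path below grabs red and blue together with a single 0x1F001F mask; its core, as a sketch:

    #include <stdint.h>

    static inline uint16_t
    x888_to_0565_sketch (uint32_t x)
    {
        uint32_t a = (x >> 3) & 0x1F001F;   /* r5 at bits 20..16, b5 at 4..0 */
        x &= 0xFC00;                        /* g6 at bits 15..10             */
        a |= a >> 5;                        /* copy r5 down to bits 15..11   */
        a |= x >> 5;                        /* g6 into bits 10..5            */
        return (uint16_t) a;                /* truncation drops the stray r5 */
    }
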
-diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
-index c375c01..69243c1 100644
---- a/pixman/pixman-arm-simd.c
-+++ b/pixman/pixman-arm-simd.c
-@@ -604,6 +604,282 @@ armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
- dst_stride - width, mask_stride - width, height);
- }
-
-+static inline void
-+armv4_composite_src_8888_0565_asm (
-+ uint16_t *dst, uint32_t *src, int w, int dst_stride,
-+ int src_stride, int h)
-+{
-+ uint32_t a, x, y, c1F001F = 0x1F001F, cFFFF = 0xFFFF;
-+ int backup_w = w;
-+ while (h--)
-+ {
-+ w = backup_w;
-+ if (w > 0 && (uintptr_t)dst & 2)
-+ {
-+ x = *src++;
-+
-+ a = (x >> 3) & c1F001F;
-+ x &= 0xFC00;
-+ a |= a >> 5;
-+ a |= x >> 5;
-+
-+ *dst++ = a;
-+ w--;
-+ }
-+
-+ asm volatile(
-+ "subs %[w], %[w], #2\n"
-+ "blt 2f\n"
-+ "1:\n"
-+ "ldr %[x], [%[src]], #4\n"
-+ "ldr %[y], [%[src]], #4\n"
-+ "subs %[w], %[w], #2\n"
-+
-+ "and %[a], %[c1F001F], %[x], lsr #3\n"
-+ "and %[x], %[x], #0xFC00\n\n"
-+ "orr %[a], %[a], %[a], lsr #5\n"
-+ "orr %[x], %[a], %[x], lsr #5\n"
-+
-+ "and %[a], %[c1F001F], %[y], lsr #3\n"
-+ "and %[y], %[y], #0xFC00\n\n"
-+ "orr %[a], %[a], %[a], lsr #5\n"
-+ "orr %[y], %[a], %[y], lsr #5\n"
-+ /*
-+ * Writing single 32-bit value is much faster than two
-+ * separate 16-bit values for older CPUs without (efficient)
-+ * write combining, even though it costs an extra instruction.
-+ */
-+ "and %[x], %[x], %[cFFFF]\n"
-+ "orr %[x], %[x], %[y], lsl #16\n"
-+ "str %[x], [%[dst]], #4\n"
-+ "bge 1b\n"
-+ "2:\n"
-+ : [c1F001F] "+&r" (c1F001F), [cFFFF] "+&r" (cFFFF),
-+ [src] "+&r" (src), [dst] "+&r" (dst), [a] "=&r" (a),
-+ [x] "=&r" (x), [y] "=&r" (y), [w] "+&r" (w)
-+ );
-+
-+ if (w & 1)
-+ {
-+ x = *src++;
-+
-+ a = (x >> 3) & c1F001F;
-+ x = x & 0xFC00;
-+ a |= a >> 5;
-+ a |= x >> 5;
-+
-+ *dst++ = a;
-+ }
-+
-+ src += src_stride - backup_w;
-+ dst += dst_stride - backup_w;
-+ }
-+}
-+
-+/*
-+ * Conversion x8r8g8b8 -> r5g6b5
-+ *
-+ * Note: 'w' must be >= 7 here
-+ */
-+static void __attribute__((naked))
-+armv6_composite_src_8888_0565_asm (
-+ uint16_t *dst, uint32_t *src, int w, int dst_stride,
-+ int src_stride, int h)
-+{
-+ asm volatile(
-+ /* define supplementary macros */
-+ ".macro cvt8888to565 PIX\n"
-+ "and A, C1F001F, \\PIX, lsr #3\n"
-+ "and \\PIX, \\PIX, #0xFC00\n\n"
-+ "orr A, A, A, lsr #5\n"
-+ "orr \\PIX, A, \\PIX, lsr #5\n"
-+ ".endm\n"
-+
-+ ".macro combine_pixels_pair PIX1, PIX2\n"
-+ /* Note: assume little endian byte order */
-+ "pkhbt \\PIX1, \\PIX1, \\PIX2, lsl #16\n"
-+ ".endm\n"
-+
-+ /* function entry, save all registers (10 words) to stack */
-+ "stmdb sp!, {r4-r11, ip, lr}\n"
-+
-+ /* define some aliases */
-+ "DST .req r0\n"
-+ "SRC .req r1\n"
-+ "W .req r2\n"
-+ "H .req r3\n"
-+
-+ "TMP1 .req r4\n"
-+ "TMP2 .req r5\n"
-+ "TMP3 .req r6\n"
-+ "TMP4 .req r7\n"
-+ "TMP5 .req r8\n"
-+ "TMP6 .req r9\n"
-+ "TMP7 .req r10\n"
-+ "TMP8 .req r11\n"
-+
-+ "C1F001F .req ip\n"
-+ "A .req lr\n"
-+
-+ "ldr TMP1, [sp, #(10*4+0)]\n" /* load src_stride */
-+ "ldr C1F001F, =0x1F001F\n"
-+ "sub r3, r3, W\n"
-+ "str r3, [sp, #(10*4+0)]\n" /* store (dst_stride-w) */
-+ "ldr r3, [sp, #(10*4+4)]\n" /* load h */
-+ "sub TMP1, TMP1, W\n"
-+ "str TMP1, [sp, #(10*4+4)]\n" /* store (src_stride-w) */
-+
-+ "str W, [sp, #(8*4)]\n" /* saved ip = W */
-+
-+ "0:\n"
-+ "subs H, H, #1\n"
-+ "blt 6f\n"
-+ "1:\n"
-+ /* align DST at 4 byte boundary */
-+ "tst DST, #2\n"
-+ "beq 2f\n"
-+ "ldr TMP1, [SRC], #4\n"
-+ "sub W, W, #1\n"
-+ "cvt8888to565 TMP1\n"
-+ "strh TMP1, [DST], #2\n"
-+ "2:"
-+ /* align DST at 8 byte boundary */
-+ "tst DST, #4\n"
-+ "beq 2f\n"
-+ "ldmia SRC!, {TMP1, TMP2}\n"
-+ "sub W, W, #2\n"
-+ "cvt8888to565 TMP1\n"
-+ "cvt8888to565 TMP2\n"
-+ "combine_pixels_pair TMP1, TMP2\n"
-+ "str TMP1, [DST], #4\n"
-+ "2:"
-+ /* align DST at 16 byte boundary */
-+ "tst DST, #8\n"
-+ "beq 2f\n"
-+ "ldmia SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
-+ "sub W, W, #4\n"
-+ "cvt8888to565 TMP1\n"
-+ "cvt8888to565 TMP2\n"
-+ "cvt8888to565 TMP3\n"
-+ "cvt8888to565 TMP4\n"
-+ "combine_pixels_pair TMP1, TMP2\n"
-+ "combine_pixels_pair TMP3, TMP4\n"
-+ "stmia DST!, {TMP1, TMP3}\n"
-+ "2:"
-+ /* inner loop, process 8 pixels per iteration */
-+ "subs W, W, #8\n"
-+ "blt 4f\n"
-+ "3:\n"
-+ "ldmia SRC!, {TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8}\n"
-+ "subs W, W, #8\n"
-+ "cvt8888to565 TMP1\n"
-+ "cvt8888to565 TMP2\n"
-+ "cvt8888to565 TMP3\n"
-+ "cvt8888to565 TMP4\n"
-+ "cvt8888to565 TMP5\n"
-+ "cvt8888to565 TMP6\n"
-+ "cvt8888to565 TMP7\n"
-+ "cvt8888to565 TMP8\n"
-+ "combine_pixels_pair TMP1, TMP2\n"
-+ "combine_pixels_pair TMP3, TMP4\n"
-+ "combine_pixels_pair TMP5, TMP6\n"
-+ "combine_pixels_pair TMP7, TMP8\n"
-+ "stmia DST!, {TMP1, TMP3, TMP5, TMP7}\n"
-+ "bge 3b\n"
-+ "4:\n"
-+
-+ /* process the remaining pixels */
-+ "tst W, #4\n"
-+ "beq 4f\n"
-+ "ldmia SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
-+ "cvt8888to565 TMP1\n"
-+ "cvt8888to565 TMP2\n"
-+ "cvt8888to565 TMP3\n"
-+ "cvt8888to565 TMP4\n"
-+ "combine_pixels_pair TMP1, TMP2\n"
-+ "combine_pixels_pair TMP3, TMP4\n"
-+ "stmia DST!, {TMP1, TMP3}\n"
-+ "4:\n"
-+ "tst W, #2\n"
-+ "beq 4f\n"
-+ "ldmia SRC!, {TMP1, TMP2}\n"
-+ "cvt8888to565 TMP1\n"
-+ "cvt8888to565 TMP2\n"
-+ "combine_pixels_pair TMP1, TMP2\n"
-+ "str TMP1, [DST], #4\n"
-+ "4:\n"
-+ "tst W, #1\n"
-+ "beq 4f\n"
-+ "ldr TMP1, [SRC], #4\n"
-+ "cvt8888to565 TMP1\n"
-+ "strh TMP1, [DST], #2\n"
-+ "4:\n"
-+ "ldr TMP1, [sp, #(10*4+0)]\n" /* (dst_stride-w) */
-+ "ldr TMP2, [sp, #(10*4+4)]\n" /* (src_stride-w) */
-+ "ldr W, [sp, #(8*4)]\n"
-+ "subs H, H, #1\n"
-+ "add DST, DST, TMP1, lsl #1\n"
-+ "add SRC, SRC, TMP2, lsl #2\n"
-+ "bge 1b\n"
-+ "6:\n"
-+ /* restore all registers and return */
-+ "ldmia sp!, {r4-r11, ip, pc}\n"
-+ ".ltorg\n"
-+
-+ ".unreq DST\n"
-+ ".unreq SRC\n"
-+ ".unreq W\n"
-+ ".unreq H\n"
-+
-+ ".unreq TMP1\n"
-+ ".unreq TMP2\n"
-+ ".unreq TMP3\n"
-+ ".unreq TMP4\n"
-+ ".unreq TMP5\n"
-+ ".unreq TMP6\n"
-+ ".unreq TMP7\n"
-+ ".unreq TMP8\n"
-+
-+ ".unreq C1F001F\n"
-+ ".unreq A\n"
-+
-+ ".purgem cvt8888to565\n"
-+ ".purgem combine_pixels_pair\n"
-+ );
-+}
-+
-+static void
-+armv6_composite_src_8888_0565 (pixman_implementation_t * impl,
-+ pixman_op_t op,
-+ pixman_image_t * src_image,
-+ pixman_image_t * mask_image,
-+ pixman_image_t * dst_image,
-+ int32_t src_x,
-+ int32_t src_y,
-+ int32_t mask_x,
-+ int32_t mask_y,
-+ int32_t dest_x,
-+ int32_t dest_y,
-+ int32_t width,
-+ int32_t height)
-+{
-+ uint32_t *src;
-+ uint16_t *dst;
-+ int src_stride, dst_stride;
-+
-+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
-+ dst_stride, dst, 1);
-+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
-+ src_stride, src, 1);
-+
-+ if (width < 7)
-+ armv4_composite_src_8888_0565_asm (dst, src, width,
-+ dst_stride, src_stride, height);
-+ else
-+ armv6_composite_src_8888_0565_asm (dst, src, width,
-+ dst_stride, src_stride, height);
-+}
-+
- #endif
-
- static const pixman_fast_path_t arm_simd_fast_paths[] =
-@@ -624,6 +900,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
- #if defined(__ARM_EABI__) && defined(__linux__)
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
-+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
-+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
-+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
-+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
- #endif
- { PIXMAN_OP_NONE },
- };
---
-cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman/tls.patch b/recipes/xorg-lib/pixman/tls.patch
deleted file mode 100644
index 316caed65f..0000000000
--- a/recipes/xorg-lib/pixman/tls.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From 714559dccda3165a72f0a9935c1edc3aef535f30 Mon Sep 17 00:00:00 2001
-From: Søren Sandmann Pedersen <ssp@redhat.com>
-Date: Wed, 07 Apr 2010 05:44:12 +0000
-Subject: Fixes for pthread thread local storage.
-
-The tls_name_key variable is passed to tls_name_get(), and the first
-time this happens it isn't initialized. tls_name_get() then passes it
-on to tls_name_alloc() which passes it on to pthread_setspecific()
-leading to undefined behavior.
-
-None of this is actually necessary because there is only one such
-variable per thread local variable, so it doesn't need to be passed
-as a parameter at all.
-
-All of this was pointed out by Tor Lillqvist on the cairo mailing
-list.
----
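
Expanded for a single variable, the corrected macro generates code along these lines (a sketch; foo_t stands in for the per-thread type):

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct { int dummy; } foo_t;   /* hypothetical per-thread type */

    static pthread_once_t tls_foo_once_control = PTHREAD_ONCE_INIT;
    static pthread_key_t  tls_foo_key;

    static void
    tls_foo_make_key (void)
    {
        pthread_key_create (&tls_foo_key, free);
    }

    static foo_t *
    tls_foo_get (void)
    {
        foo_t *value = NULL;
        if (pthread_once (&tls_foo_once_control, tls_foo_make_key) == 0)
        {
            value = pthread_getspecific (tls_foo_key);
            if (!value)
            {
                value = calloc (1, sizeof (foo_t));
                if (value)
                    pthread_setspecific (tls_foo_key, value);  /* static key, no parameter */
            }
        }
        return value;
    }
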
-diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
-index cdac0d8..531c8c9 100644
---- a/pixman/pixman-compiler.h
-+++ b/pixman/pixman-compiler.h
-@@ -99,16 +99,16 @@
- } \
- \
- static type * \
-- tls_ ## name ## _alloc (key) \
-+ tls_ ## name ## _alloc (void) \
- { \
- type *value = calloc (1, sizeof (type)); \
- if (value) \
-- pthread_setspecific (key, value); \
-+ pthread_setspecific (tls_ ## name ## _key, value); \
- return value; \
- } \
- \
- static force_inline type * \
-- tls_ ## name ## _get (key) \
-+ tls_ ## name ## _get (void) \
- { \
- type *value = NULL; \
- if (pthread_once (&tls_ ## name ## _once_control, \
-@@ -116,13 +116,13 @@
- { \
- value = pthread_getspecific (tls_ ## name ## _key); \
- if (!value) \
-- value = tls_ ## name ## _alloc (key); \
-+ value = tls_ ## name ## _alloc (); \
- } \
- return value; \
- }
-
- # define PIXMAN_GET_THREAD_LOCAL(name) \
-- tls_ ## name ## _get (tls_ ## name ## _key)
-+ tls_ ## name ## _get ()
-
- #else
-
---
-cgit v0.8.3-6-g21f6