17 files changed, 0 insertions, 5847 deletions
diff --git a/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
deleted file mode 100644
index a2cda2438e..0000000000
--- a/recipes/xorg-lib/pixman/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 8ea1a333de202018a862a7b04b94479d3109274b Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Tue, 16 Mar 2010 16:55:28 +0100
-Subject: [PATCH 1/5] Generic C implementation of pixman_blt with overlapping support
-
-Uses memcpy/memmove functions to copy pixels, can handle the
-case when both source and destination areas are in the same
-image (this is useful for scrolling).
-
-It is assumed that copying direction is only important when
-using the same image for both source and destination (and
-src_stride == dst_stride). Copying direction is undefined
-for the images with different source and destination stride
-which happen to be in the overlapped areas (but this is an
-unrealistic case anyway).
----
- pixman/pixman-general.c |   21 ++++++++++++++++++---
- pixman/pixman-private.h |   43 +++++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 61 insertions(+), 3 deletions(-)
-
-diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
-index bddf79a..f525744 100644
---- a/pixman/pixman-general.c
-+++ b/pixman/pixman-general.c
-@@ -285,9 +285,24 @@ general_blt (pixman_implementation_t *imp,
-              int                      width,
-              int                      height)
- {
--    /* We can't blit unless we have sse2 or mmx */
--
--    return FALSE;
-+    uint8_t *dst_bytes = (uint8_t *)dst_bits;
-+    uint8_t *src_bytes = (uint8_t *)src_bits;
-+    int bpp;
-+
-+    if (src_bpp != dst_bpp || src_bpp & 7)
-+	return FALSE;
-+
-+    bpp = src_bpp >> 3;
-+    width *= bpp;
-+    src_stride *= 4;
-+    dst_stride *= 4;
-+    pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
-+                       dst_bytes + dst_y * dst_stride + dst_x * bpp,
-+                       src_stride,
-+                       dst_stride,
-+                       width,
-+                       height);
-+    return TRUE;
- }
- 
- static pixman_bool_t
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index d5767af..eeb677d 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -10,6 +10,7 @@
- 
- #include "pixman.h"
- #include <time.h>
-+#include <string.h>
- #include <assert.h>
- #include <stdio.h>
- #include <string.h>
-@@ -867,4 +868,46 @@ void pixman_timer_register (pixman_timer_t *timer);
- 
- #endif /* PIXMAN_TIMERS */
- 
-+/* a helper function, can blit 8-bit images with src/dst overlapping support */
-+static inline void
-+pixman_blt_helper (uint8_t *src_bytes,
-+                   uint8_t *dst_bytes,
-+                   int      src_stride,
-+                   int      dst_stride,
-+                   int      width,
-+                   int      height)
-+{
-+    /*
-+     * The second part of this check is not strictly needed, but it prevents
-+     * unnecessary upside-down processing of areas which belong to different
-+     * images. Upside-down processing can be slower with fixed-distance-ahead
-+     * prefetch and perceived as having more tearing.
-+     */
-+    if (src_bytes < dst_bytes + width &&
-+	src_bytes + src_stride * height > dst_bytes)
-+    {
-+	src_bytes += src_stride * height - src_stride;
-+	dst_bytes += dst_stride * height - dst_stride;
-+	dst_stride = -dst_stride;
-+	src_stride = -src_stride;
-+	/* Horizontal scrolling to the left needs memmove */
-+	if (src_bytes + width > dst_bytes)
-+	{
-+	    while (--height >= 0)
-+	    {
-+		memmove (dst_bytes, src_bytes, width);
-+		dst_bytes += dst_stride;
-+		src_bytes += src_stride;
-+	    }
-+	    return;
-+	}
-+    }
-+    while (--height >= 0)
-+    {
-+	memcpy (dst_bytes, src_bytes, width);
-+	dst_bytes += dst_stride;
-+	src_bytes += src_stride;
-+    }
-+}
-+
- #endif /* PIXMAN_PRIVATE_H */
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
deleted file mode 100644
index 003337f48d..0000000000
--- a/recipes/xorg-lib/pixman/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 3170d9f5e927681a2516bcec52b317d1d4785e25 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 22 Oct 2009 05:45:47 +0300
-Subject: [PATCH 2/5] Support of overlapping src/dst for pixman_blt_mmx
-
----
- pixman/pixman-mmx.c |   55 +++++++++++++++++++++++++++++---------------------
- 1 files changed, 32 insertions(+), 23 deletions(-)
-
-diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
-index e084e7f..6212b31 100644
---- a/pixman/pixman-mmx.c
-+++ b/pixman/pixman-mmx.c
-@@ -2994,34 +2994,43 @@ pixman_blt_mmx (uint32_t *src_bits,
- {
-     uint8_t *   src_bytes;
-     uint8_t *   dst_bytes;
--    int byte_width;
-+    int         bpp;
- 
--    if (src_bpp != dst_bpp)
-+    if (src_bpp != dst_bpp || src_bpp & 7)
- 	return FALSE;
- 
--    if (src_bpp == 16)
--    {
--	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
--	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
--	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
--	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
--	byte_width = 2 * width;
--	src_stride *= 2;
--	dst_stride *= 2;
--    }
--    else if (src_bpp == 32)
-+    bpp = src_bpp >> 3;
-+    width *= bpp;
-+    src_stride *= 4;
-+    dst_stride *= 4;
-+    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+    if (src_bpp != 16 && src_bpp != 32)
-     {
--	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
--	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
--	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
--	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
--	byte_width = 4 * width;
--	src_stride *= 4;
--	dst_stride *= 4;
-+	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+	                   width, height);
-+	return TRUE;
-     }
--    else
-+
-+    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
-     {
--	return FALSE;
-+	src_bytes += src_stride * height - src_stride;
-+	dst_bytes += dst_stride * height - dst_stride;
-+	dst_stride = -dst_stride;
-+	src_stride = -src_stride;
-+
-+	if (src_bytes + width > dst_bytes)
-+	{
-+	    /* TODO: reverse scanline copy using MMX */
-+	    while (--height >= 0)
-+	    {
-+		memmove (dst_bytes, src_bytes, width);
-+		dst_bytes += dst_stride;
-+		src_bytes += src_stride;
-+	    }
-+	    return TRUE;
-+	}
-     }
- 
-     while (height--)
-@@ -3031,7 +3040,7 @@ pixman_blt_mmx (uint32_t *src_bits,
- 	uint8_t *d = dst_bytes;
- 	src_bytes += src_stride;
- 	dst_bytes += dst_stride;
--	w = byte_width;
-+	w = width;
- 
- 	while (w >= 2 && ((unsigned long)d & 3))
- 	{
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
deleted file mode 100644
index 7e8f34f6bd..0000000000
--- a/recipes/xorg-lib/pixman/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From f07cd58c643b490dcb1ef7be2642926cfeca1e69 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 22 Oct 2009 05:45:54 +0300
-Subject: [PATCH 3/5] Support of overlapping src/dst for pixman_blt_sse2
-
----
- pixman/pixman-sse2.c |   55 +++++++++++++++++++++++++++++--------------------
- 1 files changed, 32 insertions(+), 23 deletions(-)
-
-diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
-index 946e7ba..66053ae 100644
---- a/pixman/pixman-sse2.c
-+++ b/pixman/pixman-sse2.c
-@@ -5299,34 +5299,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
- {
-     uint8_t *   src_bytes;
-     uint8_t *   dst_bytes;
--    int byte_width;
-+    int         bpp;
- 
--    if (src_bpp != dst_bpp)
-+    if (src_bpp != dst_bpp || src_bpp & 7)
- 	return FALSE;
- 
--    if (src_bpp == 16)
--    {
--	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
--	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
--	src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
--	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
--	byte_width = 2 * width;
--	src_stride *= 2;
--	dst_stride *= 2;
--    }
--    else if (src_bpp == 32)
-+    bpp = src_bpp >> 3;
-+    width *= bpp;
-+    src_stride *= 4;
-+    dst_stride *= 4;
-+    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+    if (src_bpp != 16 && src_bpp != 32)
-     {
--	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
--	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
--	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
--	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
--	byte_width = 4 * width;
--	src_stride *= 4;
--	dst_stride *= 4;
-+	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+	                   width, height);
-+	return TRUE;
-     }
--    else
-+
-+    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
-     {
--	return FALSE;
-+	src_bytes += src_stride * height - src_stride;
-+	dst_bytes += dst_stride * height - dst_stride;
-+	dst_stride = -dst_stride;
-+	src_stride = -src_stride;
-+
-+	if (src_bytes + width > dst_bytes)
-+	{
-+	    /* TODO: reverse scanline copy using SSE2 */
-+	    while (--height >= 0)
-+	    {
-+		memmove (dst_bytes, src_bytes, width);
-+		dst_bytes += dst_stride;
-+		src_bytes += src_stride;
-+	    }
-+	    return TRUE;
-+	}
-     }
- 
-     cache_prefetch ((__m128i*)src_bytes);
-@@ -5339,7 +5348,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
- 	uint8_t *d = dst_bytes;
- 	src_bytes += src_stride;
- 	dst_bytes += dst_stride;
--	w = byte_width;
-+	w = width;
- 
- 	cache_prefetch_next ((__m128i*)s);
- 	cache_prefetch_next ((__m128i*)d);
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
deleted file mode 100644
index 0ba5b843b5..0000000000
--- a/recipes/xorg-lib/pixman/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From e0542866c466ad512d69292df098d4b880e35e52 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Wed, 18 Nov 2009 06:08:48 +0200
-Subject: [PATCH 4/5] Support of overlapping src/dst for pixman_blt_neon
-
----
- pixman/pixman-arm-neon.c |   62 +++++++++++++++++++++++++++++++++++++--------
- 1 files changed, 51 insertions(+), 11 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 24ceeeb..134493d 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -360,26 +360,66 @@ pixman_blt_neon (uint32_t *src_bits,
-                  int       width,
-                  int       height)
- {
--    if (src_bpp != dst_bpp)
-+    uint8_t *   src_bytes;
-+    uint8_t *   dst_bytes;
-+    int         bpp;
-+
-+    if (src_bpp != dst_bpp || src_bpp & 7)
- 	return FALSE;
- 
-+    bpp = src_bpp >> 3;
-+    width *= bpp;
-+    src_stride *= 4;
-+    dst_stride *= 4;
-+    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
-+    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
-+
-+    if (src_bpp != 16 && src_bpp != 32)
-+    {
-+	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
-+	                   width, height);
-+	return TRUE;
-+    }
-+
-+    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
-+    {
-+	src_bytes += src_stride * height - src_stride;
-+	dst_bytes += dst_stride * height - dst_stride;
-+	dst_stride = -dst_stride;
-+	src_stride = -src_stride;
-+
-+	if (src_bytes + width > dst_bytes)
-+	{
-+	    /* TODO: reverse scanline copy using NEON */
-+	    while (--height >= 0)
-+	    {
-+		memmove (dst_bytes, src_bytes, width);
-+		dst_bytes += dst_stride;
-+		src_bytes += src_stride;
-+	    }
-+	    return TRUE;
-+	}
-+    }
-+
-     switch (src_bpp)
-     {
-     case 16:
- 	pixman_composite_src_0565_0565_asm_neon (
--		width, height,
--		(uint16_t *)(((char *) dst_bits) +
--		dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
--		(uint16_t *)(((char *) src_bits) +
--		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
-+		width >> 1,
-+		height,
-+		(uint16_t *) dst_bytes,
-+		dst_stride >> 1,
-+		(uint16_t *) src_bytes,
-+		src_stride >> 1);
- 	return TRUE;
-     case 32:
- 	pixman_composite_src_8888_8888_asm_neon (
--		width, height,
--		(uint32_t *)(((char *) dst_bits) +
--		dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
--		(uint32_t *)(((char *) src_bits) +
--		src_y * src_stride * 4 + src_x * 4), src_stride);
-+		width >> 2,
-+		height,
-+		(uint32_t *) dst_bytes,
-+		dst_stride >> 2,
-+		(uint32_t *) src_bytes,
-+		src_stride >> 2);
- 	return TRUE;
-     default:
- 	return FALSE;
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch b/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
deleted file mode 100644
index 769ed2e7d0..0000000000
--- a/recipes/xorg-lib/pixman/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch
+++ /dev/null
@@ -1,169 +0,0 @@
-From d51b10a2750d99543a0c92ca44802aa7a4d70e54 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 5/5] ARM: added NEON optimizations for fetch/store r5g6b5 scanline
-
----
- pixman/pixman-access.c       |   23 ++++++++++++++++++++++-
- pixman/pixman-arm-neon-asm.S |   20 ++++++++++++++++++++
- pixman/pixman-arm-neon.c     |   41 +++++++++++++++++++++++++++++++++++++++++
- pixman/pixman-private.h      |    5 +++++
- 4 files changed, 88 insertions(+), 1 deletions(-)
-
-diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
-index fa0a267..5bb3e09 100644
---- a/pixman/pixman-access.c
-+++ b/pixman/pixman-access.c
-@@ -2748,7 +2748,7 @@ typedef struct
- 	    store_scanline_ ## format, store_scanline_generic_64	\
-     }
- 
--static const format_info_t accessors[] =
-+static format_info_t accessors[] =
- {
- /* 32 bpp formats */
-     FORMAT_INFO (a8r8g8b8),
-@@ -2891,6 +2891,27 @@ _pixman_bits_image_setup_raw_accessors (bits_image_t *image)
- 	setup_accessors (image);
- }
- 
-+void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+                                 fetch_scanline_t     fetch_func,
-+                                 store_scanline_t     store_func)
-+{
-+    format_info_t *info = accessors;
-+
-+    while (info->format != PIXMAN_null)
-+    {
-+	if (info->format == format)
-+	{
-+	    if (fetch_func)
-+		info->fetch_scanline_raw_32 = fetch_func;
-+	    if (store_func)
-+		info->store_scanline_raw_32 = store_func;
-+	    return;
-+	}
-+	info++;
-+    }
-+}
-+
- #else
- 
- void
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index eb8cc4c..6ab3301 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -454,6 +454,16 @@ generate_composite_function \
-     pixman_composite_src_8888_0565_process_pixblock_tail, \
-     pixman_composite_src_8888_0565_process_pixblock_tail_head
- 
-+generate_composite_function_single_scanline \
-+    pixman_store_scanline_r5g6b5_asm_neon, 32, 0, 16, \
-+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-+    8, /* number of pixels, processed in a single block */ \
-+    default_init, \
-+    default_cleanup, \
-+    pixman_composite_src_8888_0565_process_pixblock_head, \
-+    pixman_composite_src_8888_0565_process_pixblock_tail, \
-+    pixman_composite_src_8888_0565_process_pixblock_tail_head
-+
- /******************************************************************************/
- 
- .macro pixman_composite_src_0565_8888_process_pixblock_head
-@@ -489,6 +499,16 @@ generate_composite_function \
-     pixman_composite_src_0565_8888_process_pixblock_tail, \
-     pixman_composite_src_0565_8888_process_pixblock_tail_head
- 
-+generate_composite_function_single_scanline \
-+    pixman_fetch_scanline_r5g6b5_asm_neon, 16, 0, 32, \
-+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
-+    8, /* number of pixels, processed in a single block */ \
-+    default_init, \
-+    default_cleanup, \
-+    pixman_composite_src_0565_8888_process_pixblock_head, \
-+    pixman_composite_src_0565_8888_process_pixblock_tail, \
-+    pixman_composite_src_0565_8888_process_pixblock_tail_head
-+
- /******************************************************************************/
- 
- .macro pixman_composite_add_8000_8000_process_pixblock_head
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 134493d..2245b52 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -567,6 +567,43 @@ neon_combine_##name##_u (pixman_implementation_t *imp,                   \
- BIND_COMBINE_U (over)
- BIND_COMBINE_U (add)
- 
-+void
-+pixman_fetch_scanline_r5g6b5_asm_neon (int             width,
-+                                       uint32_t       *buffer,
-+                                       const uint16_t *pixel);
-+void
-+pixman_store_scanline_r5g6b5_asm_neon (int             width,
-+                                       uint16_t       *pixel,
-+                                       const uint32_t *values);
-+
-+static void
-+neon_fetch_scanline_r5g6b5 (pixman_image_t *image,
-+                            int             x,
-+                            int             y,
-+                            int             width,
-+                            uint32_t *      buffer,
-+                            const uint32_t *mask,
-+                            uint32_t        mask_bits)
-+{
-+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
-+    const uint16_t *pixel = (const uint16_t *)bits + x;
-+
-+    pixman_fetch_scanline_r5g6b5_asm_neon (width, buffer, pixel);
-+}
-+
-+static void
-+neon_store_scanline_r5g6b5 (bits_image_t *  image,
-+                            int             x,
-+                            int             y,
-+                            int             width,
-+                            const uint32_t *values)
-+{
-+    uint32_t *bits = image->bits + image->rowstride * y;
-+    uint16_t *pixel = ((uint16_t *) bits) + x;
-+
-+    pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values);
-+}
-+
- pixman_implementation_t *
- _pixman_implementation_create_arm_neon (void)
- {
-@@ -577,6 +614,10 @@ _pixman_implementation_create_arm_neon (void)
-     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
-     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
- 
-+    _pixman_bits_override_accessors (PIXMAN_r5g6b5,
-+                                     neon_fetch_scanline_r5g6b5,
-+                                     neon_store_scanline_r5g6b5);
-+
-     imp->blt = arm_neon_blt;
-     imp->fill = arm_neon_fill;
- 
-diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
-index eeb677d..ba2d401 100644
---- a/pixman/pixman-private.h
-+++ b/pixman/pixman-private.h
-@@ -220,6 +220,11 @@ void
- _pixman_bits_image_setup_raw_accessors (bits_image_t *image);
- 
- void
-+_pixman_bits_override_accessors (pixman_format_code_t format,
-+                                 fetch_scanline_t     fetch_func,
-+                                 store_scanline_t     store_func);
-+
-+void
- _pixman_image_get_scanline_generic_64  (pixman_image_t *image,
-                                         int             x,
-                                         int             y,
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch b/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch
deleted file mode 100644
index 3d8d4e8292..0000000000
--- a/recipes/xorg-lib/pixman/0006-Revert-ARM-SIMD-Try-without-any-CFLAGS-before-forcin.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 7f0adaef68c5b0bb1c5eb9f5db5792b71b8b8beb Mon Sep 17 00:00:00 2001
-From: Koen Kooi <koen@dominion.thruhere.net>
-Date: Fri, 19 Mar 2010 10:44:09 +0100
-Subject: [PATCH 6/6] Revert "ARM: SIMD: Try without any CFLAGS before forcing -mcpu="
-
-This forces -marm that results in runtime SIGILL on thumb userspace
-
-This reverts commit 18f0de452dc7e12e4cb544d761a626d5c6031663.
----
- configure.ac |   20 +++++---------------
- 1 files changed, 5 insertions(+), 15 deletions(-)
-
-diff --git a/configure.ac b/configure.ac
-index fc3ee24..f84a4dc 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -363,28 +363,18 @@ AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
- 
- dnl ===========================================================================
- dnl Check for ARM SIMD instructions
--ARM_SIMD_CFLAGS=""
-+ARM_SIMD_CFLAGS="-mcpu=arm1136j-s"
- 
- have_arm_simd=no
- AC_MSG_CHECKING(whether to use ARM SIMD assembler)
--# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
-     asm("uqadd8 r1, r1, r2");
-     return 0;
--}], have_arm_simd=yes,
--    # check again with an explicit -mcpu= in case the toolchain defaults to an
--    # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
--    # so we force ARM mode
--    ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
--    xserver_save_CFLAGS=$CFLAGS
--    CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
--    AC_COMPILE_IFELSE([
--    int main () {
--        asm("uqadd8 r1, r1, r2");
--        return 0;
--    }], have_arm_simd=yes)
--    CFLAGS=$xserver_save_CFLAGS)
-+}], have_arm_simd=yes)
-+CFLAGS=$xserver_save_CFLAGS
- 
- AC_ARG_ENABLE(arm-simd,
-    [AC_HELP_STRING([--disable-arm-simd],
--- 
-1.6.6.1
-
diff --git a/recipes/xorg-lib/pixman/calloc.patch b/recipes/xorg-lib/pixman/calloc.patch
deleted file mode 100644
index 4a60d7ef9a..0000000000
--- a/recipes/xorg-lib/pixman/calloc.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From 634ba33b5b1fcfd5a0e7910f9991b4ed4f674549 Mon Sep 17 00:00:00 2001
-From: Søren Sandmann Pedersen <ssp@redhat.com>
-Date: Wed, 07 Apr 2010 05:39:14 +0000
-Subject: Fix uninitialized cache when pthreads are used
-
-The thread local cache is allocated with malloc(), but we rely on it
-being initialized to zero, so allocate it with calloc() instead.
----
-diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
-index a4e3f88..cdac0d8 100644
---- a/pixman/pixman-compiler.h
-+++ b/pixman/pixman-compiler.h
-@@ -101,7 +101,7 @@
-     static type *							\
-     tls_ ## name ## _alloc (key)					\
-     {									\
--	type *value = malloc (sizeof (type));				\
-+	type *value = calloc (1, sizeof (type));			\
- 	if (value)							\
- 	    pthread_setspecific (key, value);				\
- 	return value;							\
---
-cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman/nearest-neighbour.patch b/recipes/xorg-lib/pixman/nearest-neighbour.patch
deleted file mode 100644
index 29b140faf9..0000000000
--- a/recipes/xorg-lib/pixman/nearest-neighbour.patch
+++ /dev/null
@@ -1,1040 +0,0 @@
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 17 Jul 2009 10:22:23 +0000 (+0300)
-Subject: Fastpath for nearest neighbour scaled compositing operations.
-X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=247531c6978725a88fd3706129b9d3e339026f54
-
-Fastpath for nearest neighbour scaled compositing operations.
-
-OVER 8888x8888, OVER 8888x0565, SRC 8888x8888, SRC 8888x0565
-and SRC 0565x0565 cases are supported.
----
-
-diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
-index 7f80578..7f3a6ad 100644
---- a/pixman/pixman-fast-path.c
-+++ b/pixman/pixman-fast-path.c
-@@ -1261,6 +1261,993 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
-     }
- }
- 
-+/*
-+ * Functions, which implement the core inner loops for the nearest neighbour
-+ * scaled fastpath compositing operations. The do not need to do clipping
-+ * checks, also the loops are unrolled to process two pixels per iteration
-+ * for better performance on most CPU architectures (superscalar processors
-+ * can issue several operations simultaneously, other processors can hide
-+ * instructions latencies by pipelining operations). Unrolling more
-+ * does not make much sense because the compiler will start running out
-+ * of spare registers soon.
-+ */
-+
-+#undef READ
-+#undef WRITE
-+#define READ(img,x) (*(x))
-+#define WRITE(img,ptr,v) ((*(ptr)) = (v))
-+
-+#define UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(x, a, y) do {                     \
-+        UN8x4_MUL_UN8_ADD_UN8x4(x, a, y);                                      \
-+        x = CONVERT_8888_TO_0565(x);                                       \
-+    } while (0)
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  d;
-+    uint32_t  s1, s2;
-+    uint8_t   a1, a2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+
-+    uint32_t *src;
-+    uint16_t *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+
-+        if ((y < 0) || (y >= pSrc->bits.height)) {
-+            continue;
-+        }
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            s2 = READ(pSrc, src + x2);
-+
-+            a1 = s1 >> 24;
-+            a2 = s2 >> 24;
-+
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            else if (s1) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+
-+            if (a2 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+            else if (s2) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a2 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            a1 = s1 >> 24;
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            else if (s1) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  d;
-+    uint32_t  s1, s2;
-+    uint8_t   a1, a2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+    int32_t   max_vx, max_vy;
-+
-+    uint32_t *src;
-+    uint16_t *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    max_vx = pSrc->bits.width << 16;
-+    max_vy = pSrc->bits.height << 16;
-+
-+    while (orig_vx < 0) orig_vx += max_vx;
-+    while (vy < 0) vy += max_vy;
-+    while (orig_vx >= max_vx) orig_vx -= max_vx;
-+    while (vy >= max_vy) vy -= max_vy;
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+        while (vy >= max_vy) vy -= max_vy;
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s2 = READ(pSrc, src + x2);
-+
-+            a1 = s1 >> 24;
-+            a2 = s2 >> 24;
-+
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            else if (s1) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+
-+            if (a2 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+            else if (s2) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a2 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a2, s2);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            a1 = s1 >> 24;
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            else if (s1) {
-+                d = CONVERT_0565_TO_0888(READ(pDst, dst));
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4_store_r5g6b5(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint32_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  d;
-+    uint32_t  s1, s2;
-+    uint8_t   a1, a2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+
-+    uint32_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+
-+        if ((y < 0) || (y >= pSrc->bits.height)) {
-+            continue;
-+        }
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            s2 = READ(pSrc, src + x2);
-+
-+            a1 = s1 >> 24;
-+            a2 = s2 >> 24;
-+
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, s1);
-+            else if (s1) {
-+                d = READ(pDst, dst);
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+
-+            if (a2 == 0xff)
-+                WRITE(pDst, dst, s2);
-+            else if (s2) {
-+                d = READ(pDst, dst);
-+                a2 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            a1 = s1 >> 24;
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, s1);
-+            else if (s1) {
-+                d = READ(pDst, dst);
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint32_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  d;
-+    uint32_t  s1, s2;
-+    uint8_t   a1, a2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+    int32_t   max_vx, max_vy;
-+
-+    uint32_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    max_vx = pSrc->bits.width << 16;
-+    max_vy = pSrc->bits.height << 16;
-+
-+    while (orig_vx < 0) orig_vx += max_vx;
-+    while (vy < 0) vy += max_vy;
-+    while (orig_vx >= max_vx) orig_vx -= max_vx;
-+    while (vy >= max_vy) vy -= max_vy;
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+        while (vy >= max_vy) vy -= max_vy;
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s2 = READ(pSrc, src + x2);
-+
-+            a1 = s1 >> 24;
-+            a2 = s2 >> 24;
-+
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, s1);
-+            else if (s1) {
-+                d = READ(pDst, dst);
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+
-+            if (a2 == 0xff)
-+                WRITE(pDst, dst, s2);
-+            else if (s2) {
-+                d = READ(pDst, dst);
-+                a2 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a2, s2);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            a1 = s1 >> 24;
-+            if (a1 == 0xff)
-+                WRITE(pDst, dst, s1);
-+            else if (s1) {
-+                d = READ(pDst, dst);
-+                a1 ^= 0xff;
-+                UN8x4_MUL_UN8_ADD_UN8x4(d, a1, s1);
-+                WRITE(pDst, dst, d);
-+            }
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint32_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+
-+    uint32_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+
-+        if ((y < 0) || (y >= pSrc->bits.height)) {
-+            memset(dst, 0, width * sizeof(*dst));
-+            continue;
-+        }
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+            WRITE(pDst, dst, s2);
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint32_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+    int32_t   max_vx, max_vy;
-+
-+    uint32_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    max_vx = pSrc->bits.width << 16;
-+    max_vy = pSrc->bits.height << 16;
-+
-+    while (orig_vx < 0) orig_vx += max_vx;
-+    while (vy < 0) vy += max_vy;
-+    while (orig_vx >= max_vx) orig_vx -= max_vx;
-+    while (vy >= max_vy) vy -= max_vy;
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+        while (vy >= max_vy) vy -= max_vy;
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+            WRITE(pDst, dst, s2);
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint16_t *srcFirstLine;
-+    uint16_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+
-+    uint16_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+
-+        if ((y < 0) || (y >= pSrc->bits.height)) {
-+            memset(dst, 0, width * sizeof(*dst));
-+            continue;
-+        }
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+            WRITE(pDst, dst, s2);
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint16_t *srcFirstLine;
-+    uint16_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+    int32_t   max_vx, max_vy;
-+
-+    uint16_t *src, *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint16_t, srcStride, srcFirstLine, 1);
-+
-+    max_vx = pSrc->bits.width << 16;
-+    max_vy = pSrc->bits.height << 16;
-+
-+    while (orig_vx < 0) orig_vx += max_vx;
-+    while (vy < 0) vy += max_vy;
-+    while (orig_vx >= max_vx) orig_vx -= max_vx;
-+    while (vy >= max_vy) vy -= max_vy;
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+        while (vy >= max_vy) vy -= max_vy;
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+            WRITE(pDst, dst, s2);
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            WRITE(pDst, dst, s1);
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+
-+    uint32_t *src;
-+    uint16_t *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+
-+        if ((y < 0) || (y >= pSrc->bits.height)) {
-+            memset(dst, 0, width * sizeof(*dst));
-+            continue;
-+        }
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            dst++;
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            s1 = READ(pSrc, src + x1);
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            dst++;
-+        }
-+    }
-+}
-+
-+static void fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565 (
-+    pixman_image_t *pSrc, pixman_image_t *pDst, int xSrc, int ySrc, int xDst, int yDst,
-+    int width, int height, int32_t vx, int32_t vy, int32_t unit_x, int32_t unit_y)
-+{
-+    uint16_t *dstLine;
-+    uint32_t *srcFirstLine;
-+    uint32_t  s1, s2;
-+    int       w;
-+    int       x1, x2, y;
-+    int32_t   orig_vx = vx;
-+    int32_t   max_vx, max_vy;
-+
-+    uint32_t *src;
-+    uint16_t *dst;
-+    int       srcStride, dstStride;
-+    PIXMAN_IMAGE_GET_LINE (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    /* pass in 0 instead of xSrc and ySrc because xSrc and ySrc need to be
-+     * transformed from destination space to source space */
-+    PIXMAN_IMAGE_GET_LINE (pSrc, 0, 0, uint32_t, srcStride, srcFirstLine, 1);
-+
-+    max_vx = pSrc->bits.width << 16;
-+    max_vy = pSrc->bits.height << 16;
-+
-+    while (orig_vx < 0) orig_vx += max_vx;
-+    while (vy < 0) vy += max_vy;
-+    while (orig_vx >= max_vx) orig_vx -= max_vx;
-+    while (vy >= max_vy) vy -= max_vy;
-+
-+    while (--height >= 0)
-+    {
-+        dst = dstLine;
-+        dstLine += dstStride;
-+
-+        y = vy >> 16;
-+        vy += unit_y;
-+        while (vy >= max_vy) vy -= max_vy;
-+
-+        src = srcFirstLine + srcStride * y;
-+
-+        w = width;
-+        vx = orig_vx;
-+        while ((w -= 2) >= 0)
-+        {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            x2 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s2 = READ(pSrc, src + x2);
-+
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            dst++;
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s2));
-+            dst++;
-+        }
-+        if (w & 1) {
-+            x1 = vx >> 16;
-+            vx += unit_x;
-+            while (vx >= max_vx) vx -= max_vx;
-+            s1 = READ(pSrc, src + x1);
-+
-+            WRITE(pDst, dst, CONVERT_8888_TO_0565(s1));
-+            dst++;
-+        }
-+    }
-+}
-+
-+/*
-+ * Check if the clipping boundary is crossed on horizontal scaling
-+ */
-+static inline pixman_bool_t
-+fbTransformVerifyHorizontalClipping(pixman_image_t *pict, int width, int32_t vx, int32_t unit_x)
-+{
-+    while (--width >= 0) {
-+        int x = vx >> 16;
-+        if ((x < 0) || (x >= pict->bits.width)) return 1;
-+        vx += unit_x;
-+    }
-+    return 0;
-+}
-+
-+/*
-+ * Check if the clipping boundary is crossed on vertical scaling
-+ */
-+static inline pixman_bool_t
-+fbTransformVerifyVerticalClipping(pixman_image_t *pict, int height, int32_t vy, int32_t unit_y)
-+{
-+    while (--height >= 0) {
-+        int y = vy >> 16;
-+        if ((y < 0) || (y >= pict->bits.height)) return 1;
-+        vy += unit_y;
-+    }
-+    return 0;
-+}
-+
-+/*
-+ * Easy case of transform without rotation or complex clipping
-+ * Returns 1 in the case if it was able to handle this operation and 0 otherwise
-+ */
-+static pixman_bool_t
-+fbCompositeTransformNonrotatedAffineTrivialclip (
-+			    pixman_op_t     op,
-+			    pixman_image_t *pSrc,
-+			    pixman_image_t *pMask,
-+			    pixman_image_t *pDst,
-+			    int16_t         xSrc,
-+			    int16_t         ySrc,
-+			    int16_t         xMask,
-+			    int16_t         yMask,
-+			    int16_t         xDst,
-+			    int16_t         yDst,
-+			    uint16_t        width,
-+			    uint16_t        height)
-+{
-+    pixman_vector_t v, unit;
-+    int skipdst_x = 0, skipdst_y = 0;
-+
-+    /* Handle destination clipping */
-+    if (xDst < pDst->common.clip_region.extents.x1) {
-+        skipdst_x = pDst->common.clip_region.extents.x1 - xDst;
-+        if (skipdst_x >= (int)width)
-+            return 1;
-+        xDst = pDst->common.clip_region.extents.x1;
-+        width -= skipdst_x;
-+    }
-+
-+    if (yDst < pDst->common.clip_region.extents.y1) {
-+        skipdst_y = pDst->common.clip_region.extents.y1 - yDst;
-+        if (skipdst_y >= (int)height)
-+            return 1;
-+        yDst = pDst->common.clip_region.extents.y1;
-+        height -= skipdst_y;
-+    }
-+
-+    if (xDst >= pDst->common.clip_region.extents.x2 ||
-+        yDst >= pDst->common.clip_region.extents.y2)
-+    {
-+        return 1;
-+    }
-+
-+    if (xDst + width > pDst->common.clip_region.extents.x2)
-+        width = pDst->common.clip_region.extents.x2 - xDst;
-+    if (yDst + height > pDst->common.clip_region.extents.y2)
-+        height = pDst->common.clip_region.extents.y2 - yDst;
-+
-+    /* reference point is the center of the pixel */
-+    v.vector[0] = pixman_int_to_fixed(xSrc) + pixman_fixed_1 / 2;
-+    v.vector[1] = pixman_int_to_fixed(ySrc) + pixman_fixed_1 / 2;
-+    v.vector[2] = pixman_fixed_1;
-+
-+    if (!pixman_transform_point_3d (pSrc->common.transform, &v))
-+        return 0;
-+
-+    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-+    v.vector[0] -= pixman_fixed_e;
-+    v.vector[1] -= pixman_fixed_e;
-+
-+    unit.vector[0] = pSrc->common.transform->matrix[0][0];
-+    unit.vector[1] = pSrc->common.transform->matrix[1][1];
-+
-+    v.vector[0] += unit.vector[0] * skipdst_x;
-+    v.vector[1] += unit.vector[1] * skipdst_y;
-+
-+    /* Check for possible fixed point arithmetics problems/overflows */
-+    if (unit.vector[0] <= 0 || unit.vector[1] <= 0)
-+        return 0;
-+    if (width == 0 || height == 0)
-+        return 0;
-+    if ((uint32_t)width + (unit.vector[0] >> 16) >= 0x7FFF)
-+        return 0;
-+    if ((uint32_t)height + (unit.vector[1] >> 16) >= 0x7FFF)
-+        return 0;
-+
-+    /* Horizontal source clipping is only supported for NORMAL repeat */
-+    if (pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
-+        && fbTransformVerifyHorizontalClipping(pSrc, width, v.vector[0], unit.vector[0])) {
-+        return 0;
-+    }
-+
-+    /* Vertical source clipping is only supported for NONE and NORMAL repeat */
-+    if (pSrc->common.repeat != PIXMAN_REPEAT_NONE && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL
-+        && fbTransformVerifyVerticalClipping(pSrc, height, v.vector[1], unit.vector[1])) {
-+        return 0;
-+    }
-+
-+    if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8
-+            && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == PIXMAN_a8r8g8b8))
-+    {
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x8888(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x8888(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+    }
-+
-+    if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
-+            && (pDst->bits.format == PIXMAN_x8r8g8b8 || pDst->bits.format == pSrc->bits.format))
-+    {
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x8888(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x8888(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+    }
-+
-+    if (op == PIXMAN_OP_OVER && pSrc->bits.format == PIXMAN_a8r8g8b8 && pDst->bits.format == PIXMAN_r5g6b5)
-+    {
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipOver_8888x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatOver_8888x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+    }
-+
-+    if (op == PIXMAN_OP_SRC && pSrc->bits.format == PIXMAN_r5g6b5 && pDst->bits.format == PIXMAN_r5g6b5)
-+    {
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_0565x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_0565x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+    }
-+
-+    if (op == PIXMAN_OP_SRC && (pSrc->bits.format == PIXMAN_x8r8g8b8 || pSrc->bits.format == PIXMAN_a8r8g8b8)
-+        && pDst->bits.format == PIXMAN_r5g6b5)
-+    {
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat != PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipSrc_8888x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+        if (pSrc->common.filter == PIXMAN_FILTER_NEAREST && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL) {
-+            fbCompositeTransformNearestNonrotatedAffineTrivialclipRepeatSrc_8888x0565(
-+                pSrc, pDst, xSrc, ySrc, xDst, yDst, width, height,
-+                v.vector[0], v.vector[1], unit.vector[0], unit.vector[1]);
-+            return 1;
-+        }
-+    }
-+
-+    /* No fastpath scaling implemented for this case */
-+    return 0;
-+}
-+
- static void
- fast_path_composite (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-@@ -1279,6 +2266,30 @@ fast_path_composite (pixman_implementation_t *imp,
-     if (src->type == BITS
-         && src->common.transform
-         && !mask
-+        && !src->common.alpha_map && !dest->common.alpha_map
-+        && (src->common.filter == PIXMAN_FILTER_NEAREST)
-+        && !src->bits.read_func && !src->bits.write_func
-+        && !dest->bits.read_func && !dest->bits.write_func)
-+    {
-+        /* ensure that the transform matrix only has a scale */
-+        if (src->common.transform->matrix[0][1] == 0 &&
-+            src->common.transform->matrix[1][0] == 0 &&
-+            src->common.transform->matrix[2][0] == 0 &&
-+            src->common.transform->matrix[2][1] == 0 &&
-+            src->common.transform->matrix[2][2] == pixman_fixed_1 &&
-+            dest->common.clip_region.data == NULL)
-+        {
-+            if (fbCompositeTransformNonrotatedAffineTrivialclip (op, src, mask, dest,
-+                    src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
-+            {
-+                return;
-+            }
-+        }
-+    }
-+
-+    if (src->type == BITS
-+        && src->common.transform
-+        && !mask
-         && op == PIXMAN_OP_SRC
-         && !src->common.alpha_map && !dest->common.alpha_map
-         && (src->common.filter == PIXMAN_FILTER_NEAREST)
diff --git a/recipes/xorg-lib/pixman/over-n-8-0565.patch b/recipes/xorg-lib/pixman/over-n-8-0565.patch
deleted file mode 100644
index 3911068d94..0000000000
--- a/recipes/xorg-lib/pixman/over-n-8-0565.patch
+++ /dev/null
@@ -1,231 +0,0 @@
-From de2221a32d0b6628116565563f7b4ccd0a44e8b6 Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 04 Mar 2010 23:20:25 +0000
-Subject: ARM: added 'armv6_composite_over_n_8_0565' fast path
-
-Provides ~3x performance improvement when working with
-data in L1 cache and memory. This fast path is important
-for fonts rendering when using 16bpp desktop.
-
-Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
-
-before:
-
- over_n_8_0565 = L1:   2.99 M:  2.86
-
-after:
-
- over_n_8_0565 = L1:   9.07 M:  8.05
----
-diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
-index 09a2888..c375c01 100644
---- a/pixman/pixman-arm-simd.c
-+++ b/pixman/pixman-arm-simd.c
-@@ -419,6 +419,193 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
-     }
- }
- 
-+#if defined(__ARM_EABI__) && defined(__linux__)
-+/*
-+ * ARMv6 assembly optimized version of 'composite_over_n_8_0565'. It is
-+ * a bare metal 'naked' function which uses all the available CPU registers
-+ * and is compatible with ARM EABI. It might (or might not) break when used
-+ * with a different ABI, anyway it is better to be safe than sorry.
-+ */
-+static void __attribute__((naked)) armv6_composite_over_n_8_0565_asm (
-+    uint16_t *dst, uint8_t *mask, uint32_t src, int w,
-+    int dst_stride_delta, int mask_stride_delta, int h)
-+{
-+    asm volatile (
-+        ".macro composite_internal_armv6_asm opaque_flag\n"
-+            /* save all registers (8 words) to stack */
-+            "stmdb   sp!, {r4-r11, ip, lr}\n"
-+            /* some register aliases for better readability */
-+            "DST     .req  r0\n"
-+            "MASK    .req  r1\n"
-+            "S       .req  r2\n"
-+            "W       .req  r3\n"
-+            "A       .req  r8\n"
-+            "D       .req  r10\n"
-+            "C0000FF .req  r11\n"
-+            "C00001F .req  r9\n"
-+            "C800080 .req  ip\n"
-+            "CE000E0 .req  lr\n"
-+            /* precalculate some stuff and put it on stack */
-+            "mov     r6, #0xF8\n"
-+            "mov     r7, #0xFC\n"
-+
-+            "str     W, [sp, #-8]!\n"
-+
-+            ".if \\opaque_flag\n"
-+                /* precalculate and save it to stack for later use:
-+                 * ((src >> 3) & 0x001F) |
-+                 * ((src >> 5) & 0x07E0) |
-+                 * ((src >> 8) & 0xF800)
-+                 */
-+                "mov     A, #0x1F\n"
-+                "and     D, A, S, lsr #3\n"
-+                "and     r4, S, #0xF80000\n"
-+                "and     r5, S, #0xFC00\n"
-+                "orr     D, r4, lsr #8\n"
-+                "orr     D, r5, lsr #5\n"
-+                "str     D, [sp, #4]\n"
-+            ".endif\n"
-+
-+            "ldr     D, [sp, #(8 + 10*4 + 8)]\n" /* h */
-+            "ldr     A, =0xFF00FF\n"
-+            "ldr     C800080, =0x800080\n"
-+            "ldr     CE000E0, =0xE000E0\n"
-+            "ldr     C0000FF, =0xFF\n"
-+            "ldr     C00001F, =0x1F\n"
-+            "and     r4, A, S\n"           /* r4 = src & 0x00FF00FF */
-+            "and     r5, A, S, lsr #8\n"   /* r5 = (src >> 8) & 0x00FF00FF */
-+            "stmdb   sp!, {r4, r5, r6, r7}\n"
-+        "0:\n"
-+            "subs    D, D, #1\n"
-+            "blt     6f\n"
-+        "1:\n"
-+            "subs    W, W, #1\n"
-+            "blt     5f\n"
-+        "2:\n"
-+            "ldrb    A, [MASK], #1\n"
-+            "ldmia   sp, {r4, r5, r6, r7}\n" /* load constants from stack */
-+            "add     DST, DST, #2\n"
-+            "cmp     A, #0\n"
-+            "beq     1b\n"
-+
-+            ".if \\opaque_flag\n"
-+                "cmp     A, #0xFF\n"
-+                "bne     3f\n"
-+                "ldr     D, [sp, #(4*4 + 4)]\n" /* load precalculated value */
-+                "subs    W, #1\n"
-+                "strh    D, [DST, #-2]\n"
-+                "bge     2b\n"
-+            ".endif\n"
-+
-+        "3:\n"
-+            "ldrh    D, [DST, #-2]\n"
-+            "mla     r4, A, r4, C800080\n"
-+            "mla     r5, A, r5, C800080\n"
-+            "and     r6, r6, D, lsl #3\n" /* & 0xF8 */
-+            "and     r7, r7, D, lsr #3\n" /* & 0xFC */
-+            "and     D, D, #0xF800\n"
-+            "bic     S, r4, #0xFF0000\n"
-+            "bic     A, r5, #0xFF0000\n"
-+            "add     r4, r4, S, lsr #8\n"
-+            "add     r5, r5, A, lsr #8\n"
-+
-+            "and     S, r7, #0xC0\n"
-+            "orr     r6, r6, D, lsl #8\n"
-+            "and     D, r6, CE000E0\n"
-+            "eor     A, C0000FF, r5, lsr #24\n"
-+            "orr     r6, D, lsr #5\n"
-+            "orr     r7, S, lsr #6\n"
-+
-+            "mla     r6, A, r6, C800080\n"
-+            "mla     r7, A, r7, C800080\n"
-+            "subs    W, #1\n"
-+            "bic     D, r6, #0xFF0000\n"
-+            "bic     A, r7, #0xFF0000\n"
-+            "add     r6, r6, D, lsr #8\n"
-+            "uqadd8  r4, r4, r6\n"
-+            "add     r7, r7, A, lsr #8\n"
-+            "uqadd8  r5, r5, r7\n"
-+            "and     D, C00001F, r4, lsr #11\n"
-+            "and     r4, r4, #0xF8000000\n"
-+            "and     r5, r5, #0xFC00\n"
-+            "orr     D, r4, lsr #16\n"
-+            "orr     D, r5, lsr #5\n"
-+            "strh    D, [DST, #-2]\n"
-+            "bge     2b\n"
-+        "5:\n"
-+            "ldr     r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
-+            "ldr     r4, [sp, #(4*4 + 8 + 10*4 + 4)]\n" /* mask stride */
-+            "ldr     r5, [sp, #(4*4 + 8 + 10*4 + 0)]\n" /* dst stride */
-+            "ldr     W, [sp, #(4*4)]\n"
-+            "subs    r6, r6, #1\n" /* h */
-+            "str     r6, [sp, #(4*4 + 8 + 10*4 + 8)]\n" /* h */
-+            "add     MASK, MASK, r4\n"
-+            "add     DST, DST, r5, lsl #1\n"
-+            "bgt     1b\n"
-+        "6:\n"
-+            "add     sp, sp, #(4*4 + 8)\n"
-+            /* restore all registers and return */
-+            "ldmia   sp!, {r4-r11, ip, pc}\n"
-+            ".unreq DST\n"
-+            ".unreq MASK\n"
-+            ".unreq S\n"
-+            ".unreq W\n"
-+            ".unreq A\n"
-+            ".unreq D\n"
-+            ".unreq C0000FF\n"
-+            ".unreq C00001F\n"
-+            ".unreq C800080\n"
-+            ".unreq CE000E0\n"
-+        ".endm\n"
-+
-+        "mov     ip, r2, lsr #24\n"
-+        "cmp     ip, #0xFF\n"
-+        "beq     9f\n"
-+        "composite_internal_armv6_asm 0\n"
-+    "9:\n"
-+        "composite_internal_armv6_asm 1\n"
-+        ".ltorg\n"
-+        ".purgem composite_internal_armv6_asm\n"
-+    );
-+}
-+
-+static void
-+armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
-+			       pixman_op_t               op,
-+			       pixman_image_t *          src_image,
-+			       pixman_image_t *          mask_image,
-+			       pixman_image_t *          dst_image,
-+			       int32_t                   src_x,
-+			       int32_t                   src_y,
-+			       int32_t                   mask_x,
-+			       int32_t                   mask_y,
-+			       int32_t                   dest_x,
-+			       int32_t                   dest_y,
-+			       int32_t                   width,
-+			       int32_t                   height)
-+{
-+    uint32_t src;
-+    uint16_t *dst;
-+    uint8_t  *mask;
-+    int dst_stride, mask_stride;
-+
-+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-+
-+    /* bail out if fully transparent */
-+    if (src == 0)
-+	return;
-+
-+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
-+			   dst_stride, dst, 1);
-+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t,
-+			   mask_stride, mask, 1);
-+
-+    armv6_composite_over_n_8_0565_asm (dst, mask, src, width,
-+	dst_stride - width, mask_stride - width, height);
-+}
-+
-+#endif
-+
- static const pixman_fast_path_t arm_simd_fast_paths[] =
- {
-     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
-@@ -434,7 +621,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
-     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
-     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
-     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
--
-+#if defined(__ARM_EABI__) && defined(__linux__)
-+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
-+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
-+#endif
-     { PIXMAN_OP_NONE },
- };
- 
---
-cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch b/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch
deleted file mode 100644
index b3bb762415..0000000000
--- a/recipes/xorg-lib/pixman/pixman-0.13.2-neon1.patch
+++ /dev/null
@@ -1,1702 +0,0 @@
-diff --git a/configure.ac b/configure.ac
-index 063f6eb..bada55c 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -278,11 +278,12 @@ AC_SUBST(VMX_CFLAGS)
- AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
- 
- dnl Check for ARM SIMD instructions
-+ARM_SIMD_CFLAGS=""
- 
- have_arm_simd=no
- AC_MSG_CHECKING(whether to use ARM SIMD assembler)
- xserver_save_CFLAGS=$CFLAGS
--CFLAGS="$CFLAGS $ARM_CFLAGS"
-+CFLAGS="$CFLAGS $ARM_SIMD_CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
-     asm("uqadd8 r1, r1, r2");
-@@ -302,7 +303,7 @@ fi
- if test $have_arm_simd = yes ; then
-    AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
- else
--   ARM_CFLAGS=
-+   ARM_SIMD_CFLAGS=
- fi
- 
- AC_MSG_RESULT($have_arm_simd)
-@@ -310,9 +311,48 @@ if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
-    AC_MSG_ERROR([ARM SIMD intrinsics not detected])
- fi
- 
--AC_SUBST(ARM_CFLAGS)
-+dnl Check for ARM NEON instructions
-+ARM_NEON_CFLAGS="-mcpu=cortex-a8 -mfpu=neon"
-+
-+have_arm_neon=no
-+AC_MSG_CHECKING(whether to use ARM NEON)
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$CFLAGS $ARM_NEON_CFLAGS"
-+AC_COMPILE_IFELSE([
-+#include <arm_neon.h>
-+int main () {
-+    uint8x8_t neon_test=vmov_n_u8(0);
-+    return 0;
-+}], have_arm_neon=yes)
-+CFLAGS=$xserver_save_CFLAGS
-+
-+AC_ARG_ENABLE(arm-neon,
-+   [AC_HELP_STRING([--disable-arm-neon],
-+                   [disable ARM NEON fast paths])],
-+   [enable_arm_neon=$enableval], [enable_arm_neon=auto])
-+
-+if test $enable_arm_neon = no ; then
-+   have_arm_neon=disabled
-+fi
-+
-+if test $have_arm_neon = yes ; then
-+   AC_DEFINE(USE_ARM_NEON, 1, [use ARM NEON compiler intrinsics])
-+else
-+   ARM_NEON_CFLAGS=
-+fi
-+
-+AC_MSG_RESULT($have_arm_neon)
-+if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
-+   AC_MSG_ERROR([ARM NEON intrinsics not detected])
-+fi
-+
-+
-+AC_SUBST(ARM_SIMD_CFLAGS)
-+AC_SUBST(ARM_NEON_CFLAGS)
- 
- AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-+AM_CONDITIONAL(USE_ARM_NEON, test $have_arm_neon = yes)
-+
- 
- 
- AC_ARG_ENABLE(gtk,
-diff --git a/pixman/Makefile.am b/pixman/Makefile.am
-index c4612ea..4c1ec6b 100644
---- a/pixman/Makefile.am
-+++ b/pixman/Makefile.am
-@@ -80,15 +80,26 @@ libpixman_sse2_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-sse2.la
- endif
- 
--# arm code
-+# arm simd code
- if USE_ARM_SIMD
- noinst_LTLIBRARIES += libpixman-arm-simd.la
- libpixman_arm_simd_la_SOURCES = \
- 	pixman-arm-simd.c \
- 	pixman-arm-simd.h
--libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
-+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
- libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-arm-simd.la
- endif
- 
-+# arm neon code
-+if USE_ARM_NEON
-+noinst_LTLIBRARIES += libpixman-arm-neon.la
-+libpixman_arm_neon_la_SOURCES = \
-+        pixman-arm-neon.c \
-+        pixman-arm-neon.h
-+libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS) $(ARM_NEON_CFLAGS)
-+libpixman_arm_neon_la_LIBADD = $(DEP_LIBS)
-+libpixman_1_la_LIBADD += libpixman-arm-neon.la
-+endif
-+
- 
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-new file mode 100644
-index 0000000..10050e4
---- /dev/null
-+++ b/pixman/pixman-arm-neon.c
-@@ -0,0 +1,1387 @@
-+/*
-+ * Copyright © 2009 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission.  Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose.  It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author:  Ian Rickards (ian.rickards@arm.com) 
-+ *
-+ */
-+
-+#ifdef HAVE_CONFIG_H
-+#include <config.h>
-+#endif
-+
-+#include "pixman-arm-neon.h"
-+
-+#include <arm_neon.h>
-+
-+
-+#if !defined(__ARMCC_VERSION) && !defined(FORCE_NO_NEON_INLINE_ASM)
-+// [both armcc & gcc set __GNUC__]
-+// Use GNU style inline asm on gcc, for best performance
-+// Use intrinsics on armcc
-+// This switch determines if any GNU style inline asm is allowed
-+#define USE_NEON_INLINE_ASM
-+#endif
-+
-+
-+static force_inline uint8x8x4_t unpack0565(uint16x8_t rgb)
-+{
-+    uint16x8_t gb, b;
-+    uint8x8x4_t res;
-+
-+    res.val[3] = vdup_n_u8(0);
-+    gb = vshrq_n_u16(rgb, 5);
-+    b = vshrq_n_u16(rgb, 5+6);
-+    res.val[0] = vmovn_u16(rgb);  // get low 5 bits
-+    res.val[1] = vmovn_u16(gb);   // get mid 6 bits
-+    res.val[2] = vmovn_u16(b);    // get top 5 bits
-+
-+    res.val[0] = vshl_n_u8(res.val[0], 3); // shift to top
-+    res.val[1] = vshl_n_u8(res.val[1], 2); // shift to top
-+    res.val[2] = vshl_n_u8(res.val[2], 3); // shift to top
-+
-+    res.val[0] = vsri_n_u8(res.val[0], res.val[0], 5); 
-+    res.val[1] = vsri_n_u8(res.val[1], res.val[1], 6);
-+    res.val[2] = vsri_n_u8(res.val[2], res.val[2], 5);
-+
-+    return res;
-+}
-+
-+static force_inline uint16x8_t pack0565(uint8x8x4_t s)
-+{
-+    uint16x8_t rgb, val_g, val_r;
-+
-+    rgb = vshll_n_u8(s.val[2],8);
-+    val_g = vshll_n_u8(s.val[1],8);
-+    val_r = vshll_n_u8(s.val[0],8);
-+    rgb = vsriq_n_u16(rgb, val_g, 5);
-+    rgb = vsriq_n_u16(rgb, val_r, 5+6);
-+
-+    return rgb;
-+}
-+
-+static force_inline uint8x8_t neon2mul(uint8x8_t x, uint8x8_t alpha)
-+{
-+    uint16x8_t tmp,tmp2;
-+    uint8x8_t res;
-+
-+    tmp = vmull_u8(x,alpha);
-+    tmp2 = vrshrq_n_u16(tmp,8);
-+    res = vraddhn_u16(tmp,tmp2);
-+
-+    return res;
-+}
-+
-+static force_inline uint8x8x4_t neon8mul(uint8x8x4_t x, uint8x8_t alpha)
-+{
-+    uint16x8x4_t tmp;
-+    uint8x8x4_t res;
-+    uint16x8_t qtmp1,qtmp2;
-+
-+    tmp.val[0] = vmull_u8(x.val[0],alpha);
-+    tmp.val[1] = vmull_u8(x.val[1],alpha);
-+    tmp.val[2] = vmull_u8(x.val[2],alpha);
-+    tmp.val[3] = vmull_u8(x.val[3],alpha);
-+
-+    qtmp1 = vrshrq_n_u16(tmp.val[0],8);
-+    qtmp2 = vrshrq_n_u16(tmp.val[1],8);
-+    res.val[0] = vraddhn_u16(tmp.val[0],qtmp1);
-+    qtmp1 = vrshrq_n_u16(tmp.val[2],8);
-+    res.val[1] = vraddhn_u16(tmp.val[1],qtmp2);
-+    qtmp2 = vrshrq_n_u16(tmp.val[3],8);
-+    res.val[2] = vraddhn_u16(tmp.val[2],qtmp1);
-+    res.val[3] = vraddhn_u16(tmp.val[3],qtmp2);
-+
-+    return res;
-+}
-+
-+static force_inline uint8x8x4_t neon8qadd(uint8x8x4_t x, uint8x8x4_t y)
-+{
-+    uint8x8x4_t res;
-+
-+    res.val[0] = vqadd_u8(x.val[0],y.val[0]);
-+    res.val[1] = vqadd_u8(x.val[1],y.val[1]);
-+    res.val[2] = vqadd_u8(x.val[2],y.val[2]);
-+    res.val[3] = vqadd_u8(x.val[3],y.val[3]);
-+
-+    return res;
-+}
-+
-+
-+void
-+fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
-+                                pixman_image_t * pSrc,
-+                                pixman_image_t * pMask,
-+                                pixman_image_t * pDst,
-+                                int16_t      xSrc,
-+                                int16_t      ySrc,
-+                                int16_t      xMask,
-+                                int16_t      yMask,
-+                                int16_t      xDst,
-+                                int16_t      yDst,
-+                                uint16_t     width,
-+                                uint16_t     height)
-+{
-+    uint8_t     *dstLine, *dst;
-+    uint8_t     *srcLine, *src;
-+    int dstStride, srcStride;
-+    uint16_t    w;
-+
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
-+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+            uint8_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8_t sval,dval,temp;
-+
-+            sval = vld1_u8((void*)src);
-+            dval = vld1_u8((void*)dst);
-+            keep_dst = dst;
-+
-+            temp = vqadd_u8(dval,sval);
-+
-+            src += (w & 7);
-+            dst += (w & 7);
-+            w -= (w & 7);
-+
-+            while (w)
-+            {
-+                sval = vld1_u8((void*)src);
-+                dval = vld1_u8((void*)dst);
-+
-+                vst1_u8((void*)keep_dst,temp);
-+                keep_dst = dst;
-+
-+                temp = vqadd_u8(dval,sval);
-+
-+                src+=8;
-+                dst+=8;
-+                w-=8;
-+            }
-+            vst1_u8((void*)keep_dst,temp);
-+#else
-+            asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+                        "vld1.8  {d0}, [%[src]]\n\t"
-+                        "vld1.8  {d4}, [%[dst]]\n\t"
-+                        "mov     %[keep_dst], %[dst]\n\t"
-+
-+                        "and ip, %[w], #7\n\t"
-+                        "add %[src], %[src], ip\n\t"
-+                        "add %[dst], %[dst], ip\n\t"
-+                        "subs %[w], %[w], ip\n\t"
-+                        "b 9f\n\t"
-+// LOOP
-+                        "2:\n\t"
-+                        "vld1.8  {d0}, [%[src]]!\n\t"
-+                        "vld1.8  {d4}, [%[dst]]!\n\t"
-+                        "vst1.8  {d20}, [%[keep_dst]]\n\t"
-+                        "sub     %[keep_dst], %[dst], #8\n\t"
-+                        "subs %[w], %[w], #8\n\t"
-+                        "9:\n\t"
-+                        "vqadd.u8 d20, d0, d4\n\t"
-+
-+                        "bne 2b\n\t"
-+
-+                        "1:\n\t"
-+                        "vst1.8  {d20}, [%[keep_dst]]\n\t"
-+
-+                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+                        :
-+                        : "ip", "cc", "memory", "d0","d4",
-+                          "d20"
-+                        );
-+#endif
-+        }
-+    }
-+    else
-+    {
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+            uint8x8_t sval, dval;
-+            uint8_t *dst4, *dst2;
-+
-+            if (w&4)
-+            {
-+                sval = vreinterpret_u8_u32(vld1_lane_u32((void*)src,vreinterpret_u32_u8(sval),1));
-+                dval = vreinterpret_u8_u32(vld1_lane_u32((void*)dst,vreinterpret_u32_u8(dval),1));
-+                dst4=dst;
-+                src+=4;
-+                dst+=4;
-+            }
-+            if (w&2)
-+            {
-+                sval = vreinterpret_u8_u16(vld1_lane_u16((void*)src,vreinterpret_u16_u8(sval),1));
-+                dval = vreinterpret_u8_u16(vld1_lane_u16((void*)dst,vreinterpret_u16_u8(dval),1));
-+                dst2=dst;
-+                src+=2;
-+                dst+=2;
-+            }
-+            if (w&1)
-+            {
-+                sval = vld1_lane_u8((void*)src,sval,1);
-+                dval = vld1_lane_u8((void*)dst,dval,1);
-+            }
-+
-+            dval = vqadd_u8(dval,sval);
-+
-+            if (w&1)
-+                vst1_lane_u8((void*)dst,dval,1);
-+            if (w&2)
-+                vst1_lane_u16((void*)dst2,vreinterpret_u16_u8(dval),1);
-+            if (w&4)
-+                vst1_lane_u32((void*)dst4,vreinterpret_u32_u8(dval),1);
-+        }
-+    }
-+}
-+
-+
-+void
-+fbCompositeSrc_8888x8888neon (pixman_op_t op,
-+			 pixman_image_t * pSrc,
-+			 pixman_image_t * pMask,
-+			 pixman_image_t * pDst,
-+			 int16_t      xSrc,
-+			 int16_t      ySrc,
-+			 int16_t      xMask,
-+			 int16_t      yMask,
-+			 int16_t      xDst,
-+			 int16_t      yDst,
-+			 uint16_t     width,
-+			 uint16_t     height)
-+{
-+    uint32_t	*dstLine, *dst;
-+    uint32_t	*srcLine, *src;
-+    int	dstStride, srcStride;
-+    uint32_t	w;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method  
-+        while (height--)
-+        {
-+	    dst = dstLine;
-+	    dstLine += dstStride;
-+	    src = srcLine;
-+	    srcLine += srcStride;
-+	    w = width;
-+
-+            uint32_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8x4_t sval,dval,temp;
-+
-+            sval = vld4_u8((void*)src);
-+            dval = vld4_u8((void*)dst);
-+            keep_dst = dst;
-+
-+            temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+            temp = neon8qadd(sval,temp);
-+
-+            src += (w & 7);
-+            dst += (w & 7);
-+            w -= (w & 7);
-+
-+            while (w)
-+            {
-+                sval = vld4_u8((void*)src);
-+                dval = vld4_u8((void*)dst);
-+
-+                vst4_u8((void*)keep_dst,temp);
-+                keep_dst = dst;
-+
-+                temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+                temp = neon8qadd(sval,temp);
-+
-+                src+=8;
-+                dst+=8;
-+                w-=8;
-+            }
-+            vst4_u8((void*)keep_dst,temp);
-+#else
-+            asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+                        "vld4.8  {d0-d3}, [%[src]]\n\t"
-+                        "vld4.8  {d4-d7}, [%[dst]]\n\t"
-+                        "mov     %[keep_dst], %[dst]\n\t"
-+
-+                        "and ip, %[w], #7\n\t"
-+                        "add %[src], %[src], ip, LSL#2\n\t"
-+                        "add %[dst], %[dst], ip, LSL#2\n\t"
-+                        "subs %[w], %[w], ip\n\t"
-+                        "b 9f\n\t"
-+// LOOP
-+                        "2:\n\t"
-+                        "vld4.8  {d0-d3}, [%[src]]!\n\t"
-+                        "vld4.8  {d4-d7}, [%[dst]]!\n\t"
-+                        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
-+                        "sub     %[keep_dst], %[dst], #8*4\n\t"
-+                        "subs %[w], %[w], #8\n\t"
-+                        "9:\n\t"
-+                        "vmvn.8  d31, d3\n\t"
-+                        "vmull.u8 q10, d31, d4\n\t"
-+                        "vmull.u8 q11, d31, d5\n\t"
-+                        "vmull.u8 q12, d31, d6\n\t"
-+                        "vmull.u8 q13, d31, d7\n\t"
-+                        "vrshr.u16 q8, q10, #8\n\t"
-+                        "vrshr.u16 q9, q11, #8\n\t"
-+                        "vraddhn.u16 d20, q10, q8\n\t"
-+                        "vraddhn.u16 d21, q11, q9\n\t"
-+                        "vrshr.u16 q8, q12, #8\n\t"
-+                        "vrshr.u16 q9, q13, #8\n\t"
-+                        "vraddhn.u16 d22, q12, q8\n\t"
-+                        "vraddhn.u16 d23, q13, q9\n\t"
-+// result in d20-d23
-+                        "vqadd.u8 d20, d0, d20\n\t"
-+                        "vqadd.u8 d21, d1, d21\n\t"
-+                        "vqadd.u8 d22, d2, d22\n\t"
-+                        "vqadd.u8 d23, d3, d23\n\t"
-+
-+                        "bne 2b\n\t"
-+
-+                        "1:\n\t"
-+                        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
-+
-+                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+                        : 
-+                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+                          "d16","d17","d18","d19","d20","d21","d22","d23"
-+                        );
-+#endif
-+        }
-+    }
-+    else
-+    {
-+        uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+        // Handle width<8
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+            while (w>=2)
-+            {
-+                uint8x8_t sval,dval;
-+
-+                /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-+                sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-+                dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
-+                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-+                vst1_u8((void*)dst,vqadd_u8(sval,dval));
-+
-+                src+=2;
-+                dst+=2;
-+                w-=2;
-+            }
-+
-+            if (w)
-+            {
-+                uint8x8_t sval,dval;
-+
-+                /* single 32-bit pixel in lane 0 */
-+                sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src));  // only interested in lane 0
-+                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));  // only interested in lane 0
-+                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-+                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-+            }
-+        }
-+    }
-+}
-+
-+
-+
-+void
-+fbCompositeSrc_x888x0565neon (pixman_op_t op,
-+                          pixman_image_t * pSrc,
-+                          pixman_image_t * pMask,
-+                          pixman_image_t * pDst,
-+                          int16_t      xSrc,
-+                          int16_t      ySrc,
-+                          int16_t      xMask,
-+                          int16_t      yMask,
-+                          int16_t      xDst,
-+                          int16_t      yDst,
-+                          uint16_t     width,
-+                          uint16_t     height)
-+{
-+    uint16_t    *dstLine, *dst;
-+    uint32_t    *srcLine, *src;
-+    int dstStride, srcStride;
-+    uint32_t    w;
-+
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+
-+    if (width>=8)
-+    {
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+	    do {
-+	        while (w>=8)
-+	        {
-+#ifndef USE_NEON_INLINE_ASM
-+	            vst1q_u16(dst, pack0565(vld4_u8((void*)src)));
-+#else
-+                    asm volatile (
-+                        "vld4.8       {d4-d7}, [%[src]]\n\t"
-+                        "vshll.u8     q0, d6, #8\n\t"
-+                        "vshll.u8     q1, d5, #8\n\t"
-+                        "vsriq.u16    q0, q1, #5\t\n"
-+                        "vshll.u8     q1, d4, #8\n\t"
-+                        "vsriq.u16    q0, q1, #11\t\n"
-+                        "vst1.16      {q0}, [%[dst]]\n\t"
-+                        :
-+                        : [dst] "r" (dst), [src] "r" (src)
-+                        : "memory", "d0","d1","d2","d3","d4","d5","d6","d7"
-+                        );
-+#endif
-+	            src+=8;
-+	            dst+=8;
-+	            w-=8;
-+          	}
-+                if (w != 0)
-+                {
-+                    src -= (8-w);
-+                    dst -= (8-w);
-+                    w = 8;  // do another vector
-+                }
-+            } while (w!=0);
-+        }
-+    }
-+    else
-+    {
-+        // Handle width<8
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+	    while (w>=2)
-+	    {
-+	        uint32x2_t sval, rgb, g, b;
-+	        sval = vld1_u32(src);
-+	        rgb = vshr_n_u32(sval,8-5); // r (5 bits) 
-+	        g = vshr_n_u32(sval,8+8-6);  // g to bottom byte
-+	        rgb = vsli_n_u32(rgb, g, 5);
-+	        b = vshr_n_u32(sval,8+8+8-5);  // b to bottom byte
-+                rgb = vsli_n_u32(rgb, b, 11);
-+	        vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0);
-+	        vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),2);
-+	        src+=2;
-+	        w-=2;
-+	    }
-+            if (w)
-+            {
-+                uint32x2_t sval, rgb, g, b;
-+                sval = vld1_dup_u32(src);
-+                rgb = vshr_n_u32(sval,8-5); // r (5 bits)
-+                g = vshr_n_u32(sval,8+8-6);  // g to bottom byte
-+                rgb = vsli_n_u32(rgb, g, 5);
-+                b = vshr_n_u32(sval,8+8+8-5);  // b to bottom byte
-+                rgb = vsli_n_u32(rgb, b, 11);
-+                vst1_lane_u16(dst++,vreinterpret_u16_u32(rgb),0);
-+            }
-+	}
-+    }
-+}
-+
-+
-+void
-+fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
-+			       pixman_image_t * pSrc,
-+			       pixman_image_t * pMask,
-+			       pixman_image_t * pDst,
-+			       int16_t	xSrc,
-+			       int16_t	ySrc,
-+			       int16_t      xMask,
-+			       int16_t      yMask,
-+			       int16_t      xDst,
-+			       int16_t      yDst,
-+			       uint16_t     width,
-+			       uint16_t     height)
-+{
-+    uint32_t	*dstLine, *dst;
-+    uint32_t	*srcLine, *src;
-+    uint32_t	mask;
-+    int	dstStride, srcStride;
-+    uint32_t	w;
-+    uint8x8_t mask_alpha;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+    fbComposeGetSolid (pMask, mask, pDst->bits.format);
-+    mask_alpha = vdup_n_u8((mask) >> 24);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+            uint32_t *keep_dst;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8x4_t sval,dval,temp;
-+
-+            sval = vld4_u8((void*)src);
-+            dval = vld4_u8((void*)dst);
-+            keep_dst = dst;
-+
-+            sval = neon8mul(sval,mask_alpha);
-+            temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+            temp = neon8qadd(sval,temp);
-+
-+            src += (w & 7);
-+            dst += (w & 7);
-+            w -= (w & 7);
-+
-+            while (w)
-+            {
-+                sval = vld4_u8((void*)src);
-+                dval = vld4_u8((void*)dst);
-+
-+                vst4_u8((void*)keep_dst,temp);
-+                keep_dst = dst;
-+
-+                sval = neon8mul(sval,mask_alpha);
-+                temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-+                temp = neon8qadd(sval,temp);
-+
-+                src+=8;
-+                dst+=8;
-+                w-=8;
-+            }
-+            vst4_u8((void*)keep_dst,temp);
-+#else
-+            asm volatile (
-+// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-+                        "vdup.32      d30, %[mask]\n\t"
-+                        "vdup.8       d30, d30[3]\n\t"
-+
-+                        "vld4.8       {d0-d3}, [%[src]]\n\t"
-+                        "vld4.8       {d4-d7}, [%[dst]]\n\t"
-+                        "mov  %[keep_dst], %[dst]\n\t"
-+
-+                        "and  ip, %[w], #7\n\t"
-+                        "add  %[src], %[src], ip, LSL#2\n\t"
-+                        "add  %[dst], %[dst], ip, LSL#2\n\t"
-+                        "subs  %[w], %[w], ip\n\t"
-+                        "b 9f\n\t"
-+// LOOP
-+                        "2:\n\t"
-+                        "vld4.8       {d0-d3}, [%[src]]!\n\t"
-+                        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
-+                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-+                        "sub  %[keep_dst], %[dst], #8*4\n\t"
-+                        "subs  %[w], %[w], #8\n\t"
-+
-+                        "9:\n\t"
-+                        "vmull.u8     q10, d30, d0\n\t"
-+                        "vmull.u8     q11, d30, d1\n\t"
-+                        "vmull.u8     q12, d30, d2\n\t"
-+                        "vmull.u8     q13, d30, d3\n\t"
-+                        "vrshr.u16    q8, q10, #8\n\t"
-+                        "vrshr.u16    q9, q11, #8\n\t"
-+                        "vraddhn.u16  d0, q10, q8\n\t"
-+                        "vraddhn.u16  d1, q11, q9\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d3, q13, q9\n\t"
-+                        "vraddhn.u16  d2, q12, q8\n\t"
-+
-+                        "vmvn.8       d31, d3\n\t"
-+                        "vmull.u8     q10, d31, d4\n\t"
-+                        "vmull.u8     q11, d31, d5\n\t"
-+                        "vmull.u8     q12, d31, d6\n\t"
-+                        "vmull.u8     q13, d31, d7\n\t"
-+                        "vrshr.u16    q8, q10, #8\n\t"
-+                        "vrshr.u16    q9, q11, #8\n\t"
-+                        "vraddhn.u16  d20, q10, q8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d21, q11, q9\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vraddhn.u16  d22, q12, q8\n\t"
-+                        "vraddhn.u16  d23, q13, q9\n\t"
-+// result in d20-d23
-+                        "vqadd.u8     d20, d0, d20\n\t"
-+                        "vqadd.u8     d21, d1, d21\n\t"
-+                        "vqadd.u8     d22, d2, d22\n\t"
-+                        "vqadd.u8     d23, d3, d23\n\t"
-+
-+                        "bne  2b\n\t"
-+
-+                        "1:\n\t"
-+                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-+
-+                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "+r" (keep_dst)
-+                        : [mask] "r" (mask)
-+                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27",
-+                          "d30","d31"
-+                        );
-+#endif
-+        }
-+    }
-+    else
-+    {
-+        uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+        // Handle width<8
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            src = srcLine;
-+            srcLine += srcStride;
-+            w = width;
-+
-+            while (w>=2)
-+            {
-+                uint8x8_t sval,dval;
-+
-+                sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-+                dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
-+
-+                /* sval * const alpha_mul */
-+                sval = neon2mul(sval,mask_alpha);
-+
-+                /* dval * 255-(src alpha) */
-+                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
-+
-+                vst1_u8((void*)dst,vqadd_u8(sval,dval));
-+
-+                src+=2;
-+                dst+=2;
-+                w-=2;
-+            }
-+
-+            if (w)
-+            {
-+                uint8x8_t sval,dval;
-+
-+                sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src));
-+                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
-+
-+                /* sval * const alpha_mul */
-+                sval = neon2mul(sval,mask_alpha);
-+
-+                /* dval * 255-(src alpha) */
-+                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
-+
-+                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-+            }
-+        }
-+    }
-+}
-+
-+
-+
-+void
-+fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
-+                               pixman_image_t * pSrc,
-+                               pixman_image_t * pMask,
-+                               pixman_image_t * pDst,
-+                               int16_t      xSrc,
-+                               int16_t      ySrc,
-+                               int16_t      xMask,
-+                               int16_t      yMask,
-+                               int16_t      xDst,
-+                               int16_t      yDst,
-+                               uint16_t     width,
-+                               uint16_t     height)
-+{
-+    uint32_t     src, srca;
-+    uint16_t    *dstLine, *dst;
-+    uint8_t     *maskLine, *mask;
-+    int          dstStride, maskStride;
-+    uint32_t     w;
-+    uint8x8_t    sval2;
-+    uint8x8x4_t  sval8;
-+
-+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+    srca = src >> 24;
-+    if (src == 0)
-+        return;
-+
-+    sval2=vreinterpret_u8_u32(vdup_n_u32(src));
-+    sval8.val[0]=vdup_lane_u8(sval2,0);
-+    sval8.val[1]=vdup_lane_u8(sval2,1);
-+    sval8.val[2]=vdup_lane_u8(sval2,2);
-+    sval8.val[3]=vdup_lane_u8(sval2,3);
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+        while (height--)
-+        {
-+            uint16_t *keep_dst;
-+
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8_t alpha;
-+            uint16x8_t dval, temp; 
-+            uint8x8x4_t sval8temp;
-+
-+            alpha = vld1_u8((void*)mask);
-+            dval = vld1q_u16((void*)dst);
-+            keep_dst = dst;
-+
-+            sval8temp = neon8mul(sval8,alpha);
-+            temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+            mask += (w & 7);
-+            dst += (w & 7);
-+            w -= (w & 7);
-+
-+            while (w)
-+            {
-+                dval = vld1q_u16((void*)dst);
-+	        alpha = vld1_u8((void*)mask);
-+
-+                vst1q_u16((void*)keep_dst,temp);
-+                keep_dst = dst;
-+
-+                sval8temp = neon8mul(sval8,alpha);
-+                temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+                mask+=8;
-+                dst+=8;
-+                w-=8;
-+            }
-+            vst1q_u16((void*)keep_dst,temp);
-+#else
-+        asm volatile (
-+                        "vdup.32      d0, %[src]\n\t"
-+                        "vdup.8       d1, d0[1]\n\t"
-+                        "vdup.8       d2, d0[2]\n\t"
-+                        "vdup.8       d3, d0[3]\n\t"
-+                        "vdup.8       d0, d0[0]\n\t"
-+
-+                        "vld1.8       {q12}, [%[dst]]\n\t"
-+                        "vld1.8       {d31}, [%[mask]]\n\t"
-+                        "mov  %[keep_dst], %[dst]\n\t"
-+
-+                        "and  ip, %[w], #7\n\t"
-+                        "add  %[mask], %[mask], ip\n\t"
-+                        "add  %[dst], %[dst], ip, LSL#1\n\t"
-+                        "subs  %[w], %[w], ip\n\t"
-+                        "b  9f\n\t"
-+// LOOP
-+                        "2:\n\t"
-+
-+                        "vld1.16      {q12}, [%[dst]]!\n\t"
-+                        "vld1.8       {d31}, [%[mask]]!\n\t"
-+                        "vst1.16      {q10}, [%[keep_dst]]\n\t"
-+                        "sub  %[keep_dst], %[dst], #8*2\n\t"
-+                        "subs  %[w], %[w], #8\n\t"
-+                        "9:\n\t"
-+// expand 0565 q12 to 8888 {d4-d7}
-+                        "vmovn.u16    d4, q12\t\n"
-+                        "vshr.u16     q11, q12, #5\t\n"
-+                        "vshr.u16     q10, q12, #6+5\t\n"
-+                        "vmovn.u16    d5, q11\t\n"
-+                        "vmovn.u16    d6, q10\t\n"
-+                        "vshl.u8      d4, d4, #3\t\n"
-+                        "vshl.u8      d5, d5, #2\t\n"
-+                        "vshl.u8      d6, d6, #3\t\n"
-+                        "vsri.u8      d4, d4, #5\t\n"
-+                        "vsri.u8      d5, d5, #6\t\n"
-+                        "vsri.u8      d6, d6, #5\t\n"
-+
-+                        "vmull.u8     q10, d31, d0\n\t"
-+                        "vmull.u8     q11, d31, d1\n\t"
-+                        "vmull.u8     q12, d31, d2\n\t"
-+                        "vmull.u8     q13, d31, d3\n\t"
-+                        "vrshr.u16    q8, q10, #8\n\t"
-+                        "vrshr.u16    q9, q11, #8\n\t"
-+                        "vraddhn.u16  d20, q10, q8\n\t"
-+                        "vraddhn.u16  d21, q11, q9\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d23, q13, q9\n\t"
-+                        "vraddhn.u16  d22, q12, q8\n\t"
-+
-+// duplicate in 4/2/1 & 8pix vsns
-+                        "vmvn.8       d30, d23\n\t"
-+                        "vmull.u8     q14, d30, d6\n\t"
-+                        "vmull.u8     q13, d30, d5\n\t"
-+                        "vmull.u8     q12, d30, d4\n\t"
-+                        "vrshr.u16    q8, q14, #8\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vraddhn.u16  d6, q14, q8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d5, q13, q9\n\t"
-+                        "vqadd.u8     d6, d6, d22\n\t"  // moved up
-+                        "vraddhn.u16  d4, q12, q8\n\t"
-+// intentionally don't calculate alpha
-+// result in d4-d6
-+
-+//                      "vqadd.u8     d6, d6, d22\n\t"  ** moved up
-+                        "vqadd.u8     d5, d5, d21\n\t"
-+                        "vqadd.u8     d4, d4, d20\n\t"
-+
-+// pack 8888 {d20-d23} to 0565 q10
-+                        "vshll.u8     q10, d6, #8\n\t"
-+                        "vshll.u8     q3, d5, #8\n\t"
-+                        "vshll.u8     q2, d4, #8\n\t"
-+                        "vsri.u16     q10, q3, #5\t\n"
-+                        "vsri.u16     q10, q2, #11\t\n"
-+
-+                        "bne 2b\n\t"
-+
-+                        "1:\n\t"
-+                        "vst1.16      {q10}, [%[keep_dst]]\n\t"
-+
-+                        : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "+r" (keep_dst)
-+                        : [src] "r" (src)
-+                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+                          "d30","d31"
-+                        );
-+#endif
-+        }
-+    }
-+    else
-+    {
-+        while (height--)
-+        {
-+            void *dst4, *dst2;
-+
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8_t alpha;
-+            uint16x8_t dval, temp;
-+            uint8x8x4_t sval8temp;
-+
-+            if (w&4)
-+            {
-+                alpha = vreinterpret_u8_u32(vld1_lane_u32((void*)mask,vreinterpret_u32_u8(alpha),1));
-+                dval = vreinterpretq_u16_u64(vld1q_lane_u64((void*)dst,vreinterpretq_u64_u16(dval),1));
-+                dst4=dst;
-+                mask+=4;
-+                dst+=4;
-+            }
-+            if (w&2)
-+            {
-+                alpha = vreinterpret_u8_u16(vld1_lane_u16((void*)mask,vreinterpret_u16_u8(alpha),1));
-+                dval = vreinterpretq_u16_u32(vld1q_lane_u32((void*)dst,vreinterpretq_u32_u16(dval),1));
-+                dst2=dst;
-+                mask+=2;
-+                dst+=2;
-+            }
-+            if (w&1)
-+            {
-+                alpha = vld1_lane_u8((void*)mask,alpha,1);
-+                dval = vld1q_lane_u16((void*)dst,dval,1);
-+            }
-+
-+            sval8temp = neon8mul(sval8,alpha);
-+            temp = pack0565(neon8qadd(sval8temp,neon8mul(unpack0565(dval),vmvn_u8(sval8temp.val[3]))));
-+
-+            if (w&1)
-+                vst1q_lane_u16((void*)dst,temp,1);
-+            if (w&2)
-+                vst1q_lane_u32((void*)dst2,vreinterpretq_u32_u16(temp),1);
-+            if (w&4)
-+                vst1q_lane_u64((void*)dst4,vreinterpretq_u64_u16(temp),1);
-+#else
-+            asm volatile (
-+                        "vdup.32      d0, %[src]\n\t"
-+                        "vdup.8       d1, d0[1]\n\t"
-+                        "vdup.8       d2, d0[2]\n\t"
-+                        "vdup.8       d3, d0[3]\n\t"
-+                        "vdup.8       d0, d0[0]\n\t"
-+
-+                        "tst  %[w], #4\t\n"
-+                        "beq  skip_load4\t\n"
-+
-+                        "vld1.64      {d25}, [%[dst]]\n\t"
-+                        "vld1.32      {d31[1]}, [%[mask]]\n\t"
-+                        "mov  %[dst4], %[dst]\t\n"
-+                        "add  %[mask], %[mask], #4\t\n"
-+                        "add  %[dst], %[dst], #4*2\t\n"
-+
-+                        "skip_load4:\t\n"
-+                        "tst  %[w], #2\t\n"
-+                        "beq  skip_load2\t\n"
-+                        "vld1.32      {d24[1]}, [%[dst]]\n\t"
-+                        "vld1.16      {d31[1]}, [%[mask]]\n\t"
-+                        "mov  %[dst2], %[dst]\t\n"
-+                        "add  %[mask], %[mask], #2\t\n"
-+                        "add  %[dst], %[dst], #2*2\t\n"
-+
-+                        "skip_load2:\t\n"
-+                        "tst  %[w], #1\t\n"
-+                        "beq  skip_load1\t\n"
-+                        "vld1.16      {d24[1]}, [%[dst]]\n\t"
-+                        "vld1.8       {d31[1]}, [%[mask]]\n\t"
-+
-+                        "skip_load1:\t\n"
-+// expand 0565 q12 to 8888 {d4-d7}
-+                        "vmovn.u16    d4, q12\t\n"
-+                        "vshr.u16     q11, q12, #5\t\n"
-+                        "vshr.u16     q10, q12, #6+5\t\n"
-+                        "vmovn.u16    d5, q11\t\n"
-+                        "vmovn.u16    d6, q10\t\n"
-+                        "vshl.u8      d4, d4, #3\t\n"
-+                        "vshl.u8      d5, d5, #2\t\n"
-+                        "vshl.u8      d6, d6, #3\t\n"
-+                        "vsri.u8      d4, d4, #5\t\n"
-+                        "vsri.u8      d5, d5, #6\t\n"
-+                        "vsri.u8      d6, d6, #5\t\n"
-+
-+                        "vmull.u8     q10, d31, d0\n\t"
-+                        "vmull.u8     q11, d31, d1\n\t"
-+                        "vmull.u8     q12, d31, d2\n\t"
-+                        "vmull.u8     q13, d31, d3\n\t"
-+                        "vrshr.u16    q8, q10, #8\n\t"
-+                        "vrshr.u16    q9, q11, #8\n\t"
-+                        "vraddhn.u16  d20, q10, q8\n\t"
-+                        "vraddhn.u16  d21, q11, q9\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d23, q13, q9\n\t"
-+                        "vraddhn.u16  d22, q12, q8\n\t"
-+
-+// duplicate in 4/2/1 & 8pix vsns
-+                        "vmvn.8       d30, d23\n\t"
-+                        "vmull.u8     q14, d30, d6\n\t"
-+                        "vmull.u8     q13, d30, d5\n\t"
-+                        "vmull.u8     q12, d30, d4\n\t"
-+                        "vrshr.u16    q8, q14, #8\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vraddhn.u16  d6, q14, q8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d5, q13, q9\n\t"
-+                        "vqadd.u8     d6, d6, d22\n\t"  // moved up
-+                        "vraddhn.u16  d4, q12, q8\n\t"
-+// intentionally don't calculate alpha
-+// result in d4-d6
-+
-+//                      "vqadd.u8     d6, d6, d22\n\t"  ** moved up
-+                        "vqadd.u8     d5, d5, d21\n\t"
-+                        "vqadd.u8     d4, d4, d20\n\t"
-+
-+// pack 8888 {d20-d23} to 0565 q10
-+                        "vshll.u8     q10, d6, #8\n\t"
-+                        "vshll.u8     q3, d5, #8\n\t"
-+                        "vshll.u8     q2, d4, #8\n\t"
-+                        "vsri.u16     q10, q3, #5\t\n"
-+                        "vsri.u16     q10, q2, #11\t\n"
-+
-+                        "tst  %[w], #1\n\t"
-+                        "beq skip_store1\t\n"
-+                        "vst1.16      {d20[1]}, [%[dst]]\t\n"
-+                        "skip_store1:\t\n"
-+                        "tst  %[w], #2\n\t"
-+                        "beq  skip_store2\t\n"
-+                        "vst1.32      {d20[1]}, [%[dst2]]\t\n"
-+                        "skip_store2:\t\n"
-+                        "tst  %[w], #4\n\t"
-+                        "beq skip_store4\t\n"
-+                        "vst1.16      {d21}, [%[dst4]]\t\n"
-+                        "skip_store4:\t\n"
-+
-+                        : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [dst4] "+r" (dst4), [dst2] "+r" (dst2)
-+                        : [src] "r" (src)
-+                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+                          "d30","d31"
-+                        );
-+#endif
-+        }
-+    }
-+}
-+
-+
-+void
-+fbCompositeSolidMask_nx8x8888neon (pixman_op_t      op,
-+			       pixman_image_t * pSrc,
-+			       pixman_image_t * pMask,
-+			       pixman_image_t * pDst,
-+			       int16_t      xSrc,
-+			       int16_t      ySrc,
-+			       int16_t      xMask,
-+			       int16_t      yMask,
-+			       int16_t      xDst,
-+			       int16_t      yDst,
-+			       uint16_t     width,
-+			       uint16_t     height)
-+{
-+    uint32_t	 src, srca;
-+    uint32_t	*dstLine, *dst;
-+    uint8_t	*maskLine, *mask;
-+    int		 dstStride, maskStride;
-+    uint32_t	 w;
-+    uint8x8_t    sval2;
-+    uint8x8x4_t  sval8;
-+    uint8x8_t    mask_selector=vreinterpret_u8_u64(vcreate_u64(0x0101010100000000ULL));
-+    uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-+
-+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+    srca = src >> 24;
-+    if (src == 0)
-+	return;
-+
-+    sval2=vreinterpret_u8_u32(vdup_n_u32(src));
-+    sval8.val[0]=vdup_lane_u8(sval2,0);
-+    sval8.val[1]=vdup_lane_u8(sval2,1);
-+    sval8.val[2]=vdup_lane_u8(sval2,2);
-+    sval8.val[3]=vdup_lane_u8(sval2,3);
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+        while (height--)
-+        {
-+            uint32_t *keep_dst;
-+
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+#ifndef USE_NEON_INLINE_ASM
-+            uint8x8_t alpha;
-+            uint8x8x4_t dval, temp;
-+
-+            alpha = vld1_u8((void*)mask);
-+            dval = vld4_u8((void*)dst);
-+            keep_dst = dst;
-+
-+            temp = neon8mul(sval8,alpha);
-+            dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-+            temp = neon8qadd(temp,dval);
-+
-+            mask += (w & 7);
-+            dst += (w & 7);
-+            w -= (w & 7);
-+
-+            while (w)
-+            {
-+                alpha = vld1_u8((void*)mask);
-+                dval = vld4_u8((void*)dst);
-+
-+                vst4_u8((void*)keep_dst,temp);
-+                keep_dst = dst;
-+
-+                temp = neon8mul(sval8,alpha);
-+                dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-+                temp = neon8qadd(temp,dval);
-+
-+                mask+=8;
-+                dst+=8;
-+                w-=8;
-+            }
-+            vst4_u8((void*)keep_dst,temp);
-+#else
-+        asm volatile (
-+                        "vdup.32      d0, %[src]\n\t"
-+                        "vdup.8       d1, d0[1]\n\t"
-+                        "vdup.8       d2, d0[2]\n\t"
-+                        "vdup.8       d3, d0[3]\n\t"
-+                        "vdup.8       d0, d0[0]\n\t"
-+
-+                        "vld4.8       {d4-d7}, [%[dst]]\n\t"
-+                        "vld1.8       {d31}, [%[mask]]\n\t"
-+                        "mov  %[keep_dst], %[dst]\n\t"
-+
-+                        "and  ip, %[w], #7\n\t"
-+                        "add  %[mask], %[mask], ip\n\t"
-+                        "add  %[dst], %[dst], ip, LSL#2\n\t"
-+                        "subs  %[w], %[w], ip\n\t"
-+                        "b 9f\n\t"
-+// LOOP
-+                        "2:\n\t" 
-+                        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
-+                        "vld1.8       {d31}, [%[mask]]!\n\t"
-+                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-+                        "sub  %[keep_dst], %[dst], #8*4\n\t"
-+                        "subs  %[w], %[w], #8\n\t"
-+                        "9:\n\t"
-+
-+                        "vmull.u8     q10, d31, d0\n\t"
-+                        "vmull.u8     q11, d31, d1\n\t"
-+                        "vmull.u8     q12, d31, d2\n\t"
-+                        "vmull.u8     q13, d31, d3\n\t"
-+                        "vrshr.u16    q8, q10, #8\n\t"
-+                        "vrshr.u16    q9, q11, #8\n\t"
-+                        "vraddhn.u16  d20, q10, q8\n\t"
-+                        "vraddhn.u16  d21, q11, q9\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vraddhn.u16  d23, q13, q9\n\t"
-+                        "vraddhn.u16  d22, q12, q8\n\t"
-+
-+                        "vmvn.8       d30, d23\n\t"
-+                        "vmull.u8     q12, d30, d4\n\t"
-+                        "vmull.u8     q13, d30, d5\n\t"
-+                        "vmull.u8     q14, d30, d6\n\t"
-+                        "vmull.u8     q15, d30, d7\n\t"
-+
-+                        "vrshr.u16    q8, q12, #8\n\t"
-+                        "vrshr.u16    q9, q13, #8\n\t"
-+                        "vraddhn.u16  d4, q12, q8\n\t"
-+                        "vrshr.u16    q8, q14, #8\n\t"
-+                        "vraddhn.u16  d5, q13, q9\n\t"
-+                        "vrshr.u16    q9, q15, #8\n\t"
-+                        "vraddhn.u16  d6, q14, q8\n\t"
-+                        "vraddhn.u16  d7, q15, q9\n\t"
-+// result in d4-d7
-+
-+                        "vqadd.u8     d20, d4, d20\n\t"
-+                        "vqadd.u8     d21, d5, d21\n\t"
-+                        "vqadd.u8     d22, d6, d22\n\t"
-+                        "vqadd.u8     d23, d7, d23\n\t"
-+
-+                        "bne 2b\n\t"
-+
-+                        "1:\n\t"
-+                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-+
-+                        : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "+r" (keep_dst)
-+                        : [src] "r" (src) 
-+                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-+                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-+                          "d30","d31"
-+                        );
-+#endif
-+        }
-+    }
-+    else
-+    {
-+        while (height--)
-+        {
-+            uint8x8_t alpha;
-+
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+            while (w>=2)
-+            {
-+                uint8x8_t dval, temp, res;
-+
-+                alpha = vtbl1_u8(vreinterpret_u8_u16(vld1_dup_u16((void*)mask)), mask_selector);
-+                dval = vld1_u8((void*)dst);
-+
-+                temp = neon2mul(sval2,alpha);
-+                res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
-+
-+                vst1_u8((void*)dst,res);
-+
-+                mask+=2;
-+                dst+=2;
-+                w-=2;
-+            }
-+            if (w)
-+            {
-+                uint8x8_t dval, temp, res;
-+
-+                alpha = vtbl1_u8(vld1_dup_u8((void*)mask), mask_selector);
-+                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
-+
-+                temp = neon2mul(sval2,alpha);
-+                res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
-+
-+                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(res),0);
-+            }
-+        }
-+    }
-+}
-+
-+
-+void
-+fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
-+                            pixman_image_t * pSrc,
-+                            pixman_image_t * pMask,
-+                            pixman_image_t * pDst,
-+                            int16_t      xSrc,
-+                            int16_t      ySrc,
-+                            int16_t      xMask,
-+                            int16_t      yMask,
-+                            int16_t      xDst,
-+                            int16_t      yDst,
-+                            uint16_t     width,
-+                            uint16_t     height)
-+{
-+    uint8_t     *dstLine, *dst;
-+    uint8_t     *maskLine, *mask;
-+    int dstStride, maskStride;
-+    uint32_t    w;
-+    uint32_t    src;
-+    uint8x8_t   sa;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+    fbComposeGetSolid (pSrc, src, pDst->bits.format);
-+    sa = vdup_n_u8((src) >> 24);
-+
-+    if (width>=8)
-+    {
-+        // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+            uint8x8_t mval, dval, res;
-+            uint8_t     *keep_dst;
-+
-+            mval = vld1_u8((void *)mask);
-+            dval = vld1_u8((void *)dst);
-+            keep_dst = dst;
-+
-+            res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+            mask += (w & 7);
-+            dst += (w & 7);
-+            w -= w & 7;
-+
-+            while (w)
-+            {
-+                mval = vld1_u8((void *)mask);
-+                dval = vld1_u8((void *)dst);
-+                vst1_u8((void *)keep_dst, res);
-+                keep_dst = dst;
-+
-+                res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+                mask += 8;
-+                dst += 8;
-+                w -= 8;
-+            }
-+            vst1_u8((void *)keep_dst, res);
-+        }
-+    }
-+    else
-+    {
-+        // Use 4/2/1 load/store method to handle 1-7 pixels
-+        while (height--)
-+        {
-+            dst = dstLine;
-+            dstLine += dstStride;
-+            mask = maskLine;
-+            maskLine += maskStride;
-+            w = width;
-+
-+            uint8x8_t mval, dval, res;
-+            uint8_t *dst4, *dst2;
-+
-+            if (w&4)
-+            {
-+                mval = vreinterpret_u8_u32(vld1_lane_u32((void *)mask, vreinterpret_u32_u8(mval), 1));
-+                dval = vreinterpret_u8_u32(vld1_lane_u32((void *)dst, vreinterpret_u32_u8(dval), 1));
-+
-+                dst4 = dst;
-+                mask += 4;
-+                dst += 4;
-+            }
-+            if (w&2)
-+            {
-+                mval = vreinterpret_u8_u16(vld1_lane_u16((void *)mask, vreinterpret_u16_u8(mval), 1));
-+                dval = vreinterpret_u8_u16(vld1_lane_u16((void *)dst, vreinterpret_u16_u8(dval), 1));
-+                dst2 = dst;
-+                mask += 2;
-+                dst += 2;
-+            }
-+            if (w&1)
-+            {
-+                mval = vld1_lane_u8((void *)mask, mval, 1);
-+                dval = vld1_lane_u8((void *)dst, dval, 1);
-+            }
-+
-+            res = vqadd_u8(neon2mul(mval,sa),dval);
-+
-+            if (w&1)
-+                vst1_lane_u8((void *)dst, res, 1);
-+            if (w&2)
-+                vst1_lane_u16((void *)dst2, vreinterpret_u16_u8(res), 1);
-+            if (w&4)
-+                vst1_lane_u32((void *)dst4, vreinterpret_u32_u8(res), 1);
-+        }
-+    }
-+}
-+
-diff --git a/pixman/pixman-arm-neon.h b/pixman/pixman-arm-neon.h
-new file mode 100644
-index 0000000..bab4dee
---- /dev/null
-+++ b/pixman/pixman-arm-neon.h
-@@ -0,0 +1,137 @@
-+/*
-+ * Copyright © 2009 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission.  Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose.  It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author:  Ian Rickards (ian.rickards@arm.com)
-+ *
-+ */
-+
-+#include "pixman-private.h"
-+
-+#ifdef USE_ARM_NEON
-+
-+static inline pixman_bool_t pixman_have_arm_neon(void) { return TRUE; }
-+
-+#else
-+#define pixman_have_arm_neon() FALSE
-+#endif
-+
-+#ifdef USE_ARM_NEON
-+
-+void
-+fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
-+                        pixman_image_t * pSrc,
-+                        pixman_image_t * pMask,
-+                        pixman_image_t * pDst,
-+                        int16_t      xSrc,
-+                        int16_t      ySrc,
-+                        int16_t      xMask,
-+                        int16_t      yMask,
-+                        int16_t      xDst,
-+                        int16_t      yDst,
-+                        uint16_t     width,
-+                        uint16_t     height);
-+
-+void
-+fbCompositeSrc_8888x8888neon (pixman_op_t op,
-+			pixman_image_t * pSrc,
-+			pixman_image_t * pMask,
-+			pixman_image_t * pDst,
-+			int16_t      xSrc,
-+			int16_t      ySrc,
-+			int16_t      xMask,
-+			int16_t      yMask,
-+			int16_t      xDst,
-+			int16_t      yDst,
-+			uint16_t     width,
-+			uint16_t     height);
-+
-+void
-+fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
-+			pixman_image_t * pSrc,
-+			pixman_image_t * pMask,
-+			pixman_image_t * pDst,
-+			int16_t      xSrc,
-+			int16_t      ySrc,
-+			int16_t      xMask,
-+			int16_t      yMask,
-+			int16_t      xDst,
-+			int16_t      yDst,
-+			uint16_t     width,
-+			uint16_t     height);
-+
-+void
-+fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
-+                        pixman_image_t * pSrc,
-+                        pixman_image_t * pMask,
-+                        pixman_image_t * pDst,
-+                        int16_t      xSrc,
-+                        int16_t      ySrc,
-+                        int16_t      xMask,
-+                        int16_t      yMask,
-+                        int16_t      xDst,
-+                        int16_t      yDst,
-+                        uint16_t     width,
-+                        uint16_t     height);
-+
-+void
-+fbCompositeSolidMask_nx8x8888neon (pixman_op_t op,
-+			pixman_image_t * pSrc,
-+			pixman_image_t * pMask,
-+			pixman_image_t * pDst,
-+			int16_t      xSrc,
-+			int16_t      ySrc,
-+			int16_t      xMask,
-+			int16_t      yMask,
-+			int16_t      xDst,
-+			int16_t      yDst,
-+		 	uint16_t     width,
-+			uint16_t     height);
-+
-+void
-+fbCompositeSrc_x888x0565neon (pixman_op_t op,
-+                        pixman_image_t * pSrc,
-+                        pixman_image_t * pMask,
-+                        pixman_image_t * pDst,
-+                        int16_t      xSrc,
-+                        int16_t      ySrc,
-+                        int16_t      xMask,
-+                        int16_t      yMask,
-+                        int16_t      xDst,
-+                        int16_t      yDst,
-+                        uint16_t     width,
-+                        uint16_t     height);
-+
-+void
-+fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
-+                        pixman_image_t * pSrc,
-+                        pixman_image_t * pMask,
-+                        pixman_image_t * pDst,
-+                        int16_t      xSrc,
-+                        int16_t      ySrc,
-+                        int16_t      xMask,
-+                        int16_t      yMask,
-+                        int16_t      xDst,
-+                        int16_t      yDst,
-+                        uint16_t     width,
-+                        uint16_t     height);
-+
-+#endif /* USE_ARM_NEON */
-diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
-index 1388517..b13947a 100644
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -34,6 +34,7 @@
- #include "pixman-mmx.h"
- #include "pixman-vmx.h"
- #include "pixman-sse2.h"
-+#include "pixman-arm-neon.h"
- #include "pixman-arm-simd.h"
- #include "pixman-combine32.h"
- 
-@@ -1518,6 +1519,31 @@ static const FastPathInfo vmx_fast_paths[] =
- };
- #endif
- 
-+#ifdef USE_ARM_NEON
-+static const FastPathInfo arm_neon_fast_paths[] =
-+{
-+    { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       fbCompositeSrcAdd_8888x8x8neon,        0 },
-+    { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000neon,       0 },
-+    { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565neon,          0 },
-+    { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565neon,          0 },
-+    { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565neon,          0 },
-+    { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565neon,          0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888neon,          0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888neon,          0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888neon,          0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888neon,          0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888neon,        NEED_SOLID_MASK },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888neon,        NEED_SOLID_MASK },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565neon,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8x0565neon,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888neon,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888neon,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888neon,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888neon,     0 },
-+    { PIXMAN_OP_NONE },
-+};
-+#endif
-+
- #ifdef USE_ARM_SIMD
- static const FastPathInfo arm_simd_fast_paths[] =
- {
-@@ -1893,6 +1919,11 @@ pixman_image_composite (pixman_op_t      op,
- 	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
- #endif
- 
-+#ifdef USE_ARM_NEON
-+        if (!info && pixman_have_arm_neon())
-+            info = get_fast_path (arm_neon_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-+#endif
-+
- #ifdef USE_ARM_SIMD
- 	if (!info && pixman_have_arm_simd())
- 	    info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf);
diff --git a/recipes/xorg-lib/pixman/pixman-28986.patch b/recipes/xorg-lib/pixman/pixman-28986.patch
deleted file mode 100644
index f5ba4c302e..0000000000
--- a/recipes/xorg-lib/pixman/pixman-28986.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 7b7860d61fb1526acdf010dd8fd644bbf1396b9e Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 28 Aug 2009 22:34:21 +0300
-Subject: [PATCH] ARM: workaround for gcc bug in vshll_n_u8 intrinsic
-
-Some versions of gcc (cs2009q1, 4.4.1) incorrectly reject
-shift operand having value >= 8, claiming that it is out of
-range. So inline assembly is used as a workaround.
----
- pixman/pixman-arm-neon.c |    6 ++++++
- 1 files changed, 6 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 4125d1b..3e7f566 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -64,6 +64,12 @@ unpack0565 (uint16x8_t rgb)
-     return res;
- }
- 
-+#ifdef USE_GCC_INLINE_ASM
-+/* Some versions of gcc have problems with vshll_n_u8 intrinsic (Bug 23576) */
-+#define vshll_n_u8(a, n) ({ uint16x8_t r; \
-+    asm ("vshll.u8 %q0, %P1, %2\n" : "=w" (r) : "w" (a), "i" (n)); r; })
-+#endif
-+
- static force_inline uint16x8_t
- pack0565 (uint8x8x4_t s)
- {
--- 
-1.5.4.3
-
diff --git a/recipes/xorg-lib/pixman/pixman-arm.patch b/recipes/xorg-lib/pixman/pixman-arm.patch
deleted file mode 100644
index 91dda03b7c..0000000000
--- a/recipes/xorg-lib/pixman/pixman-arm.patch
+++ /dev/null
@@ -1,632 +0,0 @@
-From: Jeff Muizelaar <jmuizelaar@mozilla.com>
-Date: Wed, 17 Sep 2008 19:53:20 +0000 (-0400)
-Subject: Add support for ARMv6 SIMD fastpaths.
-X-Git-Url: http://gitweb.freedesktop.org/?p=pixman.git;a=commitdiff;h=d0b181f347ef4720d130beee3f03196afbd28aba
-
-Add support for ARMv6 SIMD fastpaths.
----
-
---- a/configure.ac
-+++ b/configure.ac
-@@ -277,6 +277,44 @@ AC_SUBST(VMX_CFLAGS)
- 
- AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
- 
-+dnl Check for ARM
-+
-+have_armv6_simd=no
-+AC_MSG_CHECKING(whether to use ARM assembler)
-+xserver_save_CFLAGS=$CFLAGS
-+CFLAGS="$CFLAGS $ARM_CFLAGS"
-+AC_COMPILE_IFELSE([
-+int main () {
-+    asm("uqadd8 r1, r1, r2");
-+    return 0;
-+}], have_armv6_simd=yes)
-+CFLAGS=$xserver_save_CFLAGS
-+
-+AC_ARG_ENABLE(arm,
-+   [AC_HELP_STRING([--disable-arm],
-+                   [disable ARM fast paths])],
-+   [enable_arm=$enableval], [enable_arm=auto])
-+
-+if test $enable_arm = no ; then
-+   have_armv6_simd=disabled
-+fi
-+
-+if test $have_armv6_simd = yes ; then
-+   AC_DEFINE(USE_ARM, 1, [use ARM compiler intrinsics])
-+else
-+   ARM_CFLAGS=
-+fi
-+
-+AC_MSG_RESULT($have_armv6_simd)
-+if test $enable_arm = yes && test $have_armv6_simd = no ; then
-+   AC_MSG_ERROR([ARM intrinsics not detected])
-+fi
-+
-+AC_SUBST(ARM_CFLAGS)
-+
-+AM_CONDITIONAL(USE_ARM, test $have_armv6_simd = yes)
-+
-+
- AC_ARG_ENABLE(gtk,
-    [AC_HELP_STRING([--enable-gtk],
-                    [enable tests using GTK+ [default=auto]])],
---- a/pixman/Makefile.am
-+++ b/pixman/Makefile.am
-@@ -79,3 +79,15 @@ libpixman_sse2_la_LIBADD = $(DEP_LIBS)
- libpixman_1_la_LIBADD += libpixman-sse2.la
- endif
- 
-+# arm code
-+if USE_ARM
-+noinst_LTLIBRARIES += libpixman-arm.la
-+libpixman_arm_la_SOURCES = \
-+	pixman-arm.c \
-+	pixman-arm.h
-+libpixman_arm_la_CFLAGS = $(DEP_CFLAGS) $(ARM_CFLAGS)
-+libpixman_arm_la_LIBADD = $(DEP_LIBS)
-+libpixman_1_la_LIBADD += libpixman-arm.la
-+endif
-+
-+
---- /dev/null
-+++ b/pixman/pixman-arm.c
-@@ -0,0 +1,409 @@
-+/*
-+ * Copyright Â© 2008 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission.  Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose.  It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
-+ *
-+ */
-+#ifdef HAVE_CONFIG_H
-+#include <config.h>
-+#endif
-+
-+#include "pixman-arm.h"
-+
-+void
-+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-+				pixman_image_t * pSrc,
-+				pixman_image_t * pMask,
-+				pixman_image_t * pDst,
-+				int16_t      xSrc,
-+				int16_t      ySrc,
-+				int16_t      xMask,
-+				int16_t      yMask,
-+				int16_t      xDst,
-+				int16_t      yDst,
-+				uint16_t     width,
-+				uint16_t     height)
-+{
-+    uint8_t	*dstLine, *dst;
-+    uint8_t	*srcLine, *src;
-+    int	dstStride, srcStride;
-+    uint16_t	w;
-+    uint8_t	s, d;
-+
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
-+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
-+
-+    while (height--)
-+    {
-+	dst = dstLine;
-+	dstLine += dstStride;
-+	src = srcLine;
-+	srcLine += srcStride;
-+	w = width;
-+
-+	while (w && (unsigned long)dst & 3)
-+	{
-+	    s = *src;
-+	    d = *dst;
-+	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-+	    *dst = d;
-+
-+	    dst++;
-+	    src++;
-+	    w--;
-+	}
-+
-+	while (w >= 4)
-+	{
-+	    asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
-+	    dst += 4;
-+	    src += 4;
-+	    w -= 4;
-+	}
-+
-+	while (w)
-+	{
-+	    s = *src;
-+	    d = *dst;
-+	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
-+	    *dst = d;
-+
-+	    dst++;
-+	    src++;
-+	    w--;
-+	}
-+    }
-+
-+}
-+
-+void
-+fbCompositeSrc_8888x8888arm (pixman_op_t op,
-+			 pixman_image_t * pSrc,
-+			 pixman_image_t * pMask,
-+			 pixman_image_t * pDst,
-+			 int16_t      xSrc,
-+			 int16_t      ySrc,
-+			 int16_t      xMask,
-+			 int16_t      yMask,
-+			 int16_t      xDst,
-+			 int16_t      yDst,
-+			 uint16_t     width,
-+			 uint16_t     height)
-+{
-+    uint32_t	*dstLine, *dst;
-+    uint32_t	*srcLine, *src;
-+    int	dstStride, srcStride;
-+    uint16_t	w;
-+    uint32_t component_half = 0x800080;
-+    uint32_t upper_component_mask = 0xff00ff00;
-+    uint32_t alpha_mask = 0xff;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+    while (height--)
-+    {
-+	dst = dstLine;
-+	dstLine += dstStride;
-+	src = srcLine;
-+	srcLine += srcStride;
-+	w = width;
-+
-+//#define inner_branch
-+	asm volatile (
-+			"cmp %[w], #0\n\t"
-+			"beq 2f\n\t"
-+			"1:\n\t"
-+			/* load dest */
-+			"ldr r5, [%[src]], #4\n\t"
-+#ifdef inner_branch
-+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+			 * The 0x0 case also allows us to avoid doing an unecessary data
-+			 * write which is more valuable so we only check for that */
-+			"cmp r5, #0x1000000\n\t"
-+			"blt 3f\n\t"
-+
-+			/* = 255 - alpha */
-+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+
-+			"ldr r4, [%[dest]] \n\t"
-+
-+#else
-+			"ldr r4, [%[dest]] \n\t"
-+
-+			/* = 255 - alpha */
-+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+#endif
-+			"uxtb16 r6, r4\n\t"
-+			"uxtb16 r7, r4, ror #8\n\t"
-+
-+			/* multiply by 257 and divide by 65536 */
-+			"mla r6, r6, r8, %[component_half]\n\t"
-+			"mla r7, r7, r8, %[component_half]\n\t"
-+
-+			"uxtab16 r6, r6, r6, ror #8\n\t"
-+			"uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+			/* recombine the 0xff00ff00 bytes of r6 and r7 */
-+			"and r7, %[upper_component_mask]\n\t"
-+			"uxtab16 r6, r7, r6, ror #8\n\t"
-+
-+			"uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+			"3:\n\t"
-+
-+#endif
-+			"str r5, [%[dest]], #4\n\t"
-+			/* increment counter and jmp to top */
-+			"subs	%[w], %[w], #1\n\t"
-+			"bne	1b\n\t"
-+			"2:\n\t"
-+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-+			: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
-+			  [alpha_mask] "r" (alpha_mask)
-+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-+			);
-+    }
-+}
-+
-+void
-+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-+			       pixman_image_t * pSrc,
-+			       pixman_image_t * pMask,
-+			       pixman_image_t * pDst,
-+			       int16_t	xSrc,
-+			       int16_t	ySrc,
-+			       int16_t      xMask,
-+			       int16_t      yMask,
-+			       int16_t      xDst,
-+			       int16_t      yDst,
-+			       uint16_t     width,
-+			       uint16_t     height)
-+{
-+    uint32_t	*dstLine, *dst;
-+    uint32_t	*srcLine, *src;
-+    uint32_t	mask;
-+    int	dstStride, srcStride;
-+    uint16_t	w;
-+    uint32_t component_half = 0x800080;
-+    uint32_t alpha_mask = 0xff;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+
-+    fbComposeGetSolid (pMask, mask, pDst->bits.format);
-+    mask = (mask) >> 24;
-+
-+    while (height--)
-+    {
-+	dst = dstLine;
-+	dstLine += dstStride;
-+	src = srcLine;
-+	srcLine += srcStride;
-+	w = width;
-+
-+//#define inner_branch
-+	asm volatile (
-+			"cmp %[w], #0\n\t"
-+			"beq 2f\n\t"
-+			"1:\n\t"
-+			/* load dest */
-+			"ldr r5, [%[src]], #4\n\t"
-+#ifdef inner_branch
-+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+			 * The 0x0 case also allows us to avoid doing an unecessary data
-+			 * write which is more valuable so we only check for that */
-+			"cmp r5, #0x1000000\n\t"
-+			"blt 3f\n\t"
-+
-+#endif
-+			"ldr r4, [%[dest]] \n\t"
-+
-+			"uxtb16 r6, r5\n\t"
-+			"uxtb16 r7, r5, ror #8\n\t"
-+
-+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-+			"mla r6, r6, %[mask_alpha], %[component_half]\n\t"
-+			"mla r7, r7, %[mask_alpha], %[component_half]\n\t"
-+
-+			"uxtab16 r6, r6, r6, ror #8\n\t"
-+			"uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+			"uxtb16 r6, r6, ror #8\n\t"
-+			"uxtb16 r7, r7, ror #8\n\t"
-+
-+			/* recombine */
-+			"orr r5, r6, r7, lsl #8\n\t"
-+
-+			"uxtb16 r6, r4\n\t"
-+			"uxtb16 r7, r4, ror #8\n\t"
-+
-+			/* 255 - alpha */
-+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
-+
-+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-+			"mla r6, r6, r8, %[component_half]\n\t"
-+			"mla r7, r7, r8, %[component_half]\n\t"
-+
-+			"uxtab16 r6, r6, r6, ror #8\n\t"
-+			"uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+			"uxtb16 r6, r6, ror #8\n\t"
-+			"uxtb16 r7, r7, ror #8\n\t"
-+
-+			/* recombine */
-+			"orr r6, r6, r7, lsl #8\n\t"
-+
-+			"uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+			"3:\n\t"
-+
-+#endif
-+			"str r5, [%[dest]], #4\n\t"
-+			/* increment counter and jmp to top */
-+			"subs	%[w], %[w], #1\n\t"
-+			"bne	1b\n\t"
-+			"2:\n\t"
-+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-+			: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
-+			  [alpha_mask] "r" (alpha_mask)
-+			: "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
-+			);
-+    }
-+}
-+
-+void
-+fbCompositeSolidMask_nx8x8888arm (pixman_op_t      op,
-+			       pixman_image_t * pSrc,
-+			       pixman_image_t * pMask,
-+			       pixman_image_t * pDst,
-+			       int16_t      xSrc,
-+			       int16_t      ySrc,
-+			       int16_t      xMask,
-+			       int16_t      yMask,
-+			       int16_t      xDst,
-+			       int16_t      yDst,
-+			       uint16_t     width,
-+			       uint16_t     height)
-+{
-+    uint32_t	 src, srca;
-+    uint32_t	*dstLine, *dst;
-+    uint8_t	*maskLine, *mask;
-+    int		 dstStride, maskStride;
-+    uint16_t	 w;
-+
-+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
-+
-+    srca = src >> 24;
-+    if (src == 0)
-+	return;
-+
-+    uint32_t component_mask = 0xff00ff;
-+    uint32_t component_half = 0x800080;
-+
-+    uint32_t src_hi = (src >> 8) & component_mask;
-+    uint32_t src_lo = src & component_mask;
-+
-+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
-+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
-+
-+    while (height--)
-+    {
-+	dst = dstLine;
-+	dstLine += dstStride;
-+	mask = maskLine;
-+	maskLine += maskStride;
-+	w = width;
-+
-+//#define inner_branch
-+	asm volatile (
-+			"cmp %[w], #0\n\t"
-+			"beq 2f\n\t"
-+			"1:\n\t"
-+			/* load mask */
-+			"ldrb r5, [%[mask]], #1\n\t"
-+#ifdef inner_branch
-+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-+			 * The 0x0 case also allows us to avoid doing an unecessary data
-+			 * write which is more valuable so we only check for that */
-+			/* 0x1000000 is the least value that contains alpha all values
-+			 * less than it have a 0 alpha value */
-+			"cmp r5, #0x0\n\t"
-+			"beq 3f\n\t"
-+
-+#endif
-+			"ldr r4, [%[dest]] \n\t"
-+
-+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-+			"mla r6, %[src_lo], r5, %[component_half]\n\t"
-+			"mla r7, %[src_hi], r5, %[component_half]\n\t"
-+
-+			"uxtab16 r6, r6, r6, ror #8\n\t"
-+			"uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+			"uxtb16 r6, r6, ror #8\n\t"
-+			"uxtb16 r7, r7, ror #8\n\t"
-+
-+			/* recombine */
-+			"orr r5, r6, r7, lsl #8\n\t"
-+
-+			"uxtb16 r6, r4\n\t"
-+			"uxtb16 r7, r4, ror #8\n\t"
-+
-+			/* we could simplify this to use 'sub' if we were
-+			 * willing to give up a register for alpha_mask */
-+			"mvn r8, r5\n\t"
-+			"mov r8, r8, lsr #24\n\t"
-+
-+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-+			"mla r6, r6, r8, %[component_half]\n\t"
-+			"mla r7, r7, r8, %[component_half]\n\t"
-+
-+			"uxtab16 r6, r6, r6, ror #8\n\t"
-+			"uxtab16 r7, r7, r7, ror #8\n\t"
-+
-+			"uxtb16 r6, r6, ror #8\n\t"
-+			"uxtb16 r7, r7, ror #8\n\t"
-+
-+			/* recombine */
-+			"orr r6, r6, r7, lsl #8\n\t"
-+
-+			"uqadd8 r5, r6, r5\n\t"
-+
-+#ifdef inner_branch
-+			"3:\n\t"
-+
-+#endif
-+			"str r5, [%[dest]], #4\n\t"
-+			/* increment counter and jmp to top */
-+			"subs	%[w], %[w], #1\n\t"
-+			"bne	1b\n\t"
-+			"2:\n\t"
-+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
-+			: [component_half] "r" (component_half),
-+			  [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
-+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-+			);
-+    }
-+}
---- /dev/null
-+++ b/pixman/pixman-arm.h
-@@ -0,0 +1,94 @@
-+/*
-+ * Copyright Â© 2008 Mozilla Corporation
-+ *
-+ * Permission to use, copy, modify, distribute, and sell this software and its
-+ * documentation for any purpose is hereby granted without fee, provided that
-+ * the above copyright notice appear in all copies and that both that
-+ * copyright notice and this permission notice appear in supporting
-+ * documentation, and that the name of Mozilla Corporation not be used in
-+ * advertising or publicity pertaining to distribution of the software without
-+ * specific, written prior permission.  Mozilla Corporation makes no
-+ * representations about the suitability of this software for any purpose.  It
-+ * is provided "as is" without express or implied warranty.
-+ *
-+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
-+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
-+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-+ * SOFTWARE.
-+ *
-+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
-+ *
-+ */
-+
-+#include "pixman-private.h"
-+
-+#ifdef USE_ARM
-+
-+static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
-+
-+#else
-+#define pixman_have_arm() FALSE
-+#endif
-+
-+#ifdef USE_ARM
-+
-+void
-+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
-+				pixman_image_t * pSrc,
-+				pixman_image_t * pMask,
-+				pixman_image_t * pDst,
-+				int16_t      xSrc,
-+				int16_t      ySrc,
-+				int16_t      xMask,
-+				int16_t      yMask,
-+				int16_t      xDst,
-+				int16_t      yDst,
-+				uint16_t     width,
-+				uint16_t     height);
-+void
-+fbCompositeSrc_8888x8888arm (pixman_op_t op,
-+			 pixman_image_t * pSrc,
-+			 pixman_image_t * pMask,
-+			 pixman_image_t * pDst,
-+			 int16_t      xSrc,
-+			 int16_t      ySrc,
-+			 int16_t      xMask,
-+			 int16_t      yMask,
-+			 int16_t      xDst,
-+			 int16_t      yDst,
-+			 uint16_t     width,
-+			 uint16_t     height);
-+
-+void
-+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
-+			 pixman_image_t * pSrc,
-+			 pixman_image_t * pMask,
-+			 pixman_image_t * pDst,
-+			 int16_t      xSrc,
-+			 int16_t      ySrc,
-+			 int16_t      xMask,
-+			 int16_t      yMask,
-+			 int16_t      xDst,
-+			 int16_t      yDst,
-+			 uint16_t     width,
-+			 uint16_t     height);
-+void
-+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
-+			 pixman_image_t * pSrc,
-+			 pixman_image_t * pMask,
-+			 pixman_image_t * pDst,
-+			 int16_t      xSrc,
-+			 int16_t      ySrc,
-+			 int16_t      xMask,
-+			 int16_t      yMask,
-+			 int16_t      xDst,
-+			 int16_t      yDst,
-+			 uint16_t     width,
-+			 uint16_t     height);
-+
-+
-+#endif /* USE_ARM */
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -34,6 +34,7 @@
- #include "pixman-mmx.h"
- #include "pixman-vmx.h"
- #include "pixman-sse2.h"
-+#include "pixman-arm.h"
- #include "pixman-combine32.h"
- 
- #ifdef __GNUC__
-@@ -1479,6 +1480,26 @@ static const FastPathInfo vmx_fast_paths
- };
- #endif
- 
-+#ifdef USE_ARM
-+static const FastPathInfo arm_fast_paths[] =
-+{
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm,	   0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm,	   0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm,	   0 },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
-+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm,	   NEED_SOLID_MASK },
-+
-+    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000arm,   0 },
-+
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm,     0 },
-+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm,     0 },
-+
-+    { PIXMAN_OP_NONE },
-+};
-+#endif
- 
- static const FastPathInfo c_fast_paths[] =
- {
-@@ -1829,6 +1850,12 @@ pixman_image_composite (pixman_op_t     
- 	if (!info && pixman_have_vmx())
- 	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
- #endif
-+
-+#ifdef USE_ARM
-+	if (!info && pixman_have_arm())
-+	    info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-+#endif
-+
-         if (!info)
- 	    info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
- 
diff --git a/recipes/xorg-lib/pixman/pixman-x888-565.patch b/recipes/xorg-lib/pixman/pixman-x888-565.patch
deleted file mode 100644
index a3fa331710..0000000000
--- a/recipes/xorg-lib/pixman/pixman-x888-565.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From: Vladimir Vukicevic <vladimir@slide.(none)>
-Date: Wed, 17 Sep 2008 20:01:31 +0000 (-0400)
-Subject: Add SRC x888x0565 C fast path
-X-Git-Url: http://gitweb.freedesktop.org/?p=pixman.git;a=commitdiff;h=7180230d4d87c55dfef1e17a0cc3b125d45aa3a0
-
-Add SRC x888x0565 C fast path
----
-
---- a/pixman/pixman-pict.c
-+++ b/pixman/pixman-pict.c
-@@ -759,6 +759,46 @@ fbCompositeSrc_8888x0565 (pixman_op_t op
-     }
- }
- 
-+
-+void
-+fbCompositeSrc_x888x0565 (pixman_op_t op,
-+                          pixman_image_t * pSrc,
-+                          pixman_image_t * pMask,
-+                          pixman_image_t * pDst,
-+                          int16_t      xSrc,
-+                          int16_t      ySrc,
-+                          int16_t      xMask,
-+                          int16_t      yMask,
-+                          int16_t      xDst,
-+                          int16_t      yDst,
-+                          uint16_t     width,
-+                          uint16_t     height)
-+{
-+    uint16_t	*dstLine, *dst;
-+    uint32_t	*srcLine, *src, s;
-+    int	dstStride, srcStride;
-+    uint16_t	w;
-+
-+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
-+    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
-+
-+    while (height--)
-+    {
-+	dst = dstLine;
-+	dstLine += dstStride;
-+	src = srcLine;
-+	srcLine += srcStride;
-+	w = width;
-+
-+	while (w--)
-+	{
-+	    s = READ(pSrc, src++);
-+	    WRITE(pDst, dst, cvt8888to0565(s));
-+	    dst++;
-+	}
-+    }
-+}
-+
- void
- fbCompositeSrcAdd_8000x8000 (pixman_op_t	op,
- 			     pixman_image_t * pSrc,
-@@ -1568,6 +1608,10 @@ static const FastPathInfo c_fast_paths[]
-     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrcSrc_nxn, 0 },
-     { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrcSrc_nxn, 0 },
- #endif
-+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565, 0 },
-+    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565, 0 },
-+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565, 0 },
-+    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565, 0 },
-     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcIn_8x8,   0 },
-     { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,	PIXMAN_a8,	 fbCompositeSolidMaskIn_nx8x8, 0 },
-     { PIXMAN_OP_NONE },
diff --git a/recipes/xorg-lib/pixman/prefetch.patch b/recipes/xorg-lib/pixman/prefetch.patch
deleted file mode 100644
index c2e856ec25..0000000000
--- a/recipes/xorg-lib/pixman/prefetch.patch
+++ /dev/null
@@ -1,298 +0,0 @@
-From d0044bfbd596f22ed1560579ea6537b39f3dc1af Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Thu, 29 Oct 2009 19:06:42 +0000
-Subject: ARM: Don't emit prefetch code if prefetch distance is set to 0
-
-Also it is now possible to disable prefetch globally with
-a configuration macro
----
-diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
-index bca499a..35e6a7e 100644
---- a/pixman/pixman-arm-neon-asm.S
-+++ b/pixman/pixman-arm-neon-asm.S
-@@ -219,33 +219,33 @@
-     vshrn.u16   d7, q2, #3
-     vsli.u16    q2, q2, #5
-         vshll.u8    q14, d16, #8
--                                    add PF_X, PF_X, #8
-+                                    PF add PF_X, PF_X, #8
-         vshll.u8    q8, d19, #8
--                                    tst PF_CTL, #0xF
-+                                    PF tst PF_CTL, #0xF
-     vsri.u8     d6, d6, #5
--                                    addne PF_X, PF_X, #8
-+                                    PF addne PF_X, PF_X, #8
-     vmvn.8      d3, d3
--                                    subne PF_CTL, PF_CTL, #1
-+                                    PF subne PF_CTL, PF_CTL, #1
-     vsri.u8     d7, d7, #6
-     vshrn.u16   d30, q2, #2
-     vmull.u8    q10, d3, d6
--                                    pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-     vmull.u8    q11, d3, d7
-     vmull.u8    q12, d3, d30
--                                    pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-         vsri.u16    q14, q8, #5
--                                    cmp PF_X, ORIG_W
-+                                    PF cmp PF_X, ORIG_W
-         vshll.u8    q9, d18, #8
-     vrshr.u16   q13, q10, #8
--                                    subge PF_X, PF_X, ORIG_W
-+                                    PF subge PF_X, PF_X, ORIG_W
-     vrshr.u16   q3, q11, #8
-     vrshr.u16   q15, q12, #8
--                                    subges PF_CTL, PF_CTL, #0x10
-+                                    PF subges PF_CTL, PF_CTL, #0x10
-         vsri.u16    q14, q9, #11
--                                    ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-     vraddhn.u16 d20, q10, q13
-     vraddhn.u16 d23, q11, q3
--                                    ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-     vraddhn.u16 d22, q12, q15
-         vst1.16     {d28, d29}, [DST_W, :128]!
- .endm
-@@ -323,20 +323,20 @@ generate_composite_function \
- 
- .macro pixman_composite_src_8888_0565_process_pixblock_tail_head
-         vsri.u16    q14, q8, #5
--                                    add PF_X, PF_X, #8
--                                    tst PF_CTL, #0xF
-+                                    PF add PF_X, PF_X, #8
-+                                    PF tst PF_CTL, #0xF
-     vld4.8      {d0, d1, d2, d3}, [SRC]!
--                                    addne PF_X, PF_X, #8
--                                    subne PF_CTL, PF_CTL, #1
-+                                    PF addne PF_X, PF_X, #8
-+                                    PF subne PF_CTL, PF_CTL, #1
-         vsri.u16    q14, q9, #11
--                                    cmp PF_X, ORIG_W
--                                    pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+                                    PF cmp PF_X, ORIG_W
-+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-     vshll.u8    q8, d1, #8
-         vst1.16     {d28, d29}, [DST_W, :128]!
--                                    subge PF_X, PF_X, ORIG_W
--                                    subges PF_CTL, PF_CTL, #0x10
-+                                    PF subge PF_X, PF_X, ORIG_W
-+                                    PF subges PF_CTL, PF_CTL, #0x10
-     vshll.u8    q14, d2, #8
--                                    ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-     vshll.u8    q9, d0, #8
- .endm
- 
-@@ -363,20 +363,20 @@ generate_composite_function \
- 
- .macro pixman_composite_add_8000_8000_process_pixblock_tail_head
-     vld1.8      {d0, d1, d2, d3}, [SRC]!
--                                    add PF_X, PF_X, #32
--                                    tst PF_CTL, #0xF
-+                                    PF add PF_X, PF_X, #32
-+                                    PF tst PF_CTL, #0xF
-     vld1.8      {d4, d5, d6, d7}, [DST_R, :128]!
--                                    addne PF_X, PF_X, #32
--                                    subne PF_CTL, PF_CTL, #1
-+                                    PF addne PF_X, PF_X, #32
-+                                    PF subne PF_CTL, PF_CTL, #1
-         vst1.8      {d28, d29, d30, d31}, [DST_W, :128]!
--                                    cmp PF_X, ORIG_W
--                                    pld [PF_SRC, PF_X, lsl #src_bpp_shift]
--                                    pld [PF_DST, PF_X, lsl #dst_bpp_shift]
--                                    subge PF_X, PF_X, ORIG_W
--                                    subges PF_CTL, PF_CTL, #0x10
-+                                    PF cmp PF_X, ORIG_W
-+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-+                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-+                                    PF subge PF_X, PF_X, ORIG_W
-+                                    PF subges PF_CTL, PF_CTL, #0x10
-     vqadd.u8    q14, q0, q2
--                                    ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
--                                    ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-     vqadd.u8    q15, q1, q3
- .endm
- 
-@@ -418,32 +418,32 @@ generate_composite_function \
- .macro pixman_composite_over_8888_8888_process_pixblock_tail_head
-     vld4.8      {d4, d5, d6, d7}, [DST_R, :128]!
-         vrshr.u16   q14, q8, #8
--                                    add PF_X, PF_X, #8
--                                    tst PF_CTL, #0xF
-+                                    PF add PF_X, PF_X, #8
-+                                    PF tst PF_CTL, #0xF
-         vrshr.u16   q15, q9, #8
-         vrshr.u16   q12, q10, #8
-         vrshr.u16   q13, q11, #8
--                                    addne PF_X, PF_X, #8
--                                    subne PF_CTL, PF_CTL, #1
-+                                    PF addne PF_X, PF_X, #8
-+                                    PF subne PF_CTL, PF_CTL, #1
-         vraddhn.u16 d28, q14, q8
-         vraddhn.u16 d29, q15, q9
--                                    cmp PF_X, ORIG_W
-+                                    PF cmp PF_X, ORIG_W
-         vraddhn.u16 d30, q12, q10
-         vraddhn.u16 d31, q13, q11
-         vqadd.u8    q14, q0, q14
-         vqadd.u8    q15, q1, q15
-     vld4.8      {d0, d1, d2, d3}, [SRC]!
--                                    pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
-     vmvn.8      d22, d3
--                                    pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
-         vst4.8      {d28, d29, d30, d31}, [DST_W, :128]!
--                                    subge PF_X, PF_X, ORIG_W
-+                                    PF subge PF_X, PF_X, ORIG_W
-     vmull.u8    q8, d22, d4
--                                    subges PF_CTL, PF_CTL, #0x10
-+                                    PF subges PF_CTL, PF_CTL, #0x10
-     vmull.u8    q9, d22, d5
--                                    ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-     vmull.u8    q10, d22, d6
--                                    ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-     vmull.u8    q11, d22, d7
- .endm
- 
-diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
-index d276ab9..a2941ae 100644
---- a/pixman/pixman-arm-neon-asm.h
-+++ b/pixman/pixman-arm-neon-asm.h
-@@ -58,6 +58,11 @@
- #define RESPECT_STRICT_ALIGNMENT 1
- 
- /*
-+ * If set to nonzero value, prefetch is globally disabled
-+ */
-+#define PREFETCH_GLOBALLY_DISABLED 0 
-+
-+/*
-  * Definitions of supplementary pixld/pixst macros (for partial load/store of
-  * pixel data)
-  */
-@@ -218,37 +223,43 @@
-  * pixels processing like simple copy. Anyway, having prefetch is a must
-  * when working with graphics data.
-  */
-+.macro PF a, x:vararg
-+.if (ADVANCED_PREFETCH_ENABLED != 0) && (PREFETCH_GLOBALLY_DISABLED == 0)
-+    a x
-+.endif
-+.endm
-+
- .macro cache_preload std_increment, boost_increment
- .if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
- .if regs_shortage
--    ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
-+    PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
- .endif
- .if std_increment != 0
--    add PF_X, PF_X, #std_increment
-+    PF add PF_X, PF_X, #std_increment
- .endif
--    tst PF_CTL, #0xF
--    addne PF_X, PF_X, #boost_increment
--    subne PF_CTL, PF_CTL, #1
--    cmp PF_X, ORIG_W
-+    PF tst PF_CTL, #0xF
-+    PF addne PF_X, PF_X, #boost_increment
-+    PF subne PF_CTL, PF_CTL, #1
-+    PF cmp PF_X, ORIG_W
- .if src_bpp_shift >= 0
--    pld [PF_SRC, PF_X, lsl #src_bpp_shift]
-+    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- .endif
- .if dst_r_bpp != 0
--    pld [PF_DST, PF_X, lsl #dst_bpp_shift]
-+    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- .endif
- .if mask_bpp_shift >= 0
--    pld [PF_MASK, PF_X, lsl #mask_bpp_shift]
-+    PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
- .endif
--    subge PF_X, PF_X, ORIG_W
--    subges PF_CTL, PF_CTL, #0x10
-+    PF subge PF_X, PF_X, ORIG_W
-+    PF subges PF_CTL, PF_CTL, #0x10
- .if src_bpp_shift >= 0
--    ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-+    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- .endif
- .if dst_r_bpp != 0
--    ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
-+    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- .endif
- .if mask_bpp_shift >= 0
--    ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
-+    PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
- .endif
- .endif
- .endm
-@@ -297,6 +308,12 @@ fname:
-     PF_DST      .req        r12
-     PF_MASK     .req        r14
- 
-+.if prefetch_distance == 0
-+    .set ADVANCED_PREFETCH_ENABLED, 0
-+.else
-+    .set ADVANCED_PREFETCH_ENABLED, 1
-+.endif
-+
- .if mask_bpp == 0
-     ORIG_W      .req        r7      /* saved original width */
-     DUMMY       .req        r8      /* temporary register */
-@@ -374,12 +391,12 @@ fname:
-     ldr         MASK_STRIDE, [sp, #52]
- .endif
-     mov         DST_R, DST_W
--    mov         PF_SRC, SRC
--    mov         PF_DST, DST_R
--    mov         PF_MASK, MASK
--    mov         PF_CTL, H, lsl #4
--    /* pf_ctl = 10 | ((h - 1) << 4) */
--    add         PF_CTL, #(prefetch_distance - 0x10)
-+    PF mov      PF_SRC, SRC
-+    PF mov      PF_DST, DST_R
-+    PF mov      PF_MASK, MASK
-+    /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
-+    PF mov      PF_CTL, H, lsl #4
-+    PF add      PF_CTL, #(prefetch_distance - 0x10)
- 
-     init
- .if regs_shortage
-@@ -412,7 +429,7 @@ fname:
- .else
-     add         DST_R, DST_R, #lowbit
- .endif
--    add         PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
-+    PF add      PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
-     sub         W, W, #(lowbit * 8 / dst_w_bpp)
- 1:
- .endif
-@@ -444,7 +461,7 @@ fname:
-                 (src_basereg - pixblock_size * src_bpp / 64), SRC
-     pixld       pixblock_size, mask_bpp, \
-                 (mask_basereg - pixblock_size * mask_bpp / 64), MASK
--    add         PF_X, PF_X, #pixblock_size
-+    PF add      PF_X, PF_X, #pixblock_size
-     process_pixblock_head
-     cache_preload 0, pixblock_size
-     subs        W, W, #(pixblock_size * 2)
-@@ -468,7 +485,7 @@ fname:
-     pixld       chunk_size, src_bpp, src_basereg, SRC
-     pixld       chunk_size, mask_bpp, mask_basereg, MASK
-     pixld_a     chunk_size, dst_r_bpp, dst_r_basereg, DST_R
--    add         PF_X, PF_X, #chunk_size
-+    PF add      PF_X, PF_X, #chunk_size
- 1:
- .endif
- .endr
---
-cgit v0.8.2
diff --git a/recipes/xorg-lib/pixman/remove-broken.patch b/recipes/xorg-lib/pixman/remove-broken.patch
deleted file mode 100644
index fd025b4bbd..0000000000
--- a/recipes/xorg-lib/pixman/remove-broken.patch
+++ /dev/null
@@ -1,826 +0,0 @@
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Sun, 26 Jul 2009 22:21:26 +0000 (+0300)
-Subject: ARM: Removal of unused/broken NEON code
-X-Git-Url: http://siarhei.siamashka.name/gitweb/?p=pixman.git;a=commitdiff_plain;h=7ef2322eefcccc28a2d45c0da22c0fee88b8f464
-
-ARM: Removal of unused/broken NEON code
----
-
-diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
-index 4125d1b..9404c70 100644
---- a/pixman/pixman-arm-neon.c
-+++ b/pixman/pixman-arm-neon.c
-@@ -1895,710 +1895,6 @@ pixman_fill_neon (uint32_t *bits,
- #endif
- }
- 
--/* TODO: is there a more generic way of doing this being introduced? */
--#define NEON_SCANLINE_BUFFER_PIXELS (1024)
--
--static inline void
--neon_quadword_copy (void *   dst,
--		    void *   src,
--		    uint32_t count,         /* of quadwords */
--		    uint32_t trailer_count  /* of bytes */)
--{
--    uint8_t *t_dst = dst, *t_src = src;
--
--    /* Uses aligned multi-register loads to maximise read bandwidth
--     * on uncached memory such as framebuffers
--     * The accesses do not have the aligned qualifiers, so that the copy
--     * may convert between aligned-uncached and unaligned-cached memory.
--     * It is assumed that the CPU can infer alignedness from the address.
--     */
--
--#ifdef USE_GCC_INLINE_ASM
--
--    asm volatile (
--        "	cmp       %[count], #8				\n"
--        "	blt 1f    @ skip oversized fragments		\n"
--        "0: @ start with eight quadwords at a time		\n"
--        "	sub       %[count], %[count], #8		\n"
--        "	vld1.8    {d16, d17, d18, d19}, [%[src]]!		\n"
--        "	vld1.8    {d20, d21, d22, d23}, [%[src]]!		\n"
--        "	vld1.8    {d24, d25, d26, d27}, [%[src]]!		\n"
--        "	vld1.8    {d28, d29, d30, d31}, [%[src]]!		\n"
--        "	cmp       %[count], #8				\n"
--        "	vst1.8    {d16, d17, d18, d19}, [%[dst]]!		\n"
--        "	vst1.8    {d20, d21, d22, d23}, [%[dst]]!		\n"
--        "	vst1.8    {d24, d25, d26, d27}, [%[dst]]!		\n"
--        "	vst1.8    {d28, d29, d30, d31}, [%[dst]]!		\n"
--        "	bge 0b						\n"
--        "1: @ four quadwords					\n"
--        "	tst       %[count], #4				\n"
--        "	beq 2f    @ skip oversized fragment		\n"
--        "	vld1.8    {d16, d17, d18, d19}, [%[src]]!		\n"
--        "	vld1.8    {d20, d21, d22, d23}, [%[src]]!		\n"
--        "	vst1.8    {d16, d17, d18, d19}, [%[dst]]!		\n"
--        "	vst1.8    {d20, d21, d22, d23}, [%[dst]]!		\n"
--        "2: @ two quadwords					\n"
--        "	tst       %[count], #2				\n"
--        "	beq 3f    @ skip oversized fragment		\n"
--        "	vld1.8    {d16, d17, d18, d19}, [%[src]]!		\n"
--        "	vst1.8    {d16, d17, d18, d19}, [%[dst]]!		\n"
--        "3: @ one quadword					\n"
--        "	tst       %[count], #1				\n"
--        "	beq 4f    @ skip oversized fragment		\n"
--        "	vld1.8    {d16, d17}, [%[src]]!			\n"
--        "	vst1.8    {d16, d17}, [%[dst]]!			\n"
--        "4: @ end						\n"
--
--        /* Clobbered input registers marked as input/outputs */
--	: [dst] "+r" (t_dst), [src] "+r" (t_src), [count] "+r" (count)
--
--	  /* No unclobbered inputs */
--	:
--
--        /* Clobbered vector registers */
--	: "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25",
--	  "d26", "d27", "d28", "d29", "d30", "d31", "cc", "memory");
--
--#else
--
--    while (count >= 8)
--    {
--	uint8x16x4_t t1 = vld4q_u8 (t_src);
--	uint8x16x4_t t2 = vld4q_u8 (t_src + sizeof(uint8x16x4_t));
--	
--	t_src += sizeof(uint8x16x4_t) * 2;
--	vst4q_u8 (t_dst, t1);
--	vst4q_u8 (t_dst + sizeof(uint8x16x4_t), t2);
--	t_dst += sizeof(uint8x16x4_t) * 2;
--	count -= 8;
--    }
--
--    if (count & 4)
--    {
--	uint8x16x4_t t1 = vld4q_u8 (t_src);
--	
--	t_src += sizeof(uint8x16x4_t);
--	vst4q_u8 (t_dst, t1);
--	t_dst += sizeof(uint8x16x4_t);
--    }
--
--    if (count & 2)
--    {
--	uint8x8x4_t t1 = vld4_u8 (t_src);
--	
--	t_src += sizeof(uint8x8x4_t);
--	vst4_u8 (t_dst, t1);
--	t_dst += sizeof(uint8x8x4_t);
--    }
--
--    if (count & 1)
--    {
--	uint8x16_t t1 = vld1q_u8 (t_src);
--	
--	t_src += sizeof(uint8x16_t);
--	vst1q_u8 (t_dst, t1);
--	t_dst += sizeof(uint8x16_t);
--    }
--
--#endif  /* !USE_GCC_INLINE_ASM */
--
--    if (trailer_count)
--    {
--	if (trailer_count & 8)
--	{
--	    uint8x8_t t1 = vld1_u8 (t_src);
--	    
--	    t_src += sizeof(uint8x8_t);
--	    vst1_u8 (t_dst, t1);
--	    t_dst += sizeof(uint8x8_t);
--	}
--
--	if (trailer_count & 4)
--	{
--	    *((uint32_t*) t_dst) = *((uint32_t*) t_src);
--	    
--	    t_dst += 4;
--	    t_src += 4;
--	}
--
--	if (trailer_count & 2)
--	{
--	    *((uint16_t*) t_dst) = *((uint16_t*) t_src);
--	    
--	    t_dst += 2;
--	    t_src += 2;
--	}
--
--	if (trailer_count & 1)
--	{
--	    *t_dst++ = *t_src++;
--	}
--    }
--}
--
--static inline void
--solid_over_565_8_pix_neon (uint32_t  glyph_colour,
--                           uint16_t *dest,
--                           uint8_t * in_mask,
--                           uint32_t  dest_stride,    /* bytes, not elements */
--                           uint32_t  mask_stride,
--                           uint32_t  count           /* 8-pixel groups */)
--{
--    /* Inner loop of glyph blitter (solid colour, alpha mask) */
--
--#ifdef USE_GCC_INLINE_ASM
--
--    asm volatile (
--        "	vld4.8 {d20[], d21[], d22[], d23[]}, [%[glyph_colour]]  @ splat solid colour components	\n"
--        "0:	@ loop																				\n"
--        "	vld1.16   {d0, d1}, [%[dest]]         @ load first pixels from framebuffer			\n"
--        "	vld1.8    {d17}, [%[in_mask]]         @ load alpha mask of glyph						\n"
--        "	vmull.u8  q9, d17, d23               @ apply glyph colour alpha to mask				\n"
--        "	vshrn.u16 d17, q9, #8                @ reformat it to match original mask			\n"
--        "	vmvn      d18, d17                   @ we need the inverse mask for the background	\n"
--        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits				\n"
--        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels			\n"
--        "	vshrn.u16 d4, q0, #3                 @ unpack green									\n"
--        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)			\n"
--        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)		\n"
--        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)			\n"
--        "	vmull.u8  q1, d2, d18                @ apply inverse mask to background red...		\n"
--        "	vmull.u8  q2, d4, d18                @ ...green...									\n"
--        "	vmull.u8  q3, d6, d18                @ ...blue										\n"
--        "	subs      %[count], %[count], #1     @ decrement/test loop counter					\n"
--        "	vmlal.u8  q1, d17, d22               @ add masked foreground red...					\n"
--        "	vmlal.u8  q2, d17, d21               @ ...green...									\n"
--        "	vmlal.u8  q3, d17, d20               @ ...blue										\n"
--        "	add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait		\n"
--        "	vsri.16   q1, q2, #5                 @ pack green behind red						\n"
--        "	vsri.16   q1, q3, #11                @ pack blue into pixels						\n"
--        "	vst1.16   {d2, d3}, [%[dest]]         @ store composited pixels						\n"
--        "	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer					\n"
--        "	bne 0b                               @ next please									\n"
--
--	/* Clobbered registers marked as input/outputs */
--	: [dest] "+r" (dest), [in_mask] "+r" (in_mask), [count] "+r" (count)
--	  
--	  /* Inputs */
--	: [dest_stride] "r" (dest_stride), [mask_stride] "r" (mask_stride), [glyph_colour] "r" (&glyph_colour)
--
--	  /* Clobbers, including the inputs we modify, and potentially lots of memory */
--	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d19",
--	  "d20", "d21", "d22", "d23", "d24", "d25", "cc", "memory"
--        );
--
--#else
--
--    uint8x8x4_t solid_colour = vld4_dup_u8 ((uint8_t*) &glyph_colour);
--
--    while (count--)
--    {
--	uint16x8_t pixels = vld1q_u16 (dest);
--	uint8x8_t mask = vshrn_n_u16 (vmull_u8 (solid_colour.val[3], vld1_u8 (in_mask)), 8);
--	uint8x8_t mask_image = vmvn_u8 (mask);
--
--	uint8x8_t t_red   = vshrn_n_u16 (pixels, 8);
--	uint8x8_t t_green = vshrn_n_u16 (pixels, 3);
--	uint8x8_t t_blue  = vshrn_n_u16 (vsli_n_u8 (pixels, pixels, 5), 2);
--
--	uint16x8_t s_red   = vmull_u8 (vsri_n_u8 (t_red, t_red, 5), mask_image);
--	uint16x8_t s_green = vmull_u8 (vsri_n_u8 (t_green, t_green, 6), mask_image);
--	uint16x8_t s_blue  = vmull_u8 (t_blue, mask_image);
--
--	s_red   = vmlal (s_red, mask, solid_colour.val[2]);
--	s_green = vmlal (s_green, mask, solid_colour.val[1]);
--	s_blue  = vmlal (s_blue, mask, solid_colour.val[0]);
--
--	pixels = vsri_n_u16 (s_red, s_green, 5);
--	pixels = vsri_n_u16 (pixels, s_blue, 11);
--	vst1q_u16 (dest, pixels);
--
--	dest += dest_stride;
--	mask += mask_stride;
--    }
--
--#endif
--}
--
--#if 0 /* this is broken currently */
--static void
--neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
--                              pixman_op_t               op,
--                              pixman_image_t *          src_image,
--                              pixman_image_t *          mask_image,
--                              pixman_image_t *          dst_image,
--                              int32_t                   src_x,
--                              int32_t                   src_y,
--                              int32_t                   mask_x,
--                              int32_t                   mask_y,
--                              int32_t                   dest_x,
--                              int32_t                   dest_y,
--                              int32_t                   width,
--                              int32_t                   height)
--{
--    uint32_t  src, srca;
--    uint16_t *dst_line, *aligned_line;
--    uint8_t  *mask_line;
--    uint32_t  dst_stride, mask_stride;
--    uint32_t  kernel_count, copy_count, copy_tail;
--    uint8_t   kernel_offset, copy_offset;
--
--    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
--
--    /* bail out if fully transparent or degenerate */
--    srca = src >> 24;
--    if (src == 0)
--	return;
--
--    if (width == 0 || height == 0)
--	return;
--
--    if (width > NEON_SCANLINE_BUFFER_PIXELS)
--    {
--	/* split the blit, so we can use a fixed-size scanline buffer
--	 * TODO: there must be a more elegant way of doing this.
--	 */
--	int x;
--	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
--	{
--	    neon_composite_over_n_8_0565 (
--		impl, op,
--		src_image, mask_image, dst_image,
--		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
--		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
--	}
--
--	return;
--    }
--    
--    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
--    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
--
--    /* keep within minimum number of aligned quadwords on width
--     * while also keeping the minimum number of columns to process
--     */
--    {
--	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
--	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
--	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
--	/* the fast copy should be quadword aligned */
--	copy_offset = dst_line - ((uint16_t*) aligned_left);
--	aligned_line = dst_line - copy_offset;
--	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
--	copy_tail = 0;
--
--	if (aligned_right - aligned_left > ceiling_length)
--	{
--	    /* unaligned routine is tightest */
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    kernel_offset = copy_offset;
--	}
--	else
--	{
--	    /* aligned routine is equally tight, so it is safer to align */
--	    kernel_count = copy_count;
--	    kernel_offset = 0;
--	}
--
--	/* We should avoid reading beyond scanline ends for safety */
--	if (aligned_line < (dst_line - dest_x) ||
--	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
--	{
--	    /* switch to precise read */
--	    copy_offset = kernel_offset = 0;
--	    aligned_line = dst_line;
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    copy_count = (width * sizeof(*dst_line)) >> 4;
--	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
--	}
--    }
--
--    {
--	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8];         /* deliberately not initialised */
--	uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8];
--	int y = height;
--
--	/* row-major order */
--	/* left edge, middle block, right edge */
--	for ( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride)
--	{
--	    /* We don't want to overrun the edges of the glyph,
--	     * so realign the edge data into known buffers
--	     */
--	    neon_quadword_copy (glyph_line + copy_offset, mask_line, width >> 4, width & 0xF);
--
--	    /* Uncached framebuffer access is really, really slow
--	     * if we do it piecemeal. It should be much faster if we
--	     * grab it all at once. One scanline should easily fit in
--	     * L1 cache, so this should not waste RAM bandwidth.
--	     */
--	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
--	    /* Apply the actual filter */
--	    solid_over_565_8_pix_neon (
--		src, scan_line + kernel_offset,
--		glyph_line + kernel_offset, 8 * sizeof(*dst_line),
--		8, kernel_count);
--
--	    /* Copy the modified scanline back */
--	    neon_quadword_copy (dst_line, scan_line + copy_offset,
--				width >> 3, (width & 7) * 2);
--	}
--    }
--}
--#endif
--
--#ifdef USE_GCC_INLINE_ASM
--
--static inline void
--plain_over_565_8_pix_neon (uint32_t  colour,
--			   uint16_t *dest,
--			   uint32_t  dest_stride,     /* bytes, not elements */
--			   uint32_t  count            /* 8-pixel groups */)
--{
--    /* Inner loop for plain translucent rects
--     * (solid colour without alpha mask)
--     */
--    asm volatile (
--        "	vld4.8   {d20[], d21[], d22[], d23[]}, [%[colour]]  @ solid colour load/splat \n"
--        "	vmull.u8  q12, d23, d22              @ premultiply alpha red   \n"
--        "	vmull.u8  q13, d23, d21              @ premultiply alpha green \n"
--        "	vmull.u8  q14, d23, d20              @ premultiply alpha blue  \n"
--        "	vmvn      d18, d23                   @ inverse alpha for background \n"
--        "0:	@ loop\n"
--        "	vld1.16   {d0, d1}, [%[dest]]         @ load first pixels from framebuffer	\n"
--        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
--        "	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
--        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
--        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
--        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
--        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
--        "	vmov      q0, q12                    @ retrieve foreground red   \n"
--        "	vmlal.u8  q0, d2, d18                @ blend red - my kingdom for a four-operand MLA \n"
--        "	vmov      q1, q13                    @ retrieve foreground green \n"
--        "	vmlal.u8  q1, d4, d18                @ blend green               \n"
--        "	vmov      q2, q14                    @ retrieve foreground blue  \n"
--        "	vmlal.u8  q2, d6, d18                @ blend blue                \n"
--        "	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
--        "	vsri.16   q0, q1, #5                 @ pack green behind red			\n"
--        "	vsri.16   q0, q2, #11                @ pack blue into pixels			\n"
--        "	vst1.16   {d0, d1}, [%[dest]]         @ store composited pixels			\n"
--        "	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer		\n"
--        "	bne 0b                               @ next please				\n"
--
--        /* Clobbered registers marked as input/outputs */
--	: [dest] "+r" (dest), [count] "+r" (count)
--
--	  /* Inputs */
--	: [dest_stride] "r" (dest_stride), [colour] "r" (&colour)
--
--	  /* Clobbers, including the inputs we modify, and
--	   * potentially lots of memory
--	   */
--	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d18", "d19",
--	  "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
--	  "cc", "memory"
--        );
--}
--
--static void
--neon_composite_over_n_0565 (pixman_implementation_t * impl,
--                            pixman_op_t               op,
--                            pixman_image_t *          src_image,
--                            pixman_image_t *          mask_image,
--                            pixman_image_t *          dst_image,
--                            int32_t                   src_x,
--                            int32_t                   src_y,
--                            int32_t                   mask_x,
--                            int32_t                   mask_y,
--                            int32_t                   dest_x,
--                            int32_t                   dest_y,
--                            int32_t                   width,
--                            int32_t                   height)
--{
--    uint32_t src, srca;
--    uint16_t    *dst_line, *aligned_line;
--    uint32_t dst_stride;
--    uint32_t kernel_count, copy_count, copy_tail;
--    uint8_t kernel_offset, copy_offset;
--
--    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
--
--    /* bail out if fully transparent */
--    srca = src >> 24;
--    if (src == 0)
--	return;
--    
--    if (width == 0 || height == 0)
--	return;
--
--    if (width > NEON_SCANLINE_BUFFER_PIXELS)
--    {
--	/* split the blit, so we can use a fixed-size scanline buffer *
--	 * TODO: there must be a more elegant way of doing this.
--	 */
--	int x;
--	
--	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
--	{
--	    neon_composite_over_n_0565 (
--		impl, op,
--		src_image, mask_image, dst_image,
--		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
--		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
--	}
--	return;
--    }
--
--    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
--
--    /* keep within minimum number of aligned quadwords on width
--     * while also keeping the minimum number of columns to process
--     */
--    {
--	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
--	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
--	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
--	/* the fast copy should be quadword aligned */
--	copy_offset = dst_line - ((uint16_t*) aligned_left);
--	aligned_line = dst_line - copy_offset;
--	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
--	copy_tail = 0;
--
--	if (aligned_right - aligned_left > ceiling_length)
--	{
--	    /* unaligned routine is tightest */
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    kernel_offset = copy_offset;
--	}
--	else
--	{
--	    /* aligned routine is equally tight, so it is safer to align */
--	    kernel_count = copy_count;
--	    kernel_offset = 0;
--	}
--
--	/* We should avoid reading beyond scanline ends for safety */
--	if (aligned_line < (dst_line - dest_x) ||
--	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
--	{
--	    /* switch to precise read */
--	    copy_offset = kernel_offset = 0;
--	    aligned_line = dst_line;
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    copy_count = (width * sizeof(*dst_line)) >> 4;
--	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
--	}
--    }
--
--    {
--	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8];  /* deliberately not initialised */
--
--	/* row-major order */
--	/* left edge, middle block, right edge */
--	for ( ; height--; aligned_line += dst_stride, dst_line += dst_stride)
--	{
--	    /* Uncached framebuffer access is really, really slow if we do it piecemeal.
--	     * It should be much faster if we grab it all at once.
--	     * One scanline should easily fit in L1 cache, so this should
--	     * not waste RAM bandwidth.
--	     */
--	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
--	    /* Apply the actual filter */
--	    plain_over_565_8_pix_neon (
--		src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count);
--
--	    /* Copy the modified scanline back */
--	    neon_quadword_copy (
--		dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
--	}
--    }
--}
--
--static inline void
--ARGB8_over_565_8_pix_neon (uint32_t *src,
--                           uint16_t *dest,
--                           uint32_t  src_stride,     /* bytes, not elements */
--                           uint32_t  count           /* 8-pixel groups */)
--{
--    asm volatile (
--        "0:	@ loop\n"
--        "	pld   [%[src], %[src_stride]]         @ preload from next scanline	\n"
--        "	vld1.16   {d0, d1}, [%[dest]]         @ load pixels from framebuffer	\n"
--        "	vld4.8   {d20, d21, d22, d23},[%[src]]! @ load source image pixels		\n"
--        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
--        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
--        "	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
--        "	vmvn      d18, d23                   @ we need the inverse alpha for the background	\n"
--        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
--        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
--        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
--        "	vmull.u8  q1, d2, d18                @ apply inverse alpha to background red...	\n"
--        "	vmull.u8  q2, d4, d18                @ ...green...				\n"
--        "	vmull.u8  q3, d6, d18                @ ...blue					\n"
--        "	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
--        "	vmlal.u8  q1, d23, d22               @ add blended foreground red...		\n"
--        "	vmlal.u8  q2, d23, d21               @ ...green...				\n"
--        "	vmlal.u8  q3, d23, d20               @ ...blue					\n"
--        "	vsri.16   q1, q2, #5                 @ pack green behind red			\n"
--        "	vsri.16   q1, q3, #11                @ pack blue into pixels			\n"
--        "	vst1.16   {d2, d3}, [%[dest]]!        @ store composited pixels			\n"
--        "	bne 0b                               @ next please				\n"
--
--        /* Clobbered registers marked as input/outputs */
--	: [dest] "+r" (dest), [src] "+r" (src), [count] "+r" (count)
--
--	  /* Inputs */
--	: [src_stride] "r" (src_stride)
--
--	  /* Clobbers, including the inputs we modify, and potentially lots of memory */
--	: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d17", "d18", "d20",
--	  "d21", "d22", "d23", "cc", "memory"
--        );
--}
--
--static void
--neon_composite_over_8888_0565 (pixman_implementation_t * impl,
--                               pixman_op_t               op,
--                               pixman_image_t *          src_image,
--                               pixman_image_t *          mask_image,
--                               pixman_image_t *          dst_image,
--                               int32_t                   src_x,
--                               int32_t                   src_y,
--                               int32_t                   mask_x,
--                               int32_t                   mask_y,
--                               int32_t                   dest_x,
--                               int32_t                   dest_y,
--                               int32_t                   width,
--                               int32_t                   height)
--{
--    uint32_t    *src_line;
--    uint16_t    *dst_line, *aligned_line;
--    uint32_t dst_stride, src_stride;
--    uint32_t kernel_count, copy_count, copy_tail;
--    uint8_t kernel_offset, copy_offset;
--
--    /* we assume mask is opaque 
--     * so the only alpha to deal with is embedded in src
--     */
--    if (width > NEON_SCANLINE_BUFFER_PIXELS)
--    {
--	/* split the blit, so we can use a fixed-size scanline buffer */
--	int x;
--	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
--	{
--	    neon_composite_over_8888_0565 (
--		impl, op,
--		src_image, mask_image, dst_image,
--		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
--		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
--	}
--	return;
--    }
--
--    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
--    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
--
--    /* keep within minimum number of aligned quadwords on width
--     * while also keeping the minimum number of columns to process
--     */
--    {
--	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
--	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
--	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
--
--	/* the fast copy should be quadword aligned */
--	copy_offset = dst_line - ((uint16_t*) aligned_left);
--	aligned_line = dst_line - copy_offset;
--	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
--	copy_tail = 0;
--
--	if (aligned_right - aligned_left > ceiling_length)
--	{
--	    /* unaligned routine is tightest */
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    kernel_offset = copy_offset;
--	}
--	else
--	{
--	    /* aligned routine is equally tight, so it is safer to align */
--	    kernel_count = copy_count;
--	    kernel_offset = 0;
--	}
--
--	/* We should avoid reading beyond scanline ends for safety */
--	if (aligned_line < (dst_line - dest_x) ||
--	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
--	{
--	    /* switch to precise read */
--	    copy_offset = kernel_offset = 0;
--	    aligned_line = dst_line;
--	    kernel_count = (uint32_t) (ceiling_length >> 4);
--	    copy_count = (width * sizeof(*dst_line)) >> 4;
--	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
--	}
--    }
--
--    /* Preload the first input scanline */
--    {
--	uint8_t *src_ptr = (uint8_t*) src_line;
--	uint32_t count = (width + 15) / 16;
--
--#ifdef USE_GCC_INLINE_ASM
--	asm volatile (
--	    "0: @ loop						\n"
--	    "	subs    %[count], %[count], #1			\n"
--	    "	pld     [%[src]]				\n"
--	    "	add     %[src], %[src], #64			\n"
--	    "	bgt 0b						\n"
--
--	    /* Clobbered input registers marked as input/outputs */
--	    : [src] "+r" (src_ptr), [count] "+r" (count)
--	    :     /* no unclobbered inputs */
--	    : "cc"
--	    );
--#else
--	do
--	{
--	    __pld (src_ptr);
--	    src_ptr += 64;
--	}
--	while (--count);
--#endif
--    }
--
--    {
--	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
--
--	/* row-major order */
--	/* left edge, middle block, right edge */
--	for ( ; height--; src_line += src_stride, aligned_line += dst_stride)
--	{
--	    /* Uncached framebuffer access is really, really slow if we do
--	     * it piecemeal. It should be much faster if we grab it all at
--	     * once. One scanline should easily fit in L1 cache, so this
--	     * should not waste RAM bandwidth.
--	     */
--	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
--
--	    /* Apply the actual filter */
--	    ARGB8_over_565_8_pix_neon (
--		src_line, scan_line + kernel_offset,
--		src_stride * sizeof(*src_line), kernel_count);
--
--	    /* Copy the modified scanline back */
--	    neon_quadword_copy (dst_line,
--				scan_line + copy_offset,
--				width >> 3, (width & 7) * 2);
--	}
--    }
--}
--
--#endif  /* USE_GCC_INLINE_ASM */
--
- static const pixman_fast_path_t arm_neon_fast_path_array[] =
- {
-     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8888_8_8,     0 },
-@@ -2612,12 +1908,6 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
- #ifdef USE_GCC_INLINE_ASM
-     { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_16_16,        0 },
-     { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_16_16,        0 },
--#if 0 /* this code has some bugs */
--    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_n_0565,      0 },
--    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_n_0565,      0 },
--    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565,   0 },
--    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565,   0 },
--#endif
- #endif
-     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888,   0 },
-     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_8888_8888,   0 },
-@@ -2668,79 +1958,6 @@ arm_neon_composite (pixman_implementation_t *imp,
- }
- 
- static pixman_bool_t
--pixman_blt_neon (void *src_bits,
--                 void *dst_bits,
--                 int   src_stride,
--                 int   dst_stride,
--                 int   src_bpp,
--                 int   dst_bpp,
--                 int   src_x,
--                 int   src_y,
--                 int   dst_x,
--                 int   dst_y,
--                 int   width,
--                 int   height)
--{
--    if (!width || !height)
--	return TRUE;
--
--    /* accelerate only straight copies involving complete bytes */
--    if (src_bpp != dst_bpp || (src_bpp & 7))
--	return FALSE;
--
--    {
--	uint32_t bytes_per_pixel = src_bpp >> 3;
--	uint32_t byte_width = width * bytes_per_pixel;
--	/* parameter is in words for some reason */
--	int32_t src_stride_bytes = src_stride * 4;
--	int32_t dst_stride_bytes = dst_stride * 4;
--	uint8_t *src_bytes = ((uint8_t*) src_bits) +
--	    src_y * src_stride_bytes + src_x * bytes_per_pixel;
--	uint8_t *dst_bytes = ((uint8_t*) dst_bits) +
--	    dst_y * dst_stride_bytes + dst_x * bytes_per_pixel;
--	uint32_t quadword_count = byte_width / 16;
--	uint32_t offset         = byte_width % 16;
--
--	while (height--)
--	{
--	    neon_quadword_copy (dst_bytes, src_bytes, quadword_count, offset);
--	    src_bytes += src_stride_bytes;
--	    dst_bytes += dst_stride_bytes;
--	}
--    }
--
--    return TRUE;
--}
--
--static pixman_bool_t
--arm_neon_blt (pixman_implementation_t *imp,
--              uint32_t *               src_bits,
--              uint32_t *               dst_bits,
--              int                      src_stride,
--              int                      dst_stride,
--              int                      src_bpp,
--              int                      dst_bpp,
--              int                      src_x,
--              int                      src_y,
--              int                      dst_x,
--              int                      dst_y,
--              int                      width,
--              int                      height)
--{
--    if (pixman_blt_neon (
--            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
--            src_x, src_y, dst_x, dst_y, width, height))
--    {
--	return TRUE;
--    }
--
--    return _pixman_implementation_blt (
--               imp->delegate,
--               src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
--               src_x, src_y, dst_x, dst_y, width, height);
--}
--
--static pixman_bool_t
- arm_neon_fill (pixman_implementation_t *imp,
-                uint32_t *               bits,
-                int                      stride,
-@@ -2765,9 +1982,6 @@ _pixman_implementation_create_arm_neon (void)
-     pixman_implementation_t *imp = _pixman_implementation_create (simd);
- 
-     imp->composite = arm_neon_composite;
--#if 0 /* this code has some bugs */
--    imp->blt = arm_neon_blt;
--#endif
-     imp->fill = arm_neon_fill;
- 
-     return imp;
diff --git a/recipes/xorg-lib/pixman/src-8888-0565.patch b/recipes/xorg-lib/pixman/src-8888-0565.patch
deleted file mode 100644
index c544225f65..0000000000
--- a/recipes/xorg-lib/pixman/src-8888-0565.patch
+++ /dev/null
@@ -1,324 +0,0 @@
-From 6494f9ae8820078d0e6109bf8f294156f7a5da4c Mon Sep 17 00:00:00 2001
-From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
-Date: Fri, 05 Mar 2010 00:40:34 +0000
-Subject: ARM: added 'armv6_composite_src_8888_0565' fast path
-
-Provides ~3x performance improvement when working with
-data in L1 cache, and ~80% performace improvement when working
-with memory. This fast path is important for 32bpp -> 16bpp
-color format conversion and is commonly used with 16bpp desktop.
-
-Microbenchmark from N800 (ARM11 @ 400MHz), measured in MPix/s:
-
-before:
-
- src_8888_0565 = L1:  21.54 M: 15.62
-
-after (armv4):
-
- src_8888_0565 = L1:  45.26 M: 23.29
-
-after (armv6):
-
- src_8888_0565 = L1:  60.62 M: 28.37
----
-diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
-index c375c01..69243c1 100644
---- a/pixman/pixman-arm-simd.c
-+++ b/pixman/pixman-arm-simd.c
-@@ -604,6 +604,282 @@ armv6_composite_over_n_8_0565 (pixman_implementation_t * impl,
- 	dst_stride - width, mask_stride - width, height);
- }
- 
-+static inline void
-+armv4_composite_src_8888_0565_asm (
-+    uint16_t *dst, uint32_t *src, int w, int dst_stride,
-+    int src_stride, int h)
-+{
-+    uint32_t a, x, y, c1F001F = 0x1F001F, cFFFF = 0xFFFF;
-+    int backup_w = w;
-+    while (h--)
-+    {
-+        w = backup_w;
-+        if (w > 0 && (uintptr_t)dst & 2)
-+        {
-+            x = *src++;
-+
-+            a = (x >> 3) & c1F001F;
-+            x &= 0xFC00;
-+            a |= a >> 5;
-+            a |= x >> 5;
-+
-+            *dst++ = a;
-+            w--;
-+        }
-+
-+        asm volatile(
-+            "subs  %[w], %[w], #2\n"
-+            "blt   2f\n"
-+        "1:\n"
-+            "ldr   %[x], [%[src]], #4\n"
-+            "ldr   %[y], [%[src]], #4\n"
-+            "subs  %[w], %[w], #2\n"
-+
-+            "and   %[a], %[c1F001F], %[x], lsr #3\n"
-+            "and   %[x], %[x], #0xFC00\n\n"
-+            "orr   %[a], %[a], %[a], lsr #5\n"
-+            "orr   %[x], %[a], %[x], lsr #5\n"
-+
-+            "and   %[a], %[c1F001F], %[y], lsr #3\n"
-+            "and   %[y], %[y], #0xFC00\n\n"
-+            "orr   %[a], %[a], %[a], lsr #5\n"
-+            "orr   %[y], %[a], %[y], lsr #5\n"
-+            /*
-+             * Writing single 32-bit value is much faster than two
-+             * separate 16-bit values for older CPUs without (efficient)
-+             * write combining, even though it costs an extra instruction.
-+             */
-+            "and   %[x], %[x], %[cFFFF]\n"
-+            "orr   %[x], %[x], %[y], lsl #16\n"
-+            "str   %[x], [%[dst]], #4\n"
-+            "bge   1b\n"
-+        "2:\n"
-+        : [c1F001F] "+&r" (c1F001F), [cFFFF] "+&r" (cFFFF),
-+          [src] "+&r" (src), [dst] "+&r" (dst), [a] "=&r" (a),
-+          [x] "=&r" (x), [y] "=&r" (y), [w] "+&r" (w)
-+        );
-+
-+        if (w & 1)
-+        {
-+            x = *src++;
-+
-+            a = (x >> 3) & c1F001F;
-+            x = x & 0xFC00;
-+            a |= a >> 5;
-+            a |= x >> 5;
-+
-+            *dst++ = a;
-+        }
-+
-+        src += src_stride - backup_w;
-+        dst += dst_stride - backup_w;
-+    }
-+}
-+
-+/*
-+ * Conversion x8r8g8b8 -> r5g6b5
-+ *
-+ * Note: 'w' must be >= 7 here
-+ */
-+static void __attribute__((naked))
-+armv6_composite_src_8888_0565_asm (
-+    uint16_t *dst, uint32_t *src, int w, int dst_stride,
-+    int src_stride, int h)
-+{
-+    asm volatile(
-+        /* define supplementary macros */
-+        ".macro cvt8888to565 PIX\n"
-+            "and   A, C1F001F, \\PIX, lsr #3\n"
-+            "and   \\PIX, \\PIX, #0xFC00\n\n"
-+            "orr   A, A, A, lsr #5\n"
-+            "orr   \\PIX, A, \\PIX, lsr #5\n"
-+        ".endm\n"
-+
-+        ".macro combine_pixels_pair PIX1, PIX2\n"
-+            /* Note: assume little endian byte order */
-+            "pkhbt \\PIX1, \\PIX1, \\PIX2, lsl #16\n"
-+        ".endm\n"
-+
-+        /* function entry, save all registers (10 words) to stack */
-+        "stmdb   sp!, {r4-r11, ip, lr}\n"
-+
-+        /* define some aliases */
-+        "DST     .req  r0\n"
-+        "SRC     .req  r1\n"
-+        "W       .req  r2\n"
-+        "H       .req  r3\n"
-+
-+        "TMP1    .req  r4\n"
-+        "TMP2    .req  r5\n"
-+        "TMP3    .req  r6\n"
-+        "TMP4    .req  r7\n"
-+        "TMP5    .req  r8\n"
-+        "TMP6    .req  r9\n"
-+        "TMP7    .req  r10\n"
-+        "TMP8    .req  r11\n"
-+
-+        "C1F001F .req  ip\n"
-+        "A       .req  lr\n"
-+
-+        "ldr     TMP1, [sp, #(10*4+0)]\n" /* load src_stride */
-+        "ldr     C1F001F, =0x1F001F\n"
-+        "sub     r3, r3, W\n"
-+        "str     r3, [sp, #(10*4+0)]\n" /* store (dst_stride-w) */
-+        "ldr     r3, [sp, #(10*4+4)]\n" /* load h */
-+        "sub     TMP1, TMP1, W\n"
-+        "str     TMP1, [sp, #(10*4+4)]\n" /* store (src_stride-w) */
-+
-+        "str     W, [sp, #(8*4)]\n" /* saved ip = W */
-+
-+    "0:\n"
-+        "subs    H, H, #1\n"
-+        "blt     6f\n"
-+    "1:\n"
-+        /* align DST at 4 byte boundary */
-+        "tst     DST, #2\n"
-+        "beq     2f\n"
-+        "ldr     TMP1, [SRC], #4\n"
-+        "sub     W, W, #1\n"
-+        "cvt8888to565 TMP1\n"
-+        "strh    TMP1, [DST], #2\n"
-+    "2:"
-+        /* align DST at 8 byte boundary */
-+        "tst     DST, #4\n"
-+        "beq     2f\n"
-+        "ldmia   SRC!, {TMP1, TMP2}\n"
-+        "sub     W, W, #2\n"
-+        "cvt8888to565 TMP1\n"
-+        "cvt8888to565 TMP2\n"
-+        "combine_pixels_pair TMP1, TMP2\n"
-+        "str     TMP1, [DST], #4\n"
-+    "2:"
-+        /* align DST at 16 byte boundary */
-+        "tst     DST, #8\n"
-+        "beq     2f\n"
-+        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
-+        "sub     W, W, #4\n"
-+        "cvt8888to565 TMP1\n"
-+        "cvt8888to565 TMP2\n"
-+        "cvt8888to565 TMP3\n"
-+        "cvt8888to565 TMP4\n"
-+        "combine_pixels_pair TMP1, TMP2\n"
-+        "combine_pixels_pair TMP3, TMP4\n"
-+        "stmia DST!, {TMP1, TMP3}\n"
-+    "2:"
-+        /* inner loop, process 8 pixels per iteration */
-+        "subs    W, W, #8\n"
-+        "blt     4f\n"
-+    "3:\n"
-+        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8}\n"
-+        "subs    W, W, #8\n"
-+        "cvt8888to565 TMP1\n"
-+        "cvt8888to565 TMP2\n"
-+        "cvt8888to565 TMP3\n"
-+        "cvt8888to565 TMP4\n"
-+        "cvt8888to565 TMP5\n"
-+        "cvt8888to565 TMP6\n"
-+        "cvt8888to565 TMP7\n"
-+        "cvt8888to565 TMP8\n"
-+        "combine_pixels_pair TMP1, TMP2\n"
-+        "combine_pixels_pair TMP3, TMP4\n"
-+        "combine_pixels_pair TMP5, TMP6\n"
-+        "combine_pixels_pair TMP7, TMP8\n"
-+        "stmia   DST!, {TMP1, TMP3, TMP5, TMP7}\n"
-+        "bge     3b\n"
-+    "4:\n"
-+
-+        /* process the remaining pixels */
-+        "tst     W, #4\n"
-+        "beq     4f\n"
-+        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
-+        "cvt8888to565 TMP1\n"
-+        "cvt8888to565 TMP2\n"
-+        "cvt8888to565 TMP3\n"
-+        "cvt8888to565 TMP4\n"
-+        "combine_pixels_pair TMP1, TMP2\n"
-+        "combine_pixels_pair TMP3, TMP4\n"
-+        "stmia   DST!, {TMP1, TMP3}\n"
-+    "4:\n"
-+        "tst     W, #2\n"
-+        "beq     4f\n"
-+        "ldmia   SRC!, {TMP1, TMP2}\n"
-+        "cvt8888to565 TMP1\n"
-+        "cvt8888to565 TMP2\n"
-+        "combine_pixels_pair TMP1, TMP2\n"
-+        "str     TMP1, [DST], #4\n"
-+    "4:\n"
-+        "tst     W, #1\n"
-+        "beq     4f\n"
-+        "ldr     TMP1, [SRC], #4\n"
-+        "cvt8888to565 TMP1\n"
-+        "strh    TMP1, [DST], #2\n"
-+    "4:\n"
-+        "ldr     TMP1, [sp, #(10*4+0)]\n" /* (dst_stride-w) */
-+        "ldr     TMP2, [sp, #(10*4+4)]\n" /* (src_stride-w) */
-+        "ldr     W, [sp, #(8*4)]\n"
-+        "subs    H, H, #1\n"
-+        "add     DST, DST, TMP1, lsl #1\n"
-+        "add     SRC, SRC, TMP2, lsl #2\n"
-+        "bge     1b\n"
-+    "6:\n"
-+        /* restore all registers and return */
-+        "ldmia   sp!, {r4-r11, ip, pc}\n"
-+        ".ltorg\n"
-+
-+        ".unreq   DST\n"
-+        ".unreq   SRC\n"
-+        ".unreq   W\n"
-+        ".unreq   H\n"
-+
-+        ".unreq   TMP1\n"
-+        ".unreq   TMP2\n"
-+        ".unreq   TMP3\n"
-+        ".unreq   TMP4\n"
-+        ".unreq   TMP5\n"
-+        ".unreq   TMP6\n"
-+        ".unreq   TMP7\n"
-+        ".unreq   TMP8\n"
-+
-+        ".unreq   C1F001F\n"
-+        ".unreq   A\n"
-+
-+        ".purgem  cvt8888to565\n"
-+        ".purgem  combine_pixels_pair\n"
-+    );
-+}
-+
-+static void
-+armv6_composite_src_8888_0565 (pixman_implementation_t * impl,
-+			       pixman_op_t               op,
-+			       pixman_image_t *          src_image,
-+			       pixman_image_t *          mask_image,
-+			       pixman_image_t *          dst_image,
-+			       int32_t                   src_x,
-+			       int32_t                   src_y,
-+			       int32_t                   mask_x,
-+			       int32_t                   mask_y,
-+			       int32_t                   dest_x,
-+			       int32_t                   dest_y,
-+			       int32_t                   width,
-+			       int32_t                   height)
-+{
-+    uint32_t *src;
-+    uint16_t *dst;
-+    int src_stride, dst_stride;
-+
-+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
-+			   dst_stride, dst, 1);
-+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t,
-+			   src_stride, src, 1);
-+
-+    if (width < 7)
-+	armv4_composite_src_8888_0565_asm (dst, src, width,
-+					   dst_stride, src_stride, height);
-+    else
-+	armv6_composite_src_8888_0565_asm (dst, src, width,
-+					   dst_stride, src_stride, height);
-+}
-+
- #endif
- 
- static const pixman_fast_path_t arm_simd_fast_paths[] =
-@@ -624,6 +900,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
- #if defined(__ARM_EABI__) && defined(__linux__)
-     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, armv6_composite_over_n_8_0565),
-     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, armv6_composite_over_n_8_0565),
-+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
-+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, armv6_composite_src_8888_0565),
-+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
-+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, armv6_composite_src_8888_0565),
- #endif
-     { PIXMAN_OP_NONE },
- };
---
-cgit v0.8.3-6-g21f6
diff --git a/recipes/xorg-lib/pixman/tls.patch b/recipes/xorg-lib/pixman/tls.patch
deleted file mode 100644
index 316caed65f..0000000000
--- a/recipes/xorg-lib/pixman/tls.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From 714559dccda3165a72f0a9935c1edc3aef535f30 Mon Sep 17 00:00:00 2001
-From: Søren Sandmann Pedersen <ssp@redhat.com>
-Date: Wed, 07 Apr 2010 05:44:12 +0000
-Subject: Fixes for pthread thread local storage.
-
-The tls_name_key variable is passed to tls_name_get(), and the first
-time this happens it isn't initialized. tls_name_get() then passes it
-on to tls_name_alloc() which passes it on to pthread_setspecific()
-leading to undefined behavior.
-
-None of this is actually necessary at all because there is only one
-such variable per thread local variable, so it doesn't need to passed
-as a parameter at all.
-
-All of this was pointed out by Tor Lillqvist on the cairo mailing
-list.
----
-diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
-index cdac0d8..531c8c9 100644
---- a/pixman/pixman-compiler.h
-+++ b/pixman/pixman-compiler.h
-@@ -99,16 +99,16 @@
-     }									\
- 									\
-     static type *							\
--    tls_ ## name ## _alloc (key)					\
-+    tls_ ## name ## _alloc (void)					\
-     {									\
- 	type *value = calloc (1, sizeof (type));			\
- 	if (value)							\
--	    pthread_setspecific (key, value);				\
-+	    pthread_setspecific (tls_ ## name ## _key, value);		\
- 	return value;							\
-     }									\
- 									\
-     static force_inline type *						\
--    tls_ ## name ## _get (key)						\
-+    tls_ ## name ## _get (void)						\
-     {									\
- 	type *value = NULL;						\
- 	if (pthread_once (&tls_ ## name ## _once_control,		\
-@@ -116,13 +116,13 @@
- 	{								\
- 	    value = pthread_getspecific (tls_ ## name ## _key);		\
- 	    if (!value)							\
--		value = tls_ ## name ## _alloc (key);			\
-+		value = tls_ ## name ## _alloc ();			\
- 	}								\
- 	return value;							\
-     }
- 
- #   define PIXMAN_GET_THREAD_LOCAL(name)				\
--    tls_ ## name ## _get (tls_ ## name ## _key)
-+    tls_ ## name ## _get ()
- 
- #else
- 
---
-cgit v0.8.3-6-g21f6