about summary refs log tree commit diff
diff options
context:
space:
mode:
author Koen Kooi <koen@openembedded.org> 2011-04-05 13:00:12 +0200
committer Steffen Sledz <sledz@dresearch-fe.de> 2011-04-29 14:08:33 +0200
commit 384b270989d3f4218c6fc01f8a1e1a61b622c99a (patch)
tree 9d0f469880414fad4e13ed93b17d129ef9967223
parent fadd40e8b90197786aa5edc4a6620036e3aa972e (diff)
download openembedded-384b270989d3f4218c6fc01f8a1e1a61b622c99a.zip
openembedded-384b270989d3f4218c6fc01f8a1e1a61b622c99a.tar.gz
openembedded-384b270989d3f4218c6fc01f8a1e1a61b622c99a.tar.bz2
pixman: add 0.21.6 + fixes
Signed-off-by: Koen Kooi <koen@openembedded.org>
Acked-by: Martin Jansa <Martin.Jansa@gmail.com>
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch 42
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch 232
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch 92
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch 40
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch 63
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch 466
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch 136
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch 156
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch 288
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch 156
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch 36
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch 77
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch 131
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch 60
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch 130
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch 271
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch 226
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch 51
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch 50
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch 49
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch 166
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch 54
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch 111
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch 442
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch 187
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch 206
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch 110
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch 37
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch 82
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch 124
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch 70
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch 168
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch 156
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch 166
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch 283
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch 114
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch 91
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch 91
-rw-r--r-- recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch 94
-rw-r--r-- recipes/xorg-lib/pixman_0.21.6.bb 53
40 files changed, 5557 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch b/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch
new file mode 100644
index 0000000..16b6ff1
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch
@@ -0,0 +1,42 @@
+From 20ed723a5a42fb8636bc9a5f32974dec1b66a785 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 10:44:04 +0100
+Subject: [PATCH 02/40] Fix compilation on Win32
+
+Makefile.win32 contained a typo and was missing the dependency from
+the built sources.
+---
+ pixman/Makefile.win32 | 6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
+index 775fb5e..b5f9397 100644
+--- a/pixman/Makefile.win32
++++ b/pixman/Makefile.win32
+@@ -56,6 +56,8 @@ SOURCES = \
+ pixman-general.c \
+ $(NULL)
+
++BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
++
+ # MMX compilation flags
+ ifeq ($(MMX_VAR),on)
+ CFLAGS += $(MMX_CFLAGS)
+@@ -122,7 +124,7 @@ endif
+ endif
+
+ # pixman compilation and linking
+-$(CFG_VAR)/%.obj: %.c
++$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
+ @mkdir -p $(CFG_VAR)
+ @$(CC) -c $(CFLAGS) -Fo"$@" $<
+
+@@ -141,4 +143,4 @@ pixman-combine64.h: pixman-combine.h.template make-combine.pl
+
+ clean_r:
+ @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
+- @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0
++ @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch b/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch
new file mode 100644
index 0000000..33351a9
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch
@@ -0,0 +1,232 @@
+From 11305b4ecdd36a17592c5c75de9157874853ab20 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 21:46:37 +0100
+Subject: [PATCH 03/40] test: Fix tests for compilation on Windows
+
+The Microsoft C compiler cannot handle subobject initialization and
+Win32 does not provide snprintf.
+
+Work around these limitations by using normal struct initialization
+and using sprintf (a manual check shows that the buffer size is
+sufficient).
+---
+ test/composite.c | 29 +++++++++++++--------------
+ test/fetch-test.c | 52 ++++++++++++++++++++++----------------------------
+ test/trap-crasher.c | 20 +++++++++---------
+ 3 files changed, 47 insertions(+), 54 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index e14f954..08c6689 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -617,18 +617,18 @@ eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
+ }
+
+ static char *
+-describe_image (image_t *info, char *buf, int buflen)
++describe_image (image_t *info, char *buf)
+ {
+ if (info->size)
+ {
+- snprintf (buf, buflen, "%s %dx%d%s",
+- info->format->name,
+- info->size, info->size,
+- info->repeat ? "R" :"");
++ sprintf (buf, "%s %dx%d%s",
++ info->format->name,
++ info->size, info->size,
++ info->repeat ? "R" :"");
+ }
+ else
+ {
+- snprintf (buf, buflen, "solid");
++ sprintf (buf, "solid");
+ }
+
+ return buf;
+@@ -710,10 +710,9 @@ composite_test (image_t *dst,
+ {
+ char buf[40];
+
+- snprintf (buf, sizeof (buf),
+- "%s %scomposite",
+- op->name,
+- component_alpha ? "CA " : "");
++ sprintf (buf, "%s %scomposite",
++ op->name,
++ component_alpha ? "CA " : "");
+
+ printf ("%s test error of %.4f --\n"
+ " R G B A\n"
+@@ -735,9 +734,9 @@ composite_test (image_t *dst,
+ mask->color->b, mask->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+- printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+- printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf)));
+- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++ printf ("src: %s, ", describe_image (src, buf));
++ printf ("mask: %s, ", describe_image (mask, buf));
++ printf ("dst: %s\n\n", describe_image (dst, buf));
+ }
+ else
+ {
+@@ -747,8 +746,8 @@ composite_test (image_t *dst,
+ src->color->b, src->color->a,
+ dst->color->r, dst->color->g,
+ dst->color->b, dst->color->a);
+- printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+- printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++ printf ("src: %s, ", describe_image (src, buf));
++ printf ("dst: %s\n\n", describe_image (dst, buf));
+ }
+
+ success = FALSE;
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 2ca16dd..314a072 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -8,7 +8,7 @@
+
+ static pixman_indexed_t mono_palette =
+ {
+- .rgba = { 0x00000000, 0x00ffffff },
++ 0, { 0x00000000, 0x00ffffff },
+ };
+
+
+@@ -24,57 +24,53 @@ typedef struct {
+ static testcase_t testcases[] =
+ {
+ {
+- .format = PIXMAN_a8r8g8b8,
+- .width = 2, .height = 2,
+- .stride = 8,
+- .src = { 0x00112233, 0x44556677,
+- 0x8899aabb, 0xccddeeff },
+- .dst = { 0x00112233, 0x44556677,
+- 0x8899aabb, 0xccddeeff },
+- .indexed = NULL,
++ PIXMAN_a8r8g8b8,
++ 2, 2,
++ 8,
++ { 0x00112233, 0x44556677,
++ 0x8899aabb, 0xccddeeff },
++ { 0x00112233, 0x44556677,
++ 0x8899aabb, 0xccddeeff },
++ NULL,
+ },
+ {
+- .format = PIXMAN_g1,
+- .width = 8, .height = 2,
+- .stride = 4,
++ PIXMAN_g1,
++ 8, 2,
++ 4,
+ #ifdef WORDS_BIGENDIAN
+- .src =
+ {
+ 0xaa000000,
+ 0x55000000
+ },
+ #else
+- .src =
+ {
+ 0x00000055,
+ 0x000000aa
+ },
+ #endif
+- .dst =
+ {
+ 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+ 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
+ },
+- .indexed = &mono_palette,
++ &mono_palette,
+ },
+ #if 0
+ {
+- .format = PIXMAN_g8,
+- .width = 4, .height = 2,
+- .stride = 4,
+- .src = { 0x01234567,
+- 0x89abcdef },
+- .dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
+- 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
++ PIXMAN_g8,
++ 4, 2,
++ 4,
++ { 0x01234567,
++ 0x89abcdef },
++ { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
++ 0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
+ },
+ #endif
+ /* FIXME: make this work on big endian */
+ {
+- .format = PIXMAN_yv12,
+- .width = 8, .height = 2,
+- .stride = 8,
++ PIXMAN_yv12,
++ 8, 2,
++ 8,
+ #ifdef WORDS_BIGENDIAN
+- .src =
+ {
+ 0x00ff00ff, 0x00ff00ff,
+ 0xff00ff00, 0xff00ff00,
+@@ -82,7 +78,6 @@ static testcase_t testcases[] =
+ 0x800080ff
+ },
+ #else
+- .src =
+ {
+ 0xff00ff00, 0xff00ff00,
+ 0x00ff00ff, 0x00ff00ff,
+@@ -90,7 +85,6 @@ static testcase_t testcases[] =
+ 0xff800080
+ },
+ #endif
+- .dst =
+ {
+ 0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+ 0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+diff --git a/test/trap-crasher.c b/test/trap-crasher.c
+index 42b82f6..7485e62 100644
+--- a/test/trap-crasher.c
++++ b/test/trap-crasher.c
+@@ -7,21 +7,21 @@ main()
+ pixman_image_t *dst;
+ pixman_trapezoid_t traps[1] = {
+ {
+- .top = 2147483646,
+- .bottom = 2147483647,
+- .left = {
+- .p1 = { .x = 0, .y = 0 },
+- .p2 = { .x = 0, .y = 2147483647 }
++ 2147483646,
++ 2147483647,
++ {
++ { 0, 0 },
++ { 0, 2147483647 }
+ },
+- .right = {
+- .p1 = { .x = 65536, .y = 0 },
+- .p2 = { .x = 0, .y = 2147483647 }
++ {
++ { 65536, 0 },
++ { 0, 2147483647 }
+ }
+ },
+ };
+-
++
+ dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
+-
++
+ pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
+ return (0);
+ }
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch b/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch
new file mode 100644
index 0000000..94ed0b4
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch
@@ -0,0 +1,92 @@
+From 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:04:49 +0100
+Subject: [PATCH 04/40] test: Add Makefile for Win32
+
+---
+ test/Makefile.win32 | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 73 insertions(+), 0 deletions(-)
+ create mode 100644 test/Makefile.win32
+
+diff --git a/test/Makefile.win32 b/test/Makefile.win32
+new file mode 100644
+index 0000000..c71afe1
+--- /dev/null
++++ b/test/Makefile.win32
+@@ -0,0 +1,73 @@
++CC = cl
++LINK = link
++
++CFG_VAR = $(CFG)
++ifeq ($(CFG_VAR),)
++CFG_VAR=release
++endif
++
++CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_BIND_TO_CURRENT_VCLIBS_VERSION -D_MT -I../pixman -I. -I../
++TEST_LDADD = ../pixman/$(CFG_VAR)/pixman-1.lib
++INCLUDES = -I../pixman -I$(top_builddir)/pixman
++
++# optimization flags
++ifeq ($(CFG_VAR),debug)
++CFLAGS += -Od -Zi
++else
++CFLAGS += -O2
++endif
++
++SOURCES = \
++ a1-trap-test.c \
++ pdf-op-test.c \
++ region-test.c \
++ region-translate-test.c \
++ fetch-test.c \
++ oob-test.c \
++ trap-crasher.c \
++ alpha-loop.c \
++ scaling-crash-test.c \
++ gradient-crash-test.c \
++ alphamap.c \
++ stress-test.c \
++ composite-traps-test.c \
++ blitters-test.c \
++ scaling-test.c \
++ affine-test.c \
++ composite.c \
++ utils.c
++
++TESTS = \
++ $(CFG_VAR)/a1-trap-test.exe \
++ $(CFG_VAR)/pdf-op-test.exe \
++ $(CFG_VAR)/region-test.exe \
++ $(CFG_VAR)/region-translate-test.exe \
++ $(CFG_VAR)/fetch-test.exe \
++ $(CFG_VAR)/oob-test.exe \
++ $(CFG_VAR)/trap-crasher.exe \
++ $(CFG_VAR)/alpha-loop.exe \
++ $(CFG_VAR)/scaling-crash-test.exe \
++ $(CFG_VAR)/gradient-crash-test.exe \
++ $(CFG_VAR)/alphamap.exe \
++ $(CFG_VAR)/stress-test.exe \
++ $(CFG_VAR)/composite-traps-test.exe \
++ $(CFG_VAR)/blitters-test.exe \
++ $(CFG_VAR)/scaling-test.exe \
++ $(CFG_VAR)/affine-test.exe \
++ $(CFG_VAR)/composite.exe
++
++
++OBJECTS = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
++
++$(CFG_VAR)/%.obj: %.c
++ @mkdir -p $(CFG_VAR)
++ @$(CC) -c $(CFLAGS) -Fo"$@" $<
++
++$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj
++ $(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD)
++
++all: $(OBJECTS) $(TESTS)
++ @exit 0
++
++clean:
++ @rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb || exit 0
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch b/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch
new file mode 100644
index 0000000..60f9528
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch
@@ -0,0 +1,40 @@
+From 8868778ea1fdc8e70da76b3b00ea78106c5840d8 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:43:48 +0100
+Subject: [PATCH 05/40] Do not include unused headers
+
+pixman-combine32.h is included without being used both in
+pixman-image.c and in pixman-general.c.
+---
+ pixman/pixman-general.c | 2 --
+ pixman/pixman-image.c | 1 -
+ 2 files changed, 0 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 16ea3a4..872fb7e 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -36,8 +36,6 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+-#include "pixman-private.h"
+
+ static void
+ general_src_iter_init (pixman_implementation_t *imp,
+diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
+index 9103ca6..84bacf8 100644
+--- a/pixman/pixman-image.c
++++ b/pixman/pixman-image.c
+@@ -30,7 +30,6 @@
+ #include <assert.h>
+
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+
+ pixman_bool_t
+ _pixman_init_gradient (gradient_t * gradient,
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch b/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch
new file mode 100644
index 0000000..80d7943
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch
@@ -0,0 +1,63 @@
+From 9ebde285fa990bfa1524f166fbfb1368c346b14a Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 12:53:39 +0100
+Subject: [PATCH 06/40] test: Silence MSVC warnings
+
+MSVC does not notice non-returning functions (abort() / assert(0))
+and warns about paths which end with them in non-void functions:
+
+c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
+warning C4715: 'reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
+warning C4715: 'real_reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
+warning C4715: 'calc_op' : not all control paths return a value
+
+These warnings can be silenced by adding a return after the
+termination call.
+---
+ test/composite.c | 1 +
+ test/fetch-test.c | 1 +
+ test/stress-test.c | 2 +-
+ 3 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index 08c6689..a86e5ed 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -426,6 +426,7 @@ calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+ case PIXMAN_OP_HSL_LUMINOSITY:
+ default:
+ abort();
++ return 0; /* silence MSVC */
+ }
+ #undef mult_chan
+ }
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 314a072..60bc765 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -110,6 +110,7 @@ reader (const void *src, int size)
+ return *(uint32_t *)src;
+ default:
+ assert(0);
++ return 0; /* silence MSVC */
+ }
+ }
+
+diff --git a/test/stress-test.c b/test/stress-test.c
+index bcbc1f8..166dc6d 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -128,7 +128,7 @@ real_reader (const void *src, int size)
+ return *(uint32_t *)src;
+ default:
+ assert (0);
+- break;
++ return 0; /* silence MSVC */
+ }
+ }
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
new file mode 100644
index 0000000..c5dab5c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
@@ -0,0 +1,466 @@
+From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 01:29:02 +0200
+Subject: [PATCH 07/40] Main loop template for fast single pass bilinear scaling
+
+Can be used for implementing SIMD optimized fast path
+functions which work with bilinear scaled source images.
+
+Similar to the template for nearest scaling main loop, the
+following types of mask are supported:
+1. no mask
+2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+3. solid mask
+
+PAD repeat is fully supported. NONE repeat is partially
+supported (right now only works if source image has alpha
+channel or when alpha channel of the source image does not
+have any effect on the compositing operation).
+---
+ pixman/pixman-fast-path.h | 432 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 432 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
+index d081222..1885d47 100644
+--- a/pixman/pixman-fast-path.h
++++ b/pixman/pixman-fast-path.h
+@@ -587,4 +587,436 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
++/*****************************************************************************/
++
++/*
++ * Identify 5 zones in each scanline for bilinear scaling. Depending on
++ * whether 2 pixels to be interpolated are fetched from the image itself,
++ * from the padding area around it or from both image and padding area.
++ */
++static force_inline void
++bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
++ pixman_fixed_t vx,
++ pixman_fixed_t unit_x,
++ int32_t * left_pad,
++ int32_t * left_tz,
++ int32_t * width,
++ int32_t * right_tz,
++ int32_t * right_pad)
++{
++ int width1 = *width, left_pad1, right_pad1;
++ int width2 = *width, left_pad2, right_pad2;
++
++ pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
++ &width1, &left_pad1, &right_pad1);
++ pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
++ unit_x, &width2, &left_pad2, &right_pad2);
++
++ *left_pad = left_pad2;
++ *left_tz = left_pad1 - left_pad2;
++ *right_tz = right_pad2 - right_pad1;
++ *right_pad = right_pad1;
++ *width -= *left_pad + *left_tz + *right_tz + *right_pad;
++}
++
++/*
++ * Main loop template for single pass bilinear scaling. It needs to be
++ * provided with 'scanline_func' which should do the compositing operation.
++ * The needed function has the following prototype:
++ *
++ * scanline_func (dst_type_t * dst,
++ * const mask_type_t * mask,
++ * const src_type_t * src_top,
++ * const src_type_t * src_bottom,
++ * int32_t width,
++ * int weight_top,
++ * int weight_bottom,
++ * pixman_fixed_t vx,
++ * pixman_fixed_t unit_x,
++ * pixman_fixed_t max_vx,
++ * pixman_bool_t zero_src)
++ *
++ * Where:
++ * dst - destination scanline buffer for storing results
++ * mask - mask buffer (or single value for solid mask)
++ * src_top, src_bottom - two source scanlines
++ * width - number of pixels to process
++ * weight_top - weight of the top row for interpolation
++ * weight_bottom - weight of the bottom row for interpolation
++ * vx - initial position for fetching the first pair of
++ * pixels from the source buffer
++ * unit_x - position increment needed to move to the next pair
++ * of pixels
++ * max_vx - image size as a fixed point value, can be used for
++ * implementing NORMAL repeat (when it is supported)
++ * zero_src - boolean hint variable, which is set to TRUE when
++ * all source pixels are fetched from zero padding
++ * zone for NONE repeat
++ *
++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
++ * but sometimes it may be less than that for NONE repeat when handling
++ * fuzzy antialiased top or bottom image edges. Also both top and
++ * bottom weight variables are guaranteed to have value in 0-255
++ * range and can fit into unsigned byte or be used with 8-bit SIMD
++ * multiplication instructions.
++ */
++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
++static void \
++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
++ pixman_op_t op, \
++ pixman_image_t * src_image, \
++ pixman_image_t * mask_image, \
++ pixman_image_t * dst_image, \
++ int32_t src_x, \
++ int32_t src_y, \
++ int32_t mask_x, \
++ int32_t mask_y, \
++ int32_t dst_x, \
++ int32_t dst_y, \
++ int32_t width, \
++ int32_t height) \
++{ \
++ dst_type_t *dst_line; \
++ mask_type_t *mask_line; \
++ src_type_t *src_first_line; \
++ int y1, y2; \
++ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
++ pixman_vector_t v; \
++ pixman_fixed_t vx, vy; \
++ pixman_fixed_t unit_x, unit_y; \
++ int32_t left_pad, left_tz, right_tz, right_pad; \
++ \
++ dst_type_t *dst; \
++ mask_type_t solid_mask; \
++ const mask_type_t *mask = &solid_mask; \
++ int src_stride, mask_stride, dst_stride; \
++ \
++ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
++ if (have_mask) \
++ { \
++ if (mask_is_solid) \
++ { \
++ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
++ mask_stride = 0; \
++ } \
++ else \
++ { \
++ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
++ mask_stride, mask_line, 1); \
++ } \
++ } \
++ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
++ * transformed from destination space to source space */ \
++ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
++ \
++ /* reference point is the center of the pixel */ \
++ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
++ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
++ v.vector[2] = pixman_fixed_1; \
++ \
++ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
++ return; \
++ \
++ unit_x = src_image->common.transform->matrix[0][0]; \
++ unit_y = src_image->common.transform->matrix[1][1]; \
++ \
++ v.vector[0] -= pixman_fixed_1 / 2; \
++ v.vector[1] -= pixman_fixed_1 / 2; \
++ \
++ vy = v.vector[1]; \
++ \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
++ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
++ { \
++ bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
++ &left_pad, &left_tz, &width, &right_tz, &right_pad); \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
++ { \
++ /* PAD repeat does not need special handling for 'transition zones' and */ \
++ /* they can be combined with 'padding zones' safely */ \
++ left_pad += left_tz; \
++ right_pad += right_tz; \
++ left_tz = right_tz = 0; \
++ } \
++ v.vector[0] += left_pad * unit_x; \
++ } \
++ \
++ while (--height >= 0) \
++ { \
++ int weight1, weight2; \
++ dst = dst_line; \
++ dst_line += dst_stride; \
++ vx = v.vector[0]; \
++ if (have_mask && !mask_is_solid) \
++ { \
++ mask = mask_line; \
++ mask_line += mask_stride; \
++ } \
++ \
++ y1 = pixman_fixed_to_int (vy); \
++ weight2 = (vy >> 8) & 0xff; \
++ if (weight2) \
++ { \
++ /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
++ y2 = y1 + 1; \
++ weight1 = 256 - weight2; \
++ } \
++ else \
++ { \
++ /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
++ y2 = y1; \
++ weight1 = weight2 = 128; \
++ } \
++ vy += unit_y; \
++ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
++ { \
++ src_type_t *src1, *src2; \
++ src_type_t buf1[2]; \
++ src_type_t buf2[2]; \
++ repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
++ repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
++ src1 = src_first_line + src_stride * y1; \
++ src2 = src_first_line + src_stride * y2; \
++ \
++ if (left_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = src1[0]; \
++ buf2[0] = buf2[1] = src2[0]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
++ dst += left_pad; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_pad; \
++ } \
++ if (width > 0) \
++ { \
++ scanline_func (dst, mask, \
++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
++ dst += width; \
++ if (have_mask && !mask_is_solid) \
++ mask += width; \
++ } \
++ if (right_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
++ buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
++ } \
++ } \
++ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
++ { \
++ src_type_t *src1, *src2; \
++ src_type_t buf1[2]; \
++ src_type_t buf2[2]; \
++ /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
++ if (y1 < 0) \
++ { \
++ weight1 = 0; \
++ y1 = 0; \
++ } \
++ if (y1 >= src_image->bits.height) \
++ { \
++ weight1 = 0; \
++ y1 = src_image->bits.height - 1; \
++ } \
++ if (y2 < 0) \
++ { \
++ weight2 = 0; \
++ y2 = 0; \
++ } \
++ if (y2 >= src_image->bits.height) \
++ { \
++ weight2 = 0; \
++ y2 = src_image->bits.height - 1; \
++ } \
++ src1 = src_first_line + src_stride * y1; \
++ src2 = src_first_line + src_stride * y2; \
++ \
++ if (left_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = 0; \
++ buf2[0] = buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
++ dst += left_pad; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_pad; \
++ } \
++ if (left_tz > 0) \
++ { \
++ buf1[0] = 0; \
++ buf1[1] = src1[0]; \
++ buf2[0] = 0; \
++ buf2[1] = src2[0]; \
++ scanline_func (dst, mask, \
++ buf1, buf2, left_tz, weight1, weight2, \
++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
++ dst += left_tz; \
++ if (have_mask && !mask_is_solid) \
++ mask += left_tz; \
++ vx += left_tz * unit_x; \
++ } \
++ if (width > 0) \
++ { \
++ scanline_func (dst, mask, \
++ src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
++ dst += width; \
++ if (have_mask && !mask_is_solid) \
++ mask += width; \
++ vx += width * unit_x; \
++ } \
++ if (right_tz > 0) \
++ { \
++ buf1[0] = src1[src_image->bits.width - 1]; \
++ buf1[1] = 0; \
++ buf2[0] = src2[src_image->bits.width - 1]; \
++ buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_tz, weight1, weight2, \
++ pixman_fixed_frac (vx), unit_x, 0, FALSE); \
++ dst += right_tz; \
++ if (have_mask && !mask_is_solid) \
++ mask += right_tz; \
++ } \
++ if (right_pad > 0) \
++ { \
++ buf1[0] = buf1[1] = 0; \
++ buf2[0] = buf2[1] = 0; \
++ scanline_func (dst, mask, \
++ buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
++ } \
++ } \
++ else \
++ { \
++ scanline_func (dst, mask, src_first_line + src_stride * y1, \
++ src_first_line + src_stride * y2, width, \
++ weight1, weight2, vx, unit_x, max_vx, FALSE); \
++ } \
++ } \
++}
++
++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
++ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
++ FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
++ dst_type_t, repeat_mode, have_mask, mask_is_solid)
++
++#define SCALED_BILINEAR_FLAGS \
++ (FAST_PATH_SCALE_TRANSFORM | \
++ FAST_PATH_NO_ALPHA_MAP | \
++ FAST_PATH_BILINEAR_FILTER | \
++ FAST_PATH_NO_ACCESSORS | \
++ FAST_PATH_NARROW_FORMAT)
++
++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_null, 0, \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_PAD_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ (SCALED_BILINEAR_FLAGS | \
++ FAST_PATH_NONE_REPEAT | \
++ FAST_PATH_X_UNIT_POSITIVE), \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
++ }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
++ { PIXMAN_OP_ ## op, \
++ PIXMAN_ ## s, \
++ SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
++ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
++ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
++ fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
++ }
++
++/* Prefer the use of 'cover' variant, because it is faster */
++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
++ SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
++
+ #endif
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch b/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
new file mode 100644
index 0000000..18dfcaa
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
@@ -0,0 +1,136 @@
+From 0df43b8ae5031dd83775d00b57b6bed809db0e89 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 02:07:09 +0200
+Subject: [PATCH 08/40] test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
+
+Individual correctness check for the new bilinear scaling related
+supplementary function. This test program uses a bit wider range
+of input arguments, not covered by other tests.
+---
+ test/Makefile.am | 2 +
+ test/scaling-helpers-test.c | 93 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 95 insertions(+), 0 deletions(-)
+ create mode 100644 test/scaling-helpers-test.c
+
+diff --git a/test/Makefile.am b/test/Makefile.am
+index 057e9ce..9dc7219 100644
+--- a/test/Makefile.am
++++ b/test/Makefile.am
+@@ -13,6 +13,7 @@ TESTPROGRAMS = \
+ trap-crasher \
+ alpha-loop \
+ scaling-crash-test \
++ scaling-helpers-test \
+ gradient-crash-test \
+ alphamap \
+ stress-test \
+@@ -33,6 +34,7 @@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+ composite_SOURCES = composite.c utils.c utils.h
+ gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
+ stress_test_SOURCES = stress-test.c utils.c utils.h
++scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
+
+ # Benchmarks
+
+diff --git a/test/scaling-helpers-test.c b/test/scaling-helpers-test.c
+new file mode 100644
+index 0000000..c186138
+--- /dev/null
++++ b/test/scaling-helpers-test.c
+@@ -0,0 +1,93 @@
++#include <config.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <stdio.h>
++#include <assert.h>
++#include "utils.h"
++#include "pixman-fast-path.h"
++
++/* A trivial reference implementation for
++ * 'bilinear_pad_repeat_get_scanline_bounds'; '*width' is the scanline
++ * length on input and every output counter is rebuilt pixel by pixel. */
++static void
++bilinear_pad_repeat_get_scanline_bounds_ref (int32_t source_image_width,
++ pixman_fixed_t vx_,
++ pixman_fixed_t unit_x,
++ int32_t * left_pad,
++ int32_t * left_tz,
++ int32_t * width,
++ int32_t * right_tz,
++ int32_t * right_pad)
++{
++ int w = *width; /* pixels still to classify */
++ int64_t vx = vx_; /* declared before any statement (C89); 64-bit accumulator for 'vx += unit_x' */
++ *left_pad = 0;
++ *left_tz = 0;
++ *width = 0;
++ *right_tz = 0;
++ *right_pad = 0;
++ while (--w >= 0)
++ {
++ if (vx < 0)
++ {
++ if (vx + pixman_fixed_1 < 0)
++ *left_pad += 1; /* vx + pixman_fixed_1 is still negative */
++ else
++ *left_tz += 1; /* vx is negative, vx + pixman_fixed_1 is not */
++ }
++ else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width))
++ {
++ if (vx >= pixman_int_to_fixed (source_image_width))
++ *right_pad += 1; /* vx itself is at or past the image width */
++ else
++ *right_tz += 1; /* only vx + pixman_fixed_1 reaches the image width */
++ }
++ else
++ {
++ *width += 1; /* vx and vx + pixman_fixed_1 are both inside the image */
++ }
++ vx += unit_x;
++ }
++}
++
++int
++main (void)
++{
++ int i;
++ for (i = 0; i < 10000; i++) /* 10000 randomly generated cases */
++ {
++ int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1; /* results of the reference implementation */
++ int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2; /* results of the function under test */
++ pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16);
++ int32_t width = lcg_rand_N(10000);
++ int32_t source_image_width = lcg_rand_N(10000) + 1; /* the + 1 keeps it non-zero */
++ pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1; /* the + 1 keeps the step non-zero */
++ width1 = width2 = width; /* width is an in/out argument of both implementations */
++
++ bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width,
++ vx,
++ unit_x,
++ &left_pad1,
++ &left_tz1,
++ &width1,
++ &right_tz1,
++ &right_pad1);
++
++ bilinear_pad_repeat_get_scanline_bounds (source_image_width,
++ vx,
++ unit_x,
++ &left_pad2,
++ &left_tz2,
++ &width2,
++ &right_tz2,
++ &right_pad2);
++
++ assert (left_pad1 == left_pad2); /* all five outputs must agree */
++ assert (left_tz1 == left_tz2);
++ assert (width1 == width2);
++ assert (right_tz1 == right_tz2);
++ assert (right_pad1 == right_pad2);
++ }
++
++ return 0;
++}
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch b/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
new file mode 100644
index 0000000..b85f781
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
@@ -0,0 +1,156 @@
+From 350029396d911941591149cc82b5e68a78ad6747 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 20:18:02 +0200
+Subject: [PATCH 09/40] SSE2 optimization for bilinear scaled 'src_8888_8888'
+
+A primitive naive implementation of bilinear scaling using SSE2 intrinsics,
+which only handles one pixel at a time. It is approximately 2x faster than
+pixman general compositing path. Single pass processing without intermediate
+temporary buffer contributes to ~15% and loop unrolling contributes to ~20%
+of this speedup.
+
+Benchmark on Intel Core i7 (x86-64):
+ Using cairo-perf-trace:
+ before: image firefox-planet-gnome 12.566 12.610 0.23% 6/6
+ after: image firefox-planet-gnome 10.961 11.013 0.19% 5/6
+
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
+---
+ pixman/pixman-sse2.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 112 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 88287b4..696005f 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5567,6 +5567,114 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+
++static void
++bilinear_interpolate_line_sse2 (uint32_t * out,
++ const uint32_t * top,
++ const uint32_t * bottom,
++ int wt, /* weight multiplied into the pixels read from 'top' */
++ int wb, /* weight multiplied into the pixels read from 'bottom' */
++ pixman_fixed_t x, /* fixed-point start position within both rows */
++ pixman_fixed_t ux, /* added to x after every output pixel */
++ int width) /* number of pixels written to 'out' */
++{
++ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
++ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
++ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff); /* xorc/addc turn w into 256 - w in the low four lanes only */
++ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
++ const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
++ const __m128i xmm_zero = _mm_setzero_si128 ();
++ __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x); /* 16-bit copy of x, stepped by xmm_ux alongside the scalar x */
++ uint32_t pix1, pix2, pix3, pix4;
++
++ #define INTERPOLATE_ONE_PIXEL(pix) \
++ do { \
++ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
++ /* fetch 2x2 pixel block into sse2 register */ \
++ uint32_t tl = top [pixman_fixed_to_int (x)]; \
++ uint32_t tr = top [pixman_fixed_to_int (x) + 1]; \
++ uint32_t bl = bottom [pixman_fixed_to_int (x)]; \
++ uint32_t br = bottom [pixman_fixed_to_int (x) + 1]; \
++ a = _mm_set_epi32 (tr, tl, br, bl); \
++ x += ux; \
++ /* vertical interpolation */ \
++ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
++ xmm_wt), \
++ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
++ xmm_wb)); \
++ /* calculate horizontal weights */ \
++ xmm_wh = _mm_add_epi16 (xmm_addc, \
++ _mm_xor_si128 (xmm_xorc, \
++ _mm_srli_epi16 (xmm_x, 8))); \
++ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
++ /* horizontal interpolation */ \
++ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
++ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
++ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
++ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
++ /* shift and pack the result */ \
++ a = _mm_srli_epi32 (a, 16); \
++ a = _mm_packs_epi32 (a, a); \
++ a = _mm_packus_epi16 (a, a); \
++ pix = _mm_cvtsi128_si32 (a); \
++ } while (0)
++
++ while ((width -= 4) >= 0) /* unrolled: four pixels per iteration */
++ {
++ INTERPOLATE_ONE_PIXEL (pix1);
++ INTERPOLATE_ONE_PIXEL (pix2);
++ INTERPOLATE_ONE_PIXEL (pix3);
++ INTERPOLATE_ONE_PIXEL (pix4);
++ *out++ = pix1;
++ *out++ = pix2;
++ *out++ = pix3;
++ *out++ = pix4;
++ }
++ if (width & 2) /* width is negative by now; its low two bits still give the leftover pixel count */
++ {
++ INTERPOLATE_ONE_PIXEL (pix1);
++ INTERPOLATE_ONE_PIXEL (pix2);
++ *out++ = pix1;
++ *out++ = pix2;
++ }
++ if (width & 1)
++ {
++ INTERPOLATE_ONE_PIXEL (pix1);
++ *out = pix1;
++ }
++
++ #undef INTERPOLATE_ONE_PIXEL
++}
++
++static force_inline void
++scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
++ const uint32_t * mask, /* not used by this function */
++ const uint32_t * src_top,
++ const uint32_t * src_bottom,
++ int32_t w,
++ int wt,
++ int wb,
++ pixman_fixed_t vx,
++ pixman_fixed_t unit_x,
++ pixman_fixed_t max_vx, /* not used by this function */
++ pixman_bool_t zero_src) /* not used by this function */
++{
++ bilinear_interpolate_line_sse2 (dst, src_top, src_bottom, /* plain forwarding; only the argument order differs */
++ wt, wb, vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC, /* three instantiations follow, one each for COVER, PAD and NONE */
++ scaled_bilinear_scanline_sse2_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
++ scaled_bilinear_scanline_sse2_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
++ scaled_bilinear_scanline_sse2_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t sse2_fast_paths[] =
+ {
+ /* PIXMAN_OP_OVER */
+@@ -5668,6 +5776,10 @@ static const pixman_fast_path_t sse2_fast_paths[] =
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+
++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch b/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
new file mode 100644
index 0000000..4d41162
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
@@ -0,0 +1,288 @@
+From 17feaa9c50bb8521b0366345efe181bd99754957 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 22 Feb 2011 18:45:03 +0200
+Subject: [PATCH 10/40] ARM: NEON optimization for bilinear scaled 'src_8888_8888'
+
+Initial NEON optimization for bilinear scaling. It can probably be
+improved further.
+
+Benchmark on ARM Cortex-A8:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 197 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c | 45 ++++++++++
+ 2 files changed, 242 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 47daf45..c168e10 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2391,3 +2391,200 @@ generate_composite_function_nearest_scanline \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
++
++/******************************************************************************/
++
++/* Supplementary macro for setting function attributes and emitting the 'fname:' entry label */
++.macro pixman_asm_function fname
++ .func fname
++ .global fname
++#ifdef __ELF__
++ .hidden fname
++ .type fname, %function
++#endif
++fname:
++.endm
++
++.macro bilinear_interpolate_last_pixel
++ mov TMP1, X, asr #16 /* X shifted down by 16: pixel index */
++ mov TMP2, X, asr #16
++ add TMP1, TOP, TMP1, asl #2 /* byte address in the TOP row (index * 4) */
++ add TMP2, BOTTOM, TMP2, asl #2 /* byte address in the BOTTOM row */
++ vld1.32 {d0}, [TMP1] /* 8 bytes: two adjacent 32-bit pixels */
++ vshr.u16 d30, d24, #8 /* horizontal weight: upper byte of each 16-bit lane of d24 */
++ vld1.32 {d1}, [TMP2]
++ vmull.u8 q1, d0, d28 /* q1 = top pixels * d28 */
++ vmlal.u8 q1, d1, d29 /* q1 += bottom pixels * d29 */
++ /* 5 cycles bubble */
++ vshll.u16 q0, d2, #8 /* q0 = left * 256 */
++ vmlsl.u16 q0, d2, d30 /* q0 -= left * weight */
++ vmlal.u16 q0, d3, d30 /* q0 += right * weight */
++ /* 5 cycles bubble */
++ vshrn.u32 d0, q0, #16
++ /* 3 cycles bubble */
++ vmovn.u16 d0, q0
++ /* 1 cycle bubble */
++ vst1.32 {d0[0]}, [OUT, :32]! /* store one pixel and advance OUT; X is not advanced here */
++.endm
++
++.macro bilinear_interpolate_two_pixels
++ mov TMP1, X, asr #16 /* first pixel: index from X, then X advances by UX */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d0}, [TMP1]
++ vld1.32 {d1}, [TMP2]
++ vmull.u8 q1, d0, d28 /* vertical pass for the first pixel */
++ vmlal.u8 q1, d1, d29
++ mov TMP1, X, asr #16 /* second pixel */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d20}, [TMP1]
++ vld1.32 {d21}, [TMP2]
++ vmull.u8 q11, d20, d28 /* vertical pass for the second pixel */
++ vmlal.u8 q11, d21, d29
++ vshr.u16 q15, q12, #8 /* d30 / d31: horizontal weights of the first / second pixel */
++ vadd.u16 q12, q12, q13 /* step the 16-bit position lanes by q13 */
++ vshll.u16 q0, d2, #8
++ vmlsl.u16 q0, d2, d30
++ vmlal.u16 q0, d3, d30
++ vshll.u16 q10, d22, #8
++ vmlsl.u16 q10, d22, d31
++ vmlal.u16 q10, d23, d31
++ vshrn.u32 d30, q0, #16
++ vshrn.u32 d31, q10, #16
++ vmovn.u16 d0, q15
++ vst1.32 {d0}, [OUT]! /* store two pixels and advance OUT */
++.endm
++
++.macro bilinear_interpolate_four_pixels
++ mov TMP1, X, asr #16 /* pixel 1: load and vertical pass into q1 */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d0}, [TMP1]
++ vld1.32 {d1}, [TMP2]
++ vmull.u8 q1, d0, d28
++ vmlal.u8 q1, d1, d29
++ mov TMP1, X, asr #16 /* pixel 2: load and vertical pass into q11 */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d20}, [TMP1]
++ vld1.32 {d21}, [TMP2]
++ vmull.u8 q11, d20, d28
++ vmlal.u8 q11, d21, d29
++ vshr.u16 q15, q12, #8 /* d30 / d31: horizontal weights for pixels 1 / 2 */
++ vadd.u16 q12, q12, q13
++ vshll.u16 q0, d2, #8
++ vmlsl.u16 q0, d2, d30
++ vmlal.u16 q0, d3, d30
++ vshll.u16 q10, d22, #8
++ vmlsl.u16 q10, d22, d31
++ vmlal.u16 q10, d23, d31
++ mov TMP1, X, asr #16 /* pixel 3: load and vertical pass into q3 */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d4}, [TMP1]
++ vld1.32 {d5}, [TMP2]
++ vmull.u8 q3, d4, d28
++ vmlal.u8 q3, d5, d29
++ mov TMP1, X, asr #16 /* pixel 4: load and vertical pass into q9 */
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {d16}, [TMP1]
++ vld1.32 {d17}, [TMP2]
++ vmull.u8 q9, d16, d28
++ vmlal.u8 q9, d17, d29
++ vshr.u16 q15, q12, #8 /* d30 / d31: horizontal weights for pixels 3 / 4 */
++ vadd.u16 q12, q12, q13
++ vshll.u16 q2, d6, #8
++ vmlsl.u16 q2, d6, d30
++ vmlal.u16 q2, d7, d30
++ vshll.u16 q8, d18, #8
++ vmlsl.u16 q8, d18, d31
++ vmlal.u16 q8, d19, d31
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q10, #16
++ vshrn.u32 d4, q2, #16
++ vshrn.u32 d5, q8, #16
++ vmovn.u16 d0, q0
++ vmovn.u16 d1, q2
++ vst1.32 {d0, d1}, [OUT]! /* store four pixels and advance OUT */
++.endm
++
++
++/* C prototype of the function defined below:
++ * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out,
++ * const uint32_t * top,
++ * const uint32_t * bottom,
++ * int wt,
++ * int wb,
++ * pixman_fixed_t x,
++ * pixman_fixed_t ux,
++ * int width)
++ */
++
++pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
++ OUT .req r0
++ TOP .req r1
++ BOTTOM .req r2
++ WT .req r3
++ WB .req r4
++ X .req r5
++ UX .req r6
++ WIDTH .req ip
++ TMP1 .req r3 /* shares r3 with WT; WT is copied into d28 before TMP1 is first written */
++ TMP2 .req r4 /* shares r4 with WB; WB is copied into d29 before TMP2 is first written */
++
++ mov ip, sp /* keep the entry stack pointer: the remaining arguments are read from there */
++ push {r4, r5, r6, r7}
++ ldmia ip, {WB, X, UX, WIDTH} /* load wb, x, ux and width from the stack */
++
++ cmp WIDTH, #0
++ ble 3f /* nothing to do when width <= 0 */
++ vdup.u16 q12, X
++ vdup.u16 q13, UX
++ vdup.u8 d28, WT
++ vdup.u8 d29, WB
++ vadd.u16 d25, d25, d26 /* upper half of q12 now holds x + ux (position of the second pixel) */
++ vadd.u16 q13, q13, q13 /* q13 = 2 * ux: step applied per pair of pixels */
++
++ subs WIDTH, WIDTH, #4
++ blt 1f
++0:
++ bilinear_interpolate_four_pixels
++ subs WIDTH, WIDTH, #4
++ bge 0b
++1:
++ tst WIDTH, #2 /* WIDTH is negative here; its low two bits still give the leftover pixel count */
++ beq 2f
++ bilinear_interpolate_two_pixels
++2:
++ tst WIDTH, #1
++ beq 3f
++ bilinear_interpolate_last_pixel
++3:
++ pop {r4, r5, r6, r7}
++ bx lr
++
++ .unreq OUT
++ .unreq TOP
++ .unreq BOTTOM
++ .unreq WT
++ .unreq WB
++ .unreq X
++ .unreq UX
++ .unreq WIDTH
++ .unreq TMP1
++ .unreq TMP2
++.endfunc
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 3e0c0d1..c7c0254 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -232,6 +232,47 @@ pixman_blt_neon (uint32_t *src_bits,
+ }
+ }
+
++void /* prototype of the routine added to pixman-arm-neon-asm.S by this same patch */
++pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out,
++ const uint32_t * top,
++ const uint32_t * bottom,
++ int wt,
++ int wb,
++ pixman_fixed_t x,
++ pixman_fixed_t ux,
++ int width);
++
++static force_inline void
++scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst,
++ const uint32_t * mask, /* not used by this function */
++ const uint32_t * src_top,
++ const uint32_t * src_bottom,
++ int32_t w,
++ int wt,
++ int wb,
++ pixman_fixed_t vx,
++ pixman_fixed_t unit_x,
++ pixman_fixed_t max_vx, /* not used by this function */
++ pixman_bool_t zero_src) /* not used by this function */
++{
++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top, /* plain forwarding; only the argument order differs */
++ src_bottom, wt, wb,
++ vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC, /* three instantiations follow, one each for COVER, PAD and NONE */
++ scaled_bilinear_scanline_neon_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
++ scaled_bilinear_scanline_neon_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
++ scaled_bilinear_scanline_neon_8888_8888_SRC,
++ uint32_t, uint32_t, uint32_t,
++ NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
+@@ -343,6 +384,10 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch b/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
new file mode 100644
index 0000000..9761860
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
@@ -0,0 +1,156 @@
+From 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Mon, 7 Mar 2011 13:45:54 -0500
+Subject: [PATCH 11/40] test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
+
+There is no reason to pass in the bpp as an argument; it can be gotten
+directly from the image.
+---
+ test/affine-test.c | 6 +++---
+ test/blitters-test.c | 4 ++--
+ test/composite-traps-test.c | 2 +-
+ test/scaling-test.c | 6 +++---
+ test/utils.c | 9 +++++++--
+ test/utils.h | 2 +-
+ 6 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/test/affine-test.c b/test/affine-test.c
+index b7a1fa6..ed8000c 100644
+--- a/test/affine-test.c
++++ b/test/affine-test.c
+@@ -95,8 +95,8 @@ test_composite (int testnum,
+ dst_img = pixman_image_create_bits (
+ dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+
+- image_endian_swap (src_img, src_bpp * 8);
+- image_endian_swap (dst_img, dst_bpp * 8);
++ image_endian_swap (src_img);
++ image_endian_swap (dst_img);
+
+ pixman_transform_init_identity (&transform);
+
+@@ -251,7 +251,7 @@ test_composite (int testnum,
+ dstbuf[i] &= 0xFFFFFF;
+ }
+
+- image_endian_swap (dst_img, dst_bpp * 8);
++ image_endian_swap (dst_img);
+
+ if (verbose)
+ {
+diff --git a/test/blitters-test.c b/test/blitters-test.c
+index 42181ef..63e7cb3 100644
+--- a/test/blitters-test.c
++++ b/test/blitters-test.c
+@@ -61,7 +61,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
+ pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)]));
+ }
+
+- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++ image_endian_swap (img);
+
+ if (used_fmt) *used_fmt = fmt;
+ return img;
+@@ -101,7 +101,7 @@ free_random_image (uint32_t initcrc,
+ /* swap endiannes in order to provide identical results on both big
+ * and litte endian systems
+ */
+- image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++ image_endian_swap (img);
+ crc32 = compute_crc32 (initcrc, data, stride * height);
+ }
+
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 8f32778..298537d 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -218,7 +218,7 @@ test_composite (int testnum,
+ dst_bits[i] &= 0xFFFFFF;
+ }
+
+- image_endian_swap (dst_img, dst_bpp * 8);
++ image_endian_swap (dst_img);
+
+ if (verbose)
+ {
+diff --git a/test/scaling-test.c b/test/scaling-test.c
+index dbb9d39..82370f7 100644
+--- a/test/scaling-test.c
++++ b/test/scaling-test.c
+@@ -140,8 +140,8 @@ test_composite (int testnum,
+ dst_img = pixman_image_create_bits (
+ dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+
+- image_endian_swap (src_img, src_bpp * 8);
+- image_endian_swap (dst_img, dst_bpp * 8);
++ image_endian_swap (src_img);
++ image_endian_swap (dst_img);
+
+ if (lcg_rand_n (4) > 0)
+ {
+@@ -330,7 +330,7 @@ test_composite (int testnum,
+ dstbuf[i] &= 0xFFFFFF;
+ }
+
+- image_endian_swap (dst_img, dst_bpp * 8);
++ image_endian_swap (dst_img);
+
+ if (verbose)
+ {
+diff --git a/test/utils.c b/test/utils.c
+index 2f21398..4bf02e1 100644
+--- a/test/utils.c
++++ b/test/utils.c
+@@ -133,11 +133,12 @@ compute_crc32 (uint32_t in_crc32,
+ /* perform endian conversion of pixel data
+ */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp)
++image_endian_swap (pixman_image_t *img)
+ {
+ int stride = pixman_image_get_stride (img);
+ uint32_t *data = pixman_image_get_data (img);
+ int height = pixman_image_get_height (img);
++ int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img));
+ int i, j;
+
+ /* swap bytes only on big endian systems */
+@@ -145,10 +146,13 @@ image_endian_swap (pixman_image_t *img, int bpp)
+ if (*(volatile uint8_t *)&endian_check_var != 0x12)
+ return;
+
++ if (bpp == 8)
++ return;
++
+ for (i = 0; i < height; i++)
+ {
+ uint8_t *line_data = (uint8_t *)data + stride * i;
+- /* swap bytes only for 16, 24 and 32 bpp for now */
++
+ switch (bpp)
+ {
+ case 1:
+@@ -208,6 +212,7 @@ image_endian_swap (pixman_image_t *img, int bpp)
+ }
+ break;
+ default:
++ assert (FALSE);
+ break;
+ }
+ }
+diff --git a/test/utils.h b/test/utils.h
+index 9c7bdb1..a5183f7 100644
+--- a/test/utils.h
++++ b/test/utils.h
+@@ -60,7 +60,7 @@ compute_crc32 (uint32_t in_crc32,
+ /* perform endian conversion of pixel data
+ */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp);
++image_endian_swap (pixman_image_t *img);
+
+ /* Allocate memory that is bounded by protected pages,
+ * so that out-of-bounds access will cause segfaults
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch b/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
new file mode 100644
index 0000000..9fc4cdd
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
@@ -0,0 +1,36 @@
+From 84e361c8e357e26f299213fbeefe64c73447b116 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Fri, 4 Mar 2011 15:51:18 -0500
+Subject: [PATCH 12/40] test: Do endian swapping of the source and destination images.
+
+Otherwise the test fails on big endian. Fix for bug 34767, reported by
+Siarhei Siamashka.
+---
+ test/composite-traps-test.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 298537d..cf30281 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -139,6 +139,8 @@ test_composite (int testnum,
+ pixman_image_set_source_clipping (src_img, 1);
+ pixman_region_fini (&clip);
+ }
++
++ image_endian_swap (src_img);
+ }
+
+ /* Create destination image */
+@@ -157,6 +159,8 @@ test_composite (int testnum,
+
+ dst_img = pixman_image_create_bits (
+ dst_format, dst_width, dst_height, dst_bits, dst_stride);
++
++ image_endian_swap (dst_img);
+ }
+
+ /* Create traps */
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch b/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
new file mode 100644
index 0000000..9d43404
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
@@ -0,0 +1,77 @@
+From bb3d1b67fd0f42ae00af811c624ea1c44541034d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 16:17:12 +0200
+Subject: [PATCH 13/40] ARM: use prefetch in nearest scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
+ after: op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
+ after: op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S | 27 +++++++++++++++++++++++++--
+ 1 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index 7567700..dd1366d 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ TMP1 .req r4
+ TMP2 .req r5
+ VXMASK .req r6
++ PF_OFFS .req r7
+
+ ldr UNIT_X, [sp]
+ push {r4, r5, r6, r7}
+@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ strh TMP2, [DST], #2
+ .endm
+
++ /*
++ * stop prefetch before reaching the end of scanline (a good behaving
++ * value selected based on some benchmarks with short scanlines)
++ */
++ #define PREFETCH_BRAKING_DISTANCE 32
++
+ /* now do the scaling */
+ and TMP1, VXMASK, VX, lsr #15
+ add VX, VX, UNIT_X
+- subs W, #4
++ subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
++ blt 2f
++ /* set prefetch distance to 80 pixels ahead */
++ add PF_OFFS, VX, UNIT_X, lsl #6
++ add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++1: /* main loop, process 8 pixels per iteration with prefetch */
++ subs W, W, #8
++ add PF_OFFS, UNIT_X, lsl #3
++ scale_2_pixels
++ scale_2_pixels
++ scale_2_pixels
++ scale_2_pixels
++ pld [SRC, PF_OFFS, lsr #15]
++ bge 1b
++2:
++ subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
+ blt 2f
+-1: /* main loop, process 4 pixels per iteration */
++1: /* process the remaining pixels */
+ scale_2_pixels
+ scale_2_pixels
+ subs W, W, #4
+@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ .unreq TMP1
+ .unreq TMP2
+ .unreq VXMASK
++ .unreq PF_OFFS
+ /* return */
+ pop {r4, r5, r6, r7}
+ bx lr
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
new file mode 100644
index 0000000..115d517
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
@@ -0,0 +1,131 @@
+From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 7 Mar 2011 03:10:43 +0200
+Subject: [PATCH 14/40] ARM: common macro for nearest scaling fast paths
+
+The code of nearest scaled 'src_0565_0565' function was generalized
+and moved to a common macro, so that it can be reused for other
+fast paths.
+---
+ pixman/pixman-arm-simd-asm.S | 60 +++++++++++++++++++++++++----------------
+ 1 files changed, 36 insertions(+), 24 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index dd1366d..a9775e2 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ .endfunc
+
+ /*
+- * Note: This function is only using armv4t instructions (not even armv6),
++ * Note: This code is only using armv5te instructions (not even armv6),
+ * but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+ * be split into a few variants, tuned for each microarchitecture.
+ *
+ * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+ * have efficient write combining), it needs to be changed to use 16-byte
+ * aligned writes using STM instruction.
++ *
++ * Nearest scanline scaler macro template uses the following arguments:
++ * fname - name of the function to generate
++ * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
++ * t - type suffix for LDR/STR instructions
++ * prefetch_distance - prefetch in the source image by that many
++ * pixels ahead
++ * prefetch_braking_distance - stop prefetching when that many pixels are
++ * remaining before the end of scanline
+ */
+-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
++
++.macro generate_nearest_scanline_func fname, bpp_shift, t, \
++ prefetch_distance, \
++ prefetch_braking_distance
++
++pixman_asm_function fname
+ W .req r0
+ DST .req r1
+ SRC .req r2
+@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+
+ ldr UNIT_X, [sp]
+ push {r4, r5, r6, r7}
+- mvn VXMASK, #1
++ mvn VXMASK, #((1 << bpp_shift) - 1)
+
+ /* define helper macro */
+ .macro scale_2_pixels
+- ldrh TMP1, [SRC, TMP1]
+- and TMP2, VXMASK, VX, lsr #15
++ ldr&t TMP1, [SRC, TMP1]
++ and TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- strh TMP1, [DST], #2
++ str&t TMP1, [DST], #(1 << bpp_shift)
+
+- ldrh TMP2, [SRC, TMP2]
+- and TMP1, VXMASK, VX, lsr #15
++ ldr&t TMP2, [SRC, TMP2]
++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- strh TMP2, [DST], #2
++ str&t TMP2, [DST], #(1 << bpp_shift)
+ .endm
+
+- /*
+- * stop prefetch before reaching the end of scanline (a good behaving
+- * value selected based on some benchmarks with short scanlines)
+- */
+- #define PREFETCH_BRAKING_DISTANCE 32
+-
+ /* now do the scaling */
+- and TMP1, VXMASK, VX, lsr #15
++ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ add VX, VX, UNIT_X
+- subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
++ subs W, W, #(8 + prefetch_braking_distance)
+ blt 2f
+- /* set prefetch distance to 80 pixels ahead */
+- add PF_OFFS, VX, UNIT_X, lsl #6
+- add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++ /* calculate prefetch offset */
++ mov PF_OFFS, #prefetch_distance
++ mla PF_OFFS, UNIT_X, PF_OFFS, VX
+ 1: /* main loop, process 8 pixels per iteration with prefetch */
+ subs W, W, #8
+ add PF_OFFS, UNIT_X, lsl #3
+@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ scale_2_pixels
+ scale_2_pixels
+ scale_2_pixels
+- pld [SRC, PF_OFFS, lsr #15]
++ pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)]
+ bge 1b
+ 2:
+- subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
++ subs W, W, #(4 - 8 - prefetch_braking_distance)
+ blt 2f
+ 1: /* process the remaining pixels */
+ scale_2_pixels
+@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ scale_2_pixels
+ 2:
+ tst W, #1
+- ldrneh TMP1, [SRC, TMP1]
+- strneh TMP1, [DST], #2
++ ldrne&t TMP1, [SRC, TMP1]
++ strne&t TMP1, [DST]
+ /* cleanup helper macro */
+ .purgem scale_2_pixels
+ .unreq DST
+@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ pop {r4, r5, r6, r7}
+ bx lr
+ .endfunc
++.endm
++
++generate_nearest_scanline_func \
++ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch b/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
new file mode 100644
index 0000000..cc3a68f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
@@ -0,0 +1,60 @@
+From 5921c17639fe5fdc595c850e3347281c1c8746ba Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 22:16:32 +0200
+Subject: [PATCH 15/40] ARM: assembly optimized nearest scaled 'src_8888_8888'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S | 3 +++
+ pixman/pixman-arm-simd.c | 9 +++++++++
+ 2 files changed, 12 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index a9775e2..858c690 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -433,3 +433,6 @@ pixman_asm_function fname
+
+ generate_nearest_scanline_func \
+ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
++
++generate_nearest_scanline_func \
++ pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index 6bbc109..a66f8df 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -389,6 +389,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
+
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+ uint16_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
++ uint32_t, uint32_t)
+
+ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ {
+@@ -411,6 +413,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
++ PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch b/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
new file mode 100644
index 0000000..d8559b0
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
@@ -0,0 +1,130 @@
+From 66f4ee1b3bccf4516433d61dbf2035551a712fa2 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 10:59:46 +0200
+Subject: [PATCH 16/40] ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
+
+It can be reused in different ARM NEON bilinear scaling fast path functions.
+---
+ pixman/pixman-arm-common.h | 45 ++++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c | 44 ++----------------------------------------
+ 2 files changed, 48 insertions(+), 41 deletions(-)
+
+diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
+index 9b1322b..c3bf986 100644
+--- a/pixman/pixman-arm-common.h
++++ b/pixman/pixman-arm-common.h
+@@ -361,4 +361,49 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
++/*****************************************************************************/
++
++#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op, \
++ src_type, dst_type) \
++void \
++pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
++ dst_type * dst, \
++ const src_type * top, \
++ const src_type * bottom, \
++ int wt, \
++ int wb, \
++ pixman_fixed_t x, \
++ pixman_fixed_t ux, \
++ int width); \
++ \
++static force_inline void \
++scaled_bilinear_scanline_##cputype##_##name##_##op ( \
++ dst_type * dst, \
++ const uint32_t * mask, \
++ const src_type * src_top, \
++ const src_type * src_bottom, \
++ int32_t w, \
++ int wt, \
++ int wb, \
++ pixman_fixed_t vx, \
++ pixman_fixed_t unit_x, \
++ pixman_fixed_t max_vx, \
++ pixman_bool_t zero_src) \
++{ \
++ if ((flags & SKIP_ZERO_SRC) && zero_src) \
++ return; \
++ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \
++ dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
++} \
++ \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
++ scaled_bilinear_scanline_##cputype##_##name##_##op, \
++ src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
++ scaled_bilinear_scanline_##cputype##_##name##_##op, \
++ src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
++ scaled_bilinear_scanline_##cputype##_##name##_##op, \
++ src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
++
+ #endif
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index c7c0254..98ad5f2 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -127,6 +127,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ OVER, uint16_t, uint16_t)
+
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
++ uint32_t, uint32_t)
++
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t w,
+ int32_t h,
+@@ -232,47 +235,6 @@ pixman_blt_neon (uint32_t *src_bits,
+ }
+ }
+
+-void
+-pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t * out,
+- const uint32_t * top,
+- const uint32_t * bottom,
+- int wt,
+- int wb,
+- pixman_fixed_t x,
+- pixman_fixed_t ux,
+- int width);
+-
+-static force_inline void
+-scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t * dst,
+- const uint32_t * mask,
+- const uint32_t * src_top,
+- const uint32_t * src_bottom,
+- int32_t w,
+- int wt,
+- int wb,
+- pixman_fixed_t vx,
+- pixman_fixed_t unit_x,
+- pixman_fixed_t max_vx,
+- pixman_bool_t zero_src)
+-{
+- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
+- src_bottom, wt, wb,
+- vx, unit_x, w);
+-}
+-
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
+- scaled_bilinear_scanline_neon_8888_8888_SRC,
+- uint32_t, uint32_t, uint32_t,
+- COVER, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
+- scaled_bilinear_scanline_neon_8888_8888_SRC,
+- uint32_t, uint32_t, uint32_t,
+- PAD, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
+- scaled_bilinear_scanline_neon_8888_8888_SRC,
+- uint32_t, uint32_t, uint32_t,
+- NONE, FALSE, FALSE)
+-
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, neon_composite_src_0565_0565),
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch b/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
new file mode 100644
index 0000000..6efc40f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
@@ -0,0 +1,271 @@
+From 34098dba6763afd3636a14f9c2a079ab08f23b2d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:34:15 +0200
+Subject: [PATCH 17/40] ARM: NEON: common macro template for bilinear scanline scalers
+
+This allows generating bilinear scanline scaling functions targeting
+various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
+and r5g6b5 color formats are supported. More formats can be added if needed.
+---
+ pixman/pixman-arm-neon-asm.S | 222 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon-asm.h | 17 +++
+ 2 files changed, 239 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index c168e10..f3784f5 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2588,3 +2588,225 @@ pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+ .unreq TMP1
+ .unreq TMP2
+ .endfunc
++
++.purgem bilinear_interpolate_last_pixel
++.purgem bilinear_interpolate_two_pixels
++.purgem bilinear_interpolate_four_pixels
++
++/*
++ * Bilinear scaling support code which tries to provide pixel fetching, color
++ * format conversion, and interpolation as separate macros which can be used
++ * as the basic building blocks for constructing bilinear scanline functions.
++ */
++
++.macro bilinear_load_8888 reg1, reg2, tmp
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP2, asl #2
++ add TMP2, BOTTOM, TMP2, asl #2
++ vld1.32 {reg1}, [TMP1]
++ vld1.32 {reg2}, [TMP2]
++.endm
++
++.macro bilinear_load_0565 reg1, reg2, tmp
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP2, asl #1
++ add TMP2, BOTTOM, TMP2, asl #1
++ vld1.32 {reg2[0]}, [TMP1]
++ vld1.32 {reg2[1]}, [TMP2]
++ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
++.endm
++
++.macro bilinear_store_8888 numpix, tmp1, tmp2
++.if numpix == 4
++ vst1.32 {d0, d1}, [OUT]!
++.elseif numpix == 2
++ vst1.32 {d0}, [OUT]!
++.elseif numpix == 1
++ vst1.32 {d0[0]}, [OUT, :32]!
++.else
++ .error bilinear_store_8888 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_store_0565 numpix, tmp1, tmp2
++ vuzp.u8 d0, d1
++ vuzp.u8 d2, d3
++ vuzp.u8 d1, d3
++ vuzp.u8 d0, d2
++ convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
++.if numpix == 4
++ vst1.16 {d2}, [OUT]!
++.elseif numpix == 2
++ vst1.32 {d2[0]}, [OUT]!
++.elseif numpix == 1
++ vst1.16 {d2[0]}, [OUT]!
++.else
++ .error bilinear_store_0565 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
++ bilinear_load_&src_fmt d0, d1, d2
++ vmull.u8 q1, d0, d28
++ vmlal.u8 q1, d1, d29
++ vshr.u16 d30, d24, #8
++ /* 4 cycles bubble */
++ vshll.u16 q0, d2, #8
++ vmlsl.u16 q0, d2, d30
++ vmlal.u16 q0, d3, d30
++ /* 5 cycles bubble */
++ vshrn.u32 d0, q0, #16
++ /* 3 cycles bubble */
++ vmovn.u16 d0, q0
++ /* 1 cycle bubble */
++ bilinear_store_&dst_fmt 1, q2, q3
++.endm
++
++.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
++ bilinear_load_&src_fmt d0, d1, d2
++ vmull.u8 q1, d0, d28
++ vmlal.u8 q1, d1, d29
++ bilinear_load_&src_fmt d20, d21, d22
++ vmull.u8 q11, d20, d28
++ vmlal.u8 q11, d21, d29
++ vshr.u16 q15, q12, #8
++ vadd.u16 q12, q12, q13
++ vshll.u16 q0, d2, #8
++ vmlsl.u16 q0, d2, d30
++ vmlal.u16 q0, d3, d30
++ vshll.u16 q10, d22, #8
++ vmlsl.u16 q10, d22, d31
++ vmlal.u16 q10, d23, d31
++ vshrn.u32 d30, q0, #16
++ vshrn.u32 d31, q10, #16
++ vmovn.u16 d0, q15
++ bilinear_store_&dst_fmt 2, q2, q3
++.endm
++
++.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
++ bilinear_load_&src_fmt d0, d1, d2
++ vmull.u8 q1, d0, d28
++ vmlal.u8 q1, d1, d29
++ bilinear_load_&src_fmt d20, d21, d22
++ vmull.u8 q11, d20, d28
++ vmlal.u8 q11, d21, d29
++ bilinear_load_&src_fmt d4, d5, d6
++ vmull.u8 q3, d4, d28
++ vmlal.u8 q3, d5, d29
++ bilinear_load_&src_fmt d16, d17, d18
++ vmull.u8 q9, d16, d28
++ vmlal.u8 q9, d17, d29
++ pld [TMP1, PF_OFFS]
++ vshr.u16 q15, q12, #8
++ vadd.u16 q12, q12, q13
++ vshll.u16 q0, d2, #8
++ vmlsl.u16 q0, d2, d30
++ vmlal.u16 q0, d3, d30
++ vshll.u16 q10, d22, #8
++ vmlsl.u16 q10, d22, d31
++ vmlal.u16 q10, d23, d31
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d6, #8
++ vmlsl.u16 q2, d6, d30
++ vmlal.u16 q2, d7, d30
++ vshll.u16 q8, d18, #8
++ pld [TMP2, PF_OFFS]
++ vmlsl.u16 q8, d18, d31
++ vmlal.u16 q8, d19, d31
++ vadd.u16 q12, q12, q13
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q10, #16
++ vshrn.u32 d4, q2, #16
++ vshrn.u32 d5, q8, #16
++ vmovn.u16 d0, q0
++ vmovn.u16 d1, q2
++ bilinear_store_&dst_fmt 4, q2, q3
++.endm
++
++/*
++ * Main template macro for generating NEON optimized bilinear scanline
++ * functions.
++ *
++ * TODO: use software pipelining and aligned writes to the destination buffer
++ * in order to improve performance
++ *
++ * Bilinear scanline scaler macro template uses the following arguments:
++ * fname - name of the function to generate
++ * src_fmt - source color format (8888 or 0565)
++ * dst_fmt - destination color format (8888 or 0565)
++ * bpp_shift - (1 << bpp_shift) is the size of source pixel in bytes
++ * prefetch_distance - prefetch in the source image by that many
++ * pixels ahead
++ */
++
++.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
++ bpp_shift, prefetch_distance
++
++pixman_asm_function fname
++ OUT .req r0
++ TOP .req r1
++ BOTTOM .req r2
++ WT .req r3
++ WB .req r4
++ X .req r5
++ UX .req r6
++ WIDTH .req ip
++ TMP1 .req r3
++ TMP2 .req r4
++ PF_OFFS .req r7
++ TMP3 .req r8
++ TMP4 .req r9
++
++ mov ip, sp
++ push {r4, r5, r6, r7, r8, r9}
++ mov PF_OFFS, #prefetch_distance
++ ldmia ip, {WB, X, UX, WIDTH}
++ mul PF_OFFS, PF_OFFS, UX
++
++ cmp WIDTH, #0
++ ble 3f
++
++ vdup.u16 q12, X
++ vdup.u16 q13, UX
++ vdup.u8 d28, WT
++ vdup.u8 d29, WB
++ vadd.u16 d25, d25, d26
++ vadd.u16 q13, q13, q13
++
++ subs WIDTH, WIDTH, #4
++ blt 1f
++ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
++0:
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #4
++ bge 0b
++1:
++ tst WIDTH, #2
++ beq 2f
++ bilinear_interpolate_two_pixels src_fmt, dst_fmt
++2:
++ tst WIDTH, #1
++ beq 3f
++ bilinear_interpolate_last_pixel src_fmt, dst_fmt
++3:
++ pop {r4, r5, r6, r7, r8, r9}
++ bx lr
++
++ .unreq OUT
++ .unreq TOP
++ .unreq BOTTOM
++ .unreq WT
++ .unreq WB
++ .unreq X
++ .unreq UX
++ .unreq WIDTH
++ .unreq TMP1
++ .unreq TMP2
++ .unreq PF_OFFS
++ .unreq TMP3
++ .unreq TMP4
++.endfunc
++
++.endm
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index 24fa361..97adc6a 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -1158,3 +1158,20 @@ fname:
+ vsri.u16 out, tmp1, #5
+ vsri.u16 out, tmp2, #11
+ .endm
++
++/*
++ * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
++ * returned in (out0, out1) registers pair. Requires one temporary
++ * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
++ * value from 'in' is lost
++ */
++.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
++ vshl.u16 out0, in, #5 /* G top 6 bits */
++ vshl.u16 tmp, in, #11 /* B top 5 bits */
++ vsri.u16 in, in, #5 /* R is ready in top bits */
++ vsri.u16 out0, out0, #6 /* G is ready in top bits */
++ vsri.u16 tmp, tmp, #5 /* B is ready in top bits */
++ vshr.u16 out1, in, #8 /* R is in place */
++ vsri.u16 out0, tmp, #8 /* G & B is in place */
++ vzip.u16 out0, out1 /* everything is in place */
++.endm
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch b/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
new file mode 100644
index 0000000..245e536
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
@@ -0,0 +1,226 @@
+From 11a0c5badbc59ce967707ef836313cc98f8aec4e Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:46:48 +0200
+Subject: [PATCH 18/40] ARM: use common macro template for bilinear scaled 'src_8888_8888'
+
+This is a cleanup of old and now duplicated code. The performance improvement
+comes mostly from enabling software prefetch, but instruction scheduling is
+also slightly better.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 191 +-----------------------------------------
+ 1 files changed, 3 insertions(+), 188 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f3784f5..52dc444 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2405,194 +2405,6 @@ generate_composite_function_nearest_scanline \
+ fname:
+ .endm
+
+-.macro bilinear_interpolate_last_pixel
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d0}, [TMP1]
+- vshr.u16 d30, d24, #8
+- vld1.32 {d1}, [TMP2]
+- vmull.u8 q1, d0, d28
+- vmlal.u8 q1, d1, d29
+- /* 5 cycles bubble */
+- vshll.u16 q0, d2, #8
+- vmlsl.u16 q0, d2, d30
+- vmlal.u16 q0, d3, d30
+- /* 5 cycles bubble */
+- vshrn.u32 d0, q0, #16
+- /* 3 cycles bubble */
+- vmovn.u16 d0, q0
+- /* 1 cycle bubble */
+- vst1.32 {d0[0]}, [OUT, :32]!
+-.endm
+-
+-.macro bilinear_interpolate_two_pixels
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d0}, [TMP1]
+- vld1.32 {d1}, [TMP2]
+- vmull.u8 q1, d0, d28
+- vmlal.u8 q1, d1, d29
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d20}, [TMP1]
+- vld1.32 {d21}, [TMP2]
+- vmull.u8 q11, d20, d28
+- vmlal.u8 q11, d21, d29
+- vshr.u16 q15, q12, #8
+- vadd.u16 q12, q12, q13
+- vshll.u16 q0, d2, #8
+- vmlsl.u16 q0, d2, d30
+- vmlal.u16 q0, d3, d30
+- vshll.u16 q10, d22, #8
+- vmlsl.u16 q10, d22, d31
+- vmlal.u16 q10, d23, d31
+- vshrn.u32 d30, q0, #16
+- vshrn.u32 d31, q10, #16
+- vmovn.u16 d0, q15
+- vst1.32 {d0}, [OUT]!
+-.endm
+-
+-.macro bilinear_interpolate_four_pixels
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d0}, [TMP1]
+- vld1.32 {d1}, [TMP2]
+- vmull.u8 q1, d0, d28
+- vmlal.u8 q1, d1, d29
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d20}, [TMP1]
+- vld1.32 {d21}, [TMP2]
+- vmull.u8 q11, d20, d28
+- vmlal.u8 q11, d21, d29
+- vshr.u16 q15, q12, #8
+- vadd.u16 q12, q12, q13
+- vshll.u16 q0, d2, #8
+- vmlsl.u16 q0, d2, d30
+- vmlal.u16 q0, d3, d30
+- vshll.u16 q10, d22, #8
+- vmlsl.u16 q10, d22, d31
+- vmlal.u16 q10, d23, d31
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d4}, [TMP1]
+- vld1.32 {d5}, [TMP2]
+- vmull.u8 q3, d4, d28
+- vmlal.u8 q3, d5, d29
+- mov TMP1, X, asr #16
+- mov TMP2, X, asr #16
+- add X, X, UX
+- add TMP1, TOP, TMP1, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {d16}, [TMP1]
+- vld1.32 {d17}, [TMP2]
+- vmull.u8 q9, d16, d28
+- vmlal.u8 q9, d17, d29
+- vshr.u16 q15, q12, #8
+- vadd.u16 q12, q12, q13
+- vshll.u16 q2, d6, #8
+- vmlsl.u16 q2, d6, d30
+- vmlal.u16 q2, d7, d30
+- vshll.u16 q8, d18, #8
+- vmlsl.u16 q8, d18, d31
+- vmlal.u16 q8, d19, d31
+- vshrn.u32 d0, q0, #16
+- vshrn.u32 d1, q10, #16
+- vshrn.u32 d4, q2, #16
+- vshrn.u32 d5, q8, #16
+- vmovn.u16 d0, q0
+- vmovn.u16 d1, q2
+- vst1.32 {d0, d1}, [OUT]!
+-.endm
+-
+-
+-/*
+- * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t * out,
+- * const uint32_t * top,
+- * const uint32_t * bottom,
+- * int wt,
+- * int wb,
+- * pixman_fixed_t x,
+- * pixman_fixed_t ux,
+- * int width)
+- */
+-
+-pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+- OUT .req r0
+- TOP .req r1
+- BOTTOM .req r2
+- WT .req r3
+- WB .req r4
+- X .req r5
+- UX .req r6
+- WIDTH .req ip
+- TMP1 .req r3
+- TMP2 .req r4
+-
+- mov ip, sp
+- push {r4, r5, r6, r7}
+- ldmia ip, {WB, X, UX, WIDTH}
+-
+- cmp WIDTH, #0
+- ble 3f
+- vdup.u16 q12, X
+- vdup.u16 q13, UX
+- vdup.u8 d28, WT
+- vdup.u8 d29, WB
+- vadd.u16 d25, d25, d26
+- vadd.u16 q13, q13, q13
+-
+- subs WIDTH, WIDTH, #4
+- blt 1f
+-0:
+- bilinear_interpolate_four_pixels
+- subs WIDTH, WIDTH, #4
+- bge 0b
+-1:
+- tst WIDTH, #2
+- beq 2f
+- bilinear_interpolate_two_pixels
+-2:
+- tst WIDTH, #1
+- beq 3f
+- bilinear_interpolate_last_pixel
+-3:
+- pop {r4, r5, r6, r7}
+- bx lr
+-
+- .unreq OUT
+- .unreq TOP
+- .unreq BOTTOM
+- .unreq WT
+- .unreq WB
+- .unreq X
+- .unreq UX
+- .unreq WIDTH
+- .unreq TMP1
+- .unreq TMP2
+-.endfunc
+-
+-.purgem bilinear_interpolate_last_pixel
+-.purgem bilinear_interpolate_two_pixels
+-.purgem bilinear_interpolate_four_pixels
+-
+ /*
+ * Bilinear scaling support code which tries to provide pixel fetching, color
+ * format conversion, and interpolation as separate macros which can be used
+@@ -2810,3 +2622,6 @@ pixman_asm_function fname
+ .endfunc
+
+ .endm
++
++generate_bilinear_scanline_func \
++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch b/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
new file mode 100644
index 0000000..cc17694
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
@@ -0,0 +1,51 @@
+From 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:53:04 +0200
+Subject: [PATCH 19/40] ARM: NEON optimization for bilinear scaled 'src_8888_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
+ after: op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 3 +++
+ pixman/pixman-arm-neon.c | 5 +++++
+ 2 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 52dc444..f0b42ca 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2625,3 +2625,6 @@ pixman_asm_function fname
+
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
++
++generate_bilinear_scanline_func \
++ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 98ad5f2..ba6de66 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -129,6 +129,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+ uint32_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
++ uint32_t, uint16_t)
+
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t w,
+@@ -350,6 +352,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+
++ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch b/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
new file mode 100644
index 0000000..1924b3a
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
@@ -0,0 +1,50 @@
+From 29003c3befe2159396d181ef9ac1caaadcabf382 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:21:53 +0200
+Subject: [PATCH 20/40] ARM: NEON optimization for bilinear scaled 'src_0565_x888'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
+ after: op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 3 +++
+ pixman/pixman-arm-neon.c | 4 ++++
+ 2 files changed, 7 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f0b42ca..9245db9 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2628,3 +2628,6 @@ generate_bilinear_scanline_func \
+
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
++
++generate_bilinear_scanline_func \
++ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index ba6de66..18e26eb 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -131,6 +131,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+ uint32_t, uint32_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+ uint32_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
++ uint16_t, uint32_t)
+
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t w,
+@@ -355,6 +357,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+
++ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++
+ { PIXMAN_OP_NONE },
+ };
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch b/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
new file mode 100644
index 0000000..a0193d1
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
@@ -0,0 +1,49 @@
+From fe99673719091d4a880d031add1369332a75731b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:27:41 +0200
+Subject: [PATCH 21/40] ARM: NEON optimization for bilinear scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
+ after: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 3 +++
+ pixman/pixman-arm-neon.c | 3 +++
+ 2 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9245db9..2b6875b 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \
+
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
++
++generate_bilinear_scanline_func \
++ pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 18e26eb..0a10ca1 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+ uint32_t, uint16_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+ uint16_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
++ uint16_t, uint16_t)
+
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t w,
+@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+
+ { PIXMAN_OP_NONE },
+ };
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch b/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
new file mode 100644
index 0000000..20019f4
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
@@ -0,0 +1,166 @@
+From 70a923882ca24664344ba91a649e7aa12c3063f7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:55:48 +0200
+Subject: [PATCH 22/40] ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
+
+Instruction scheduling is improved in the code responsible for fetching r5g6b5
+pixels and converting them to the intermediate x8r8g8b8 color format used in
+the interpolation part of the code. A lot of NEON stalls still remain,
+which can be resolved later by the use of pipelining.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+ op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+ after: op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
+ op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 118 +++++++++++++++++++++++++++++++++++------
+ 1 files changed, 100 insertions(+), 18 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2b6875b..71b30ac 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2430,6 +2430,101 @@ fname:
+ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ .endm
+
++.macro bilinear_load_and_vertical_interpolate_two_8888 \
++ acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
++
++ bilinear_load_8888 reg1, reg2, tmp1
++ vmull.u8 acc1, reg1, d28
++ vmlal.u8 acc1, reg2, d29
++ bilinear_load_8888 reg3, reg4, tmp2
++ vmull.u8 acc2, reg3, d28
++ vmlal.u8 acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_8888 \
++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++ bilinear_load_and_vertical_interpolate_two_8888 \
++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
++ bilinear_load_and_vertical_interpolate_two_8888 \
++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_two_0565 \
++ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
++
++ mov TMP2, X, asr #16
++ add X, X, UX
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP2, asl #1
++ add TMP2, BOTTOM, TMP2, asl #1
++ add TMP3, TOP, TMP4, asl #1
++ add TMP4, BOTTOM, TMP4, asl #1
++ vld1.32 {acc2lo[0]}, [TMP1]
++ vld1.32 {acc2hi[0]}, [TMP3]
++ vld1.32 {acc2lo[1]}, [TMP2]
++ vld1.32 {acc2hi[1]}, [TMP4]
++ convert_0565_to_x888 acc2, reg3, reg2, reg1
++ vzip.u8 reg1, reg3
++ vzip.u8 reg2, reg4
++ vzip.u8 reg3, reg4
++ vzip.u8 reg1, reg2
++ vmull.u8 acc1, reg1, d28
++ vmlal.u8 acc1, reg2, d29
++ vmull.u8 acc2, reg3, d28
++ vmlal.u8 acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_0565 \
++ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++ mov TMP2, X, asr #16
++ add X, X, UX
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP2, asl #1
++ add TMP2, BOTTOM, TMP2, asl #1
++ add TMP3, TOP, TMP4, asl #1
++ add TMP4, BOTTOM, TMP4, asl #1
++ vld1.32 {xacc2lo[0]}, [TMP1]
++ vld1.32 {xacc2hi[0]}, [TMP3]
++ vld1.32 {xacc2lo[1]}, [TMP2]
++ vld1.32 {xacc2hi[1]}, [TMP4]
++ convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
++ mov TMP2, X, asr #16
++ add X, X, UX
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP2, asl #1
++ add TMP2, BOTTOM, TMP2, asl #1
++ add TMP3, TOP, TMP4, asl #1
++ add TMP4, BOTTOM, TMP4, asl #1
++ vld1.32 {yacc2lo[0]}, [TMP1]
++ vzip.u8 xreg1, xreg3
++ vld1.32 {yacc2hi[0]}, [TMP3]
++ vzip.u8 xreg2, xreg4
++ vld1.32 {yacc2lo[1]}, [TMP2]
++ vzip.u8 xreg3, xreg4
++ vld1.32 {yacc2hi[1]}, [TMP4]
++ vzip.u8 xreg1, xreg2
++ convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
++ vmull.u8 xacc1, xreg1, d28
++ vzip.u8 yreg1, yreg3
++ vmlal.u8 xacc1, xreg2, d29
++ vzip.u8 yreg2, yreg4
++ vmull.u8 xacc2, xreg3, d28
++ vzip.u8 yreg3, yreg4
++ vmlal.u8 xacc2, xreg4, d29
++ vzip.u8 yreg1, yreg2
++ vmull.u8 yacc1, yreg1, d28
++ vmlal.u8 yacc1, yreg2, d29
++ vmull.u8 yacc2, yreg3, d28
++ vmlal.u8 yacc2, yreg4, d29
++.endm
++
+ .macro bilinear_store_8888 numpix, tmp1, tmp2
+ .if numpix == 4
+ vst1.32 {d0, d1}, [OUT]!
+@@ -2477,12 +2572,8 @@ fname:
+ .endm
+
+ .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
+- bilinear_load_&src_fmt d0, d1, d2
+- vmull.u8 q1, d0, d28
+- vmlal.u8 q1, d1, d29
+- bilinear_load_&src_fmt d20, d21, d22
+- vmull.u8 q11, d20, d28
+- vmlal.u8 q11, d21, d29
++ bilinear_load_and_vertical_interpolate_two_&src_fmt \
++ q1, q11, d0, d1, d20, d21, d22, d23
+ vshr.u16 q15, q12, #8
+ vadd.u16 q12, q12, q13
+ vshll.u16 q0, d2, #8
+@@ -2498,18 +2589,9 @@ fname:
+ .endm
+
+ .macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
+- bilinear_load_&src_fmt d0, d1, d2
+- vmull.u8 q1, d0, d28
+- vmlal.u8 q1, d1, d29
+- bilinear_load_&src_fmt d20, d21, d22
+- vmull.u8 q11, d20, d28
+- vmlal.u8 q11, d21, d29
+- bilinear_load_&src_fmt d4, d5, d6
+- vmull.u8 q3, d4, d28
+- vmlal.u8 q3, d5, d29
+- bilinear_load_&src_fmt d16, d17, d18
+- vmull.u8 q9, d16, d28
+- vmlal.u8 q9, d17, d29
++ bilinear_load_and_vertical_interpolate_four_&src_fmt \
++ q1, q11, d0, d1, d20, d21, d22, d23 \
++ q3, q9, d4, d5, d16, d17, d18, d19
+ pld [TMP1, PF_OFFS]
+ vshr.u16 q15, q12, #8
+ vadd.u16 q12, q12, q13
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch b/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch
new file mode 100644
index 0000000..96343f1
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch
@@ -0,0 +1,54 @@
+From be4eaa0e4f79af38b7b89c5b09ca88d3a88d9396 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:06:02 -0500
+Subject: [PATCH 23/40] In delegate_{src,dest}_iter_init() call delegate directly.
+
+There is no reason to go through
+_pixman_implementation_{src,dest}_iter_init(), especially since
+_pixman_implementation_src_iter_init() is doing various other checks
+that only need to be done once.
+
+Also call delegate->src_iter_init() directly in pixman-sse2.c
+---
+ pixman/pixman-implementation.c | 4 ++--
+ pixman/pixman-sse2.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index adaf9c6..892767e 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -122,7 +122,7 @@ delegate_src_iter_init (pixman_implementation_t *imp,
+ uint8_t * buffer,
+ iter_flags_t flags)
+ {
+- _pixman_implementation_src_iter_init (
++ imp->delegate->src_iter_init (
+ imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+
+@@ -137,7 +137,7 @@ delegate_dest_iter_init (pixman_implementation_t *imp,
+ uint8_t * buffer,
+ iter_flags_t flags)
+ {
+- _pixman_implementation_dest_iter_init (
++ imp->delegate->dest_iter_init (
+ imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 696005f..d4a34e9 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -6013,7 +6013,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ }
+ }
+
+- _pixman_implementation_src_iter_init (
++ imp->delegate->src_iter_init (
+ imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch b/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch
new file mode 100644
index 0000000..44fd38a
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch
@@ -0,0 +1,111 @@
+From 74d0f44b6d6d613d24541b849835da0464cc6fd0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:12:35 -0500
+Subject: [PATCH 24/40] Fill out parts of iters in _pixman_implementation_{src,dest}_iter_init()
+
+This makes _pixman_implementation_{src,dest}_iter_init() responsible
+for filling parts of the information in the iterators. Specifically,
+the information passed as arguments is stored in the iterator.
+
+Also add height and flags fields to struct pixman_iter_t.
+---
+ pixman/pixman-general.c | 6 ------
+ pixman/pixman-implementation.c | 16 ++++++++++++++++
+ pixman/pixman-private.h | 11 ++++++++---
+ pixman/pixman-sse2.c | 2 --
+ 4 files changed, 24 insertions(+), 11 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 872fb7e..1a0fa7c 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -44,12 +44,6 @@ general_src_iter_init (pixman_implementation_t *imp,
+ int x, int y, int width, int height,
+ uint8_t *buffer, iter_flags_t flags)
+ {
+- iter->image = image;
+- iter->x = x;
+- iter->y = y;
+- iter->width = width;
+- iter->buffer = (uint32_t *)buffer;
+-
+ if (image->type == SOLID)
+ {
+ _pixman_solid_fill_iter_init (
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index 892767e..bdd4543 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -274,6 +274,14 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp,
+ uint8_t *buffer,
+ iter_flags_t flags)
+ {
++ iter->image = image;
++ iter->buffer = (uint32_t *)buffer;
++ iter->x = x;
++ iter->y = y;
++ iter->width = width;
++ iter->height = height;
++ iter->flags = flags;
++
+ if (!image)
+ {
+ iter->get_scanline = get_scanline_null;
+@@ -301,6 +309,14 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
+ uint8_t *buffer,
+ iter_flags_t flags)
+ {
++ iter->image = image;
++ iter->buffer = (uint32_t *)buffer;
++ iter->x = x;
++ iter->y = y;
++ iter->width = width;
++ iter->height = height;
++ iter->flags = flags;
++
+ (*imp->dest_iter_init) (
+ imp, iter, image, x, y, width, height, buffer, flags);
+ }
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index 1473dc4..ea9545f 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -212,14 +212,19 @@ typedef enum
+
+ struct pixman_iter_t
+ {
+- pixman_iter_get_scanline_t get_scanline;
+- pixman_iter_write_back_t write_back;
+-
++ /* These are initialized by _pixman_implementation_{src,dest}_init */
+ pixman_image_t * image;
+ uint32_t * buffer;
+ int x, y;
+ int width;
++ int height;
++ iter_flags_t flags;
++
++ /* These function pointers are initialized by the implementation */
++ pixman_iter_get_scanline_t get_scanline;
++ pixman_iter_write_back_t write_back;
+
++ /* These fields are scratch data that implementations can use */
+ uint8_t * bits;
+ int stride;
+ };
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index d4a34e9..43a6bf2 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -6004,8 +6004,6 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+
+ iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+- iter->width = width;
+- iter->buffer = (uint32_t *)buffer;
+
+ iter->get_scanline = f->get_scanline;
+ return;
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch b/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch
new file mode 100644
index 0000000..1bfd6b4
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch
@@ -0,0 +1,442 @@
+From 6b27768d81c254a4f1d05473157328d5a5d99b9c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:42:58 -0500
+Subject: [PATCH 25/40] Simplify the prototype for iterator initializers.
+
+All of the information previously passed to the iterator initializers
+is now available in the iterator itself, so there is no need to pass
+it as arguments anymore.
+---
+ pixman/pixman-bits-image.c | 20 +++++---------
+ pixman/pixman-conical-gradient.c | 7 +---
+ pixman/pixman-general.c | 52 ++++++++------------------------------
+ pixman/pixman-implementation.c | 30 ++++-----------------
+ pixman/pixman-linear-gradient.c | 16 +++--------
+ pixman/pixman-private.h | 40 ++++++-----------------------
+ pixman/pixman-radial-gradient.c | 7 +---
+ pixman/pixman-solid-fill.c | 17 +++++-------
+ pixman/pixman-sse2.c | 25 +++++++++--------
+ 9 files changed, 61 insertions(+), 153 deletions(-)
+
+diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
+index a865d71..835ecfb 100644
+--- a/pixman/pixman-bits-image.c
++++ b/pixman/pixman-bits-image.c
+@@ -1362,12 +1362,9 @@ src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+
+ void
+-_pixman_bits_image_src_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ iter->get_scanline = src_get_scanline_narrow;
+ else
+ iter->get_scanline = src_get_scanline_wide;
+@@ -1472,28 +1469,25 @@ dest_write_back_direct (pixman_iter_t *iter)
+ }
+
+ void
+-_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ {
+ if (((image->common.flags &
+ (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) ==
+ (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) &&
+ (image->bits.format == PIXMAN_a8r8g8b8 ||
+ (image->bits.format == PIXMAN_x8r8g8b8 &&
+- (flags & ITER_LOCALIZED_ALPHA))))
++ (iter->flags & ITER_LOCALIZED_ALPHA))))
+ {
+- iter->buffer = image->bits.bits + y * image->bits.rowstride + x;
++ iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
+
+ iter->get_scanline = _pixman_iter_get_scanline_noop;
+ iter->write_back = dest_write_back_direct;
+ }
+ else
+ {
+- if ((flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
++ if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+ (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+ {
+ iter->get_scanline = _pixman_iter_get_scanline_noop;
+diff --git a/pixman/pixman-conical-gradient.c b/pixman/pixman-conical-gradient.c
+index 9d7d2e8..791d4f3 100644
+--- a/pixman/pixman-conical-gradient.c
++++ b/pixman/pixman-conical-gradient.c
+@@ -171,12 +171,9 @@ conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+
+ void
+-_pixman_conical_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ iter->get_scanline = conical_get_scanline_narrow;
+ else
+ iter->get_scanline = conical_get_scanline_wide;
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 1a0fa7c..727affc 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -38,60 +38,30 @@
+ #include "pixman-private.h"
+
+ static void
+-general_src_iter_init (pixman_implementation_t *imp,
+- pixman_iter_t *iter,
+- pixman_image_t *image,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
++ pixman_image_t *image = iter->image;
++
+ if (image->type == SOLID)
+- {
+- _pixman_solid_fill_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
+- }
++ _pixman_solid_fill_iter_init (image, iter);
+ else if (image->type == LINEAR)
+- {
+- _pixman_linear_gradient_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
+- }
++ _pixman_linear_gradient_iter_init (image, iter);
+ else if (image->type == RADIAL)
+- {
+- _pixman_radial_gradient_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
+- }
++ _pixman_radial_gradient_iter_init (image, iter);
+ else if (image->type == CONICAL)
+- {
+- _pixman_conical_gradient_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
+- }
++ _pixman_conical_gradient_iter_init (image, iter);
+ else if (image->type == BITS)
+- {
+- _pixman_bits_image_src_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
+- }
++ _pixman_bits_image_src_iter_init (image, iter);
+ else
+- {
+ _pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+- }
+ }
+
+ static void
+-general_dest_iter_init (pixman_implementation_t *imp,
+- pixman_iter_t *iter,
+- pixman_image_t *image,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
+- iter->image = image;
+- iter->x = x;
+- iter->y = y;
+- iter->width = width;
+- iter->buffer = (uint32_t *)buffer;
+-
+- if (image->type == BITS)
++ if (iter->image->type == BITS)
+ {
+- _pixman_bits_image_dest_iter_init (
+- image, iter, x, y, width, height, buffer, flags);
++ _pixman_bits_image_dest_iter_init (iter->image, iter);
+ }
+ else
+ {
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index bdd4543..f1d3f99 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -113,32 +113,16 @@ delegate_fill (pixman_implementation_t *imp,
+
+ static void
+ delegate_src_iter_init (pixman_implementation_t *imp,
+- pixman_iter_t * iter,
+- pixman_image_t * image,
+- int x,
+- int y,
+- int width,
+- int height,
+- uint8_t * buffer,
+- iter_flags_t flags)
++ pixman_iter_t * iter)
+ {
+- imp->delegate->src_iter_init (
+- imp->delegate, iter, image, x, y, width, height, buffer, flags);
++ imp->delegate->src_iter_init (imp->delegate, iter);
+ }
+
+ static void
+ delegate_dest_iter_init (pixman_implementation_t *imp,
+- pixman_iter_t * iter,
+- pixman_image_t * image,
+- int x,
+- int y,
+- int width,
+- int height,
+- uint8_t * buffer,
+- iter_flags_t flags)
++ pixman_iter_t * iter)
+ {
+- imp->delegate->dest_iter_init (
+- imp->delegate, iter, image, x, y, width, height, buffer, flags);
++ imp->delegate->dest_iter_init (imp->delegate, iter);
+ }
+
+ pixman_implementation_t *
+@@ -293,8 +277,7 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp,
+ }
+ else
+ {
+- (*imp->src_iter_init) (
+- imp, iter, image, x, y, width, height, buffer, flags);
++ (*imp->src_iter_init) (imp, iter);
+ }
+ }
+
+@@ -317,6 +300,5 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
+ iter->height = height;
+ iter->flags = flags;
+
+- (*imp->dest_iter_init) (
+- imp, iter, image, x, y, width, height, buffer, flags);
++ (*imp->dest_iter_init) (imp, iter);
+ }
+diff --git a/pixman/pixman-linear-gradient.c b/pixman/pixman-linear-gradient.c
+index 07303fc..6e1ea24 100644
+--- a/pixman/pixman-linear-gradient.c
++++ b/pixman/pixman-linear-gradient.c
+@@ -233,18 +233,12 @@ linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+
+ void
+-_pixman_linear_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x,
+- int y,
+- int width,
+- int height,
+- uint8_t *buffer,
+- iter_flags_t flags)
++_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (linear_gradient_is_horizontal (image, x, y, width, height))
++ if (linear_gradient_is_horizontal (
++ iter->image, iter->x, iter->y, iter->width, iter->height))
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ linear_get_scanline_narrow (iter, NULL);
+ else
+ linear_get_scanline_wide (iter, NULL);
+@@ -253,7 +247,7 @@ _pixman_linear_gradient_iter_init (pixman_image_t *image,
+ }
+ else
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ iter->get_scanline = linear_get_scanline_narrow;
+ else
+ iter->get_scanline = linear_get_scanline_wide;
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index ea9545f..60060a9 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -233,39 +233,22 @@ void
+ _pixman_bits_image_setup_accessors (bits_image_t *image);
+
+ void
+-_pixman_bits_image_src_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter);
++
+ void
+-_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+
+ void
+-_pixman_solid_fill_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+
+ void
+-_pixman_linear_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+
+ void
+-_pixman_radial_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+
+ void
+-_pixman_conical_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags);
++_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+
+ pixman_image_t *
+ _pixman_image_allocate (void);
+@@ -413,14 +396,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
+ int height,
+ uint32_t xor);
+ typedef void (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
+- pixman_iter_t *iter,
+- pixman_image_t *image,
+- int x,
+- int y,
+- int width,
+- int height,
+- uint8_t *buffer,
+- iter_flags_t flags);
++ pixman_iter_t *iter);
+
+ void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
+ void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
+diff --git a/pixman/pixman-radial-gradient.c b/pixman/pixman-radial-gradient.c
+index 6523b82..5e9fd73 100644
+--- a/pixman/pixman-radial-gradient.c
++++ b/pixman/pixman-radial-gradient.c
+@@ -400,12 +400,9 @@ radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+
+ void
+-_pixman_radial_gradient_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ iter->get_scanline = radial_get_scanline_narrow;
+ else
+ iter->get_scanline = radial_get_scanline_wide;
+diff --git a/pixman/pixman-solid-fill.c b/pixman/pixman-solid-fill.c
+index 67681f2..852e135 100644
+--- a/pixman/pixman-solid-fill.c
++++ b/pixman/pixman-solid-fill.c
+@@ -27,24 +27,21 @@
+ #include "pixman-private.h"
+
+ void
+-_pixman_solid_fill_iter_init (pixman_image_t *image,
+- pixman_iter_t *iter,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+- if (flags & ITER_NARROW)
++ if (iter->flags & ITER_NARROW)
+ {
+- uint32_t *b = (uint32_t *)buffer;
+- uint32_t *e = b + width;
+- uint32_t color = image->solid.color_32;
++ uint32_t *b = (uint32_t *)iter->buffer;
++ uint32_t *e = b + iter->width;
++ uint32_t color = iter->image->solid.color_32;
+
+ while (b < e)
+ *(b++) = color;
+ }
+ else
+ {
+- uint64_t *b = (uint64_t *)buffer;
+- uint64_t *e = b + width;
++ uint64_t *b = (uint64_t *)iter->buffer;
++ uint64_t *e = b + iter->width;
+ uint64_t color = image->solid.color_64;
+
+ while (b < e)
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 43a6bf2..533b858 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5978,19 +5978,21 @@ static const fetcher_info_t fetchers[] =
+ };
+
+ static void
+-sse2_src_iter_init (pixman_implementation_t *imp,
+- pixman_iter_t *iter,
+- pixman_image_t *image,
+- int x, int y, int width, int height,
+- uint8_t *buffer, iter_flags_t flags)
++sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
++ pixman_image_t *image = iter->image;
++ int x = iter->x;
++ int y = iter->y;
++ int width = iter->width;
++ int height = iter->height;
++
+ #define FLAGS \
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+- if ((flags & ITER_NARROW) &&
+- (image->common.flags & FLAGS) == FLAGS &&
+- x >= 0 && y >= 0 &&
+- x + width <= image->bits.width &&
++ if ((iter->flags & ITER_NARROW) &&
++ (image->common.flags & FLAGS) == FLAGS &&
++ x >= 0 && y >= 0 &&
++ x + width <= image->bits.width &&
+ y + height <= image->bits.height)
+ {
+ const fetcher_info_t *f;
+@@ -6002,7 +6004,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ uint8_t *b = (uint8_t *)image->bits.bits;
+ int s = image->bits.rowstride * 4;
+
+- iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
++ iter->bits = b + s * iter->y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->stride = s;
+
+ iter->get_scanline = f->get_scanline;
+@@ -6011,8 +6013,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ }
+ }
+
+- imp->delegate->src_iter_init (
+- imp->delegate, iter, image, x, y, width, height, buffer, flags);
++ imp->delegate->src_iter_init (imp->delegate, iter);
+ }
+
+ #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch b/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch
new file mode 100644
index 0000000..8fc5b77
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch
@@ -0,0 +1,187 @@
+From 7eb0abb5e819046537b9f809c7ec332c6679c557 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Mon, 14 Mar 2011 14:56:22 -0400
+Subject: [PATCH 26/40] test: Randomize some tests if PIXMAN_RANDOMIZE_TESTS is set
+
+This patch makes it so that composite and stress-test will start from a
+random seed if the PIXMAN_RANDOMIZE_TESTS environment variable is
+set. Running the test suite in this mode is useful to get more test
+coverage.
+
+Also, in stress-test.c make it so that setting the initial seed causes
+threads to be turned off. This makes it much easier to see when
+something fails.
+---
+ test/composite.c | 17 ++++++++++++-----
+ test/stress-test.c | 37 ++++++++++++++++++++++++++-----------
+ test/utils.c | 10 ++++++++++
+ test/utils.h | 3 +++
+ 4 files changed, 51 insertions(+), 16 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index a86e5ed..e6d52b9 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -868,7 +868,7 @@ main (int argc, char **argv)
+ {
+ #define N_TESTS (8 * 1024 * 1024)
+ int result = 0;
+- int i;
++ uint32_t i;
+
+ if (argc > 1)
+ {
+@@ -890,15 +890,22 @@ main (int argc, char **argv)
+ }
+ }
+
++ uint32_t seed;
++
++ if (getenv ("PIXMAN_RANDOMIZE_TESTS"))
++ seed = get_random_seed();
++ else
++ seed = 1;
++
+ #ifdef USE_OPENMP
+-# pragma omp parallel for default(none) shared(result) shared(argv)
++# pragma omp parallel for default(none) shared(result, argv, seed)
+ #endif
+- for (i = 1; i <= N_TESTS; ++i)
++ for (i = seed; i <= N_TESTS; ++i)
+ {
+ if (!result && !run_test (i))
+ {
+- printf ("Test %d failed.\n", i);
+-
++ printf ("Test 0x%08X failed.\n", i);
++
+ result = i;
+ }
+ }
+diff --git a/test/stress-test.c b/test/stress-test.c
+index 166dc6d..d496f93 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -1,4 +1,6 @@
++#include <stdio.h>
+ #include "utils.h"
++#include <sys/types.h>
+
+ #if 0
+ #define fence_malloc malloc
+@@ -730,11 +732,17 @@ static const pixman_op_t op_list[] =
+ };
+
+ static void
+-run_test (uint32_t seed)
++run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod)
+ {
+ pixman_image_t *source, *mask, *dest;
+ pixman_op_t op;
+
++ if (verbose)
++ {
++ if (mod == 0 || (seed % mod) == 0)
++ printf ("Seed 0x%08x\n", seed);
++ }
++
+ lcg_srand (seed);
+
+ source = create_random_image ();
+@@ -787,6 +795,7 @@ main (int argc, char **argv)
+ uint32_t seed = 1;
+ uint32_t n_tests = 0xffffffff;
+ uint32_t mod = 0;
++ pixman_bool_t use_threads = TRUE;
+ uint32_t i;
+
+ pixman_disable_out_of_bounds_workaround ();
+@@ -811,6 +820,7 @@ main (int argc, char **argv)
+ else if (strcmp (argv[i], "-s") == 0 && i + 1 < argc)
+ {
+ get_int (argv[i + 1], &seed);
++ use_threads = FALSE;
+ i++;
+ }
+ else if (strcmp (argv[i], "-n") == 0 && i + 1 < argc)
+@@ -825,7 +835,7 @@ main (int argc, char **argv)
+
+ printf ("Options:\n\n"
+ "-n <number> Number of tests to run\n"
+- "-s <seed> Seed of first test\n"
++ "-s <seed> Seed of first test (ignored if PIXMAN_RANDOMIZE_TESTS is set)\n"
+ "-v Print out seeds\n"
+ "-v <n> Print out every n'th seed\n\n");
+
+@@ -836,19 +846,24 @@ main (int argc, char **argv)
+ if (n_tests == 0xffffffff)
+ n_tests = 8000;
+
+- /* FIXME: seed 2005763 fails in set_lum() with divide by zero */
++ if (getenv ("PIXMAN_RANDOMIZE_TESTS"))
++ {
++ seed = get_random_seed();
++ printf ("First seed: 0x%08x\n", seed);
++ }
++
++ if (use_threads)
++ {
+ #ifdef USE_OPENMP
+ # pragma omp parallel for default(none) shared(verbose, n_tests, mod, seed)
+ #endif
+- for (i = seed; i < seed + n_tests; ++i)
++ for (i = seed; i < seed + n_tests; ++i)
++ run_test (i, verbose, mod);
++ }
++ else
+ {
+- if (verbose)
+- {
+- if (mod == 0 || (i % mod) == 0)
+- printf ("Seed %d\n", i);
+- }
+-
+- run_test (i);
++ for (i = seed; i < seed + n_tests; ++i)
++ run_test (i, verbose, mod);
+ }
+
+ return 0;
+diff --git a/test/utils.c b/test/utils.c
+index 4bf02e1..56701c4 100644
+--- a/test/utils.c
++++ b/test/utils.c
+@@ -455,6 +455,16 @@ gettime (void)
+ #endif
+ }
+
++uint32_t
++get_random_seed (void)
++{
++ double d = gettime();
++
++ lcg_srand (*(uint32_t *)&d);
++
++ return lcg_rand_u32 ();
++}
++
+ static const char *global_msg;
+
+ static void
+diff --git a/test/utils.h b/test/utils.h
+index a5183f7..615ad78 100644
+--- a/test/utils.h
++++ b/test/utils.h
+@@ -79,6 +79,9 @@ make_random_bytes (int n_bytes);
+ double
+ gettime (void);
+
++uint32_t
++get_random_seed (void);
++
+ /* main body of the fuzzer test */
+ int
+ fuzzer_test_main (const char *test_name,
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch b/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch
new file mode 100644
index 0000000..1dbac60
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch
@@ -0,0 +1,206 @@
+From f05a90e5f8d1d0af60e2c684cbe9f1327c33135a Mon Sep 17 00:00:00 2001
+From: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date: Fri, 18 Mar 2011 14:36:15 +0200
+Subject: [PATCH 27/40] Add simple support for the r8g8b8a8 and r8g8b8x8 formats.
+
+These formats are particularly useful on big-endian architectures, where RGBA in
+memory/file order corresponds to r8g8b8a8 as a uint32_t. This is important
+because RGBA is in some cases the only available choice (for example as a pixel
+format in OpenGL ES 2.0).
+---
+ pixman/pixman-access.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman.c | 6 +++
+ pixman/pixman.h | 6 ++-
+ 3 files changed, 108 insertions(+), 1 deletions(-)
+
+diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
+index f1ce0ba..32c4d8b 100644
+--- a/pixman/pixman-access.c
++++ b/pixman/pixman-access.c
+@@ -211,6 +211,46 @@ fetch_scanline_b8g8r8x8 (pixman_image_t *image,
+ }
+
+ static void
++fetch_scanline_r8g8b8a8 (pixman_image_t *image,
++ int x,
++ int y,
++ int width,
++ uint32_t * buffer,
++ const uint32_t *mask)
++{
++ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
++ const uint32_t *pixel = (uint32_t *)bits + x;
++ const uint32_t *end = pixel + width;
++
++ while (pixel < end)
++ {
++ uint32_t p = READ (image, pixel++);
++
++ *buffer++ = (((p & 0x000000ff) << 24) | (p >> 8));
++ }
++}
++
++static void
++fetch_scanline_r8g8b8x8 (pixman_image_t *image,
++ int x,
++ int y,
++ int width,
++ uint32_t * buffer,
++ const uint32_t *mask)
++{
++ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
++ const uint32_t *pixel = (uint32_t *)bits + x;
++ const uint32_t *end = pixel + width;
++
++ while (pixel < end)
++ {
++ uint32_t p = READ (image, pixel++);
++
++ *buffer++ = (0xff000000 | (p >> 8));
++ }
++}
++
++static void
+ fetch_scanline_x14r6g6b6 (pixman_image_t *image,
+ int x,
+ int y,
+@@ -1292,6 +1332,28 @@ fetch_pixel_b8g8r8x8 (bits_image_t *image,
+ }
+
+ static uint32_t
++fetch_pixel_r8g8b8a8 (bits_image_t *image,
++ int offset,
++ int line)
++{
++ uint32_t *bits = image->bits + line * image->rowstride;
++ uint32_t pixel = READ (image, (uint32_t *)bits + offset);
++
++ return (((pixel & 0x000000ff) << 24) | (pixel >> 8));
++}
++
++static uint32_t
++fetch_pixel_r8g8b8x8 (bits_image_t *image,
++ int offset,
++ int line)
++{
++ uint32_t *bits = image->bits + line * image->rowstride;
++ uint32_t pixel = READ (image, (uint32_t *)bits + offset);
++
++ return (0xff000000 | (pixel >> 8));
++}
++
++static uint32_t
+ fetch_pixel_x14r6g6b6 (bits_image_t *image,
+ int offset,
+ int line)
+@@ -2028,6 +2090,39 @@ store_scanline_b8g8r8x8 (bits_image_t * image,
+ }
+
+ static void
++store_scanline_r8g8b8a8 (bits_image_t * image,
++ int x,
++ int y,
++ int width,
++ const uint32_t *values)
++{
++ uint32_t *bits = image->bits + image->rowstride * y;
++ uint32_t *pixel = (uint32_t *)bits + x;
++ int i;
++
++ for (i = 0; i < width; ++i)
++ {
++ WRITE (image, pixel++,
++ ((values[i] >> 24) & 0x000000ff) | (values[i] << 8));
++ }
++}
++
++static void
++store_scanline_r8g8b8x8 (bits_image_t * image,
++ int x,
++ int y,
++ int width,
++ const uint32_t *values)
++{
++ uint32_t *bits = image->bits + image->rowstride * y;
++ uint32_t *pixel = (uint32_t *)bits + x;
++ int i;
++
++ for (i = 0; i < width; ++i)
++ WRITE (image, pixel++, (values[i] << 8));
++}
++
++static void
+ store_scanline_x14r6g6b6 (bits_image_t * image,
+ int x,
+ int y,
+@@ -2845,6 +2940,8 @@ static const format_info_t accessors[] =
+ FORMAT_INFO (x8b8g8r8),
+ FORMAT_INFO (b8g8r8a8),
+ FORMAT_INFO (b8g8r8x8),
++ FORMAT_INFO (r8g8b8a8),
++ FORMAT_INFO (r8g8b8x8),
+ FORMAT_INFO (x14r6g6b6),
+
+ /* 24bpp formats */
+diff --git a/pixman/pixman.c b/pixman/pixman.c
+index ec565f9..f21af2f 100644
+--- a/pixman/pixman.c
++++ b/pixman/pixman.c
+@@ -873,6 +873,8 @@ color_to_pixel (pixman_color_t * color,
+ format == PIXMAN_x8b8g8r8 ||
+ format == PIXMAN_b8g8r8a8 ||
+ format == PIXMAN_b8g8r8x8 ||
++ format == PIXMAN_r8g8b8a8 ||
++ format == PIXMAN_r8g8b8x8 ||
+ format == PIXMAN_r5g6b5 ||
+ format == PIXMAN_b5g6r5 ||
+ format == PIXMAN_a8 ||
+@@ -895,6 +897,8 @@ color_to_pixel (pixman_color_t * color,
+ ((c & 0x0000ff00) << 8) |
+ ((c & 0x000000ff) << 24);
+ }
++ if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
++ c = ((c & 0xff000000) >> 24) | (c << 8);
+
+ if (format == PIXMAN_a1)
+ c = c >> 31;
+@@ -1105,6 +1109,8 @@ pixman_format_supported_source (pixman_format_code_t format)
+ case PIXMAN_x8b8g8r8:
+ case PIXMAN_b8g8r8a8:
+ case PIXMAN_b8g8r8x8:
++ case PIXMAN_r8g8b8a8:
++ case PIXMAN_r8g8b8x8:
+ case PIXMAN_r8g8b8:
+ case PIXMAN_b8g8r8:
+ case PIXMAN_r5g6b5:
+diff --git a/pixman/pixman.h b/pixman/pixman.h
+index 1305bc1..59d0760 100644
+--- a/pixman/pixman.h
++++ b/pixman/pixman.h
+@@ -650,11 +650,13 @@ struct pixman_indexed
+ #define PIXMAN_TYPE_YUY2 6
+ #define PIXMAN_TYPE_YV12 7
+ #define PIXMAN_TYPE_BGRA 8
++#define PIXMAN_TYPE_RGBA 9
+
+ #define PIXMAN_FORMAT_COLOR(f) \
+ (PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \
+ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \
+- PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA)
++ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \
++ PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
+
+ /* 32bpp formats */
+ typedef enum {
+@@ -664,6 +666,8 @@ typedef enum {
+ PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
+ PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
+ PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
++ PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
++ PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
+ PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
+ PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
+ PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch b/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch
new file mode 100644
index 0000000..7809e2a
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch
@@ -0,0 +1,110 @@
+From b514e63cfc58af21f7097db5a1b04292a758782a Mon Sep 17 00:00:00 2001
+From: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date: Fri, 18 Mar 2011 14:37:27 +0200
+Subject: [PATCH 28/40] Add support for the r8g8b8a8 and r8g8b8x8 formats to the tests.
+
+---
+ test/blitters-test.c | 9 +++++++--
+ test/composite.c | 9 +++++++++
+ test/fetch-test.c | 10 ++++++++++
+ test/stress-test.c | 2 ++
+ 4 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/test/blitters-test.c b/test/blitters-test.c
+index 63e7cb3..3ecfb09 100644
+--- a/test/blitters-test.c
++++ b/test/blitters-test.c
+@@ -88,8 +88,11 @@ free_random_image (uint32_t initcrc,
+ uint32_t *data = pixman_image_get_data (img);
+ uint32_t mask = (1 << PIXMAN_FORMAT_DEPTH (fmt)) - 1;
+
+- if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA)
++ if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA ||
++ PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_RGBA)
++ {
+ mask <<= (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt));
++ }
+
+ for (i = 0; i < 32; i++)
+ mask |= mask << (i * PIXMAN_FORMAT_BPP (fmt));
+@@ -182,6 +185,8 @@ static pixman_format_code_t img_fmt_list[] = {
+ PIXMAN_x8b8g8r8,
+ PIXMAN_b8g8r8a8,
+ PIXMAN_b8g8r8x8,
++ PIXMAN_r8g8b8a8,
++ PIXMAN_r8g8b8x8,
+ PIXMAN_x14r6g6b6,
+ PIXMAN_r8g8b8,
+ PIXMAN_b8g8r8,
+@@ -412,6 +417,6 @@ main (int argc, const char *argv[])
+ }
+
+ return fuzzer_test_main("blitters", 2000000,
+- 0x1DB8BDF8,
++ 0x265CDFEB,
+ test_composite, argc, argv);
+ }
+diff --git a/test/composite.c b/test/composite.c
+index e6d52b9..b0e0ba4 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -102,6 +102,8 @@ static const format_t formats[] =
+ P(x8b8g8r8),
+ P(b8g8r8a8),
+ P(b8g8r8x8),
++ P(r8g8b8a8),
++ P(r8g8b8x8),
+ P(x2r10g10b10),
+ P(x2b10g10r10),
+ P(a2r10g10b10),
+@@ -556,6 +558,13 @@ get_pixel (pixman_image_t *image,
+ bs = g + gs;
+ break;
+
++ case PIXMAN_TYPE_RGBA:
++ as = 0;
++ bs = PIXMAN_FORMAT_BPP (format) - (b + g + r);
++ gs = b + bs;
++ rs = g + gs;
++ break;
++
+ case PIXMAN_TYPE_A:
+ as = 0;
+ rs = 0;
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 60bc765..feb98d9 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -34,6 +34,16 @@ static testcase_t testcases[] =
+ NULL,
+ },
+ {
++ PIXMAN_r8g8b8a8,
++ 2, 2,
++ 8,
++ { 0x11223300, 0x55667744,
++ 0x99aabb88, 0xddeeffcc },
++ { 0x00112233, 0x44556677,
++ 0x8899aabb, 0xccddeeff },
++ NULL,
++ },
++ {
+ PIXMAN_g1,
+ 8, 2,
+ 4,
+diff --git a/test/stress-test.c b/test/stress-test.c
+index d496f93..571420a 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -19,6 +19,8 @@ static const pixman_format_code_t image_formats[] =
+ PIXMAN_x8b8g8r8,
+ PIXMAN_b8g8r8a8,
+ PIXMAN_b8g8r8x8,
++ PIXMAN_r8g8b8a8,
++ PIXMAN_r8g8b8x8,
+ PIXMAN_x14r6g6b6,
+ PIXMAN_r8g8b8,
+ PIXMAN_b8g8r8,
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch b/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch
new file mode 100644
index 0000000..d9e4a38
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch
@@ -0,0 +1,37 @@
+From ad3cbfb073fc325e1b3152898ca71b8255675957 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Tue, 22 Mar 2011 13:42:05 -0400
+Subject: [PATCH 29/40] test: Fix infinite loop in composite
+
+When run in PIXMAN_RANDOMIZE_TESTS mode, this test would go into an
+infinite loop because the loop started at 'seed' but the stop
+condition was still N_TESTS.
+---
+ test/composite.c | 8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index b0e0ba4..9a001e5 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -909,13 +909,13 @@ main (int argc, char **argv)
+ #ifdef USE_OPENMP
+ # pragma omp parallel for default(none) shared(result, argv, seed)
+ #endif
+- for (i = seed; i <= N_TESTS; ++i)
++ for (i = 0; i <= N_TESTS; ++i)
+ {
+- if (!result && !run_test (i))
++ if (!result && !run_test (i + seed))
+ {
+- printf ("Test 0x%08X failed.\n", i);
++ printf ("Test 0x%08X failed.\n", seed + i);
+
+- result = i;
++ result = seed + i;
+ }
+ }
+
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch b/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch
new file mode 100644
index 0000000..831065c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch
@@ -0,0 +1,82 @@
+From 4a0ade2a1e96fe3f1bca8953be221af0b2908925 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 10 Mar 2011 15:34:10 +0200
+Subject: [PATCH 30/40] ARM: tweaked horizontal weights update in NEON bilinear scaling code
+
+Moving the horizontal interpolation weight update instructions from the
+beginning of the loop to its end hides some pipeline stalls and
+improves performance.
+---
+ pixman/pixman-arm-neon-asm.S | 20 +++++++++++---------
+ 1 files changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 71b30ac..8788e95 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2558,8 +2558,7 @@ fname:
+ bilinear_load_&src_fmt d0, d1, d2
+ vmull.u8 q1, d0, d28
+ vmlal.u8 q1, d1, d29
+- vshr.u16 d30, d24, #8
+- /* 4 cycles bubble */
++ /* 5 cycles bubble */
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+@@ -2574,17 +2573,17 @@ fname:
+ .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
+ bilinear_load_and_vertical_interpolate_two_&src_fmt \
+ q1, q11, d0, d1, d20, d21, d22, d23
+- vshr.u16 q15, q12, #8
+- vadd.u16 q12, q12, q13
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+ vshll.u16 q10, d22, #8
+ vmlsl.u16 q10, d22, d31
+ vmlal.u16 q10, d23, d31
+- vshrn.u32 d30, q0, #16
+- vshrn.u32 d31, q10, #16
+- vmovn.u16 d0, q15
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q10, #16
++ vshr.u16 q15, q12, #8
++ vadd.u16 q12, q12, q13
++ vmovn.u16 d0, q0
+ bilinear_store_&dst_fmt 2, q2, q3
+ .endm
+
+@@ -2593,8 +2592,6 @@ fname:
+ q1, q11, d0, d1, d20, d21, d22, d23 \
+ q3, q9, d4, d5, d16, d17, d18, d19
+ pld [TMP1, PF_OFFS]
+- vshr.u16 q15, q12, #8
+- vadd.u16 q12, q12, q13
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+@@ -2614,8 +2611,10 @@ fname:
+ vshrn.u32 d1, q10, #16
+ vshrn.u32 d4, q2, #16
+ vshrn.u32 d5, q8, #16
++ vshr.u16 q15, q12, #8
+ vmovn.u16 d0, q0
+ vmovn.u16 d1, q2
++ vadd.u16 q12, q12, q13
+ bilinear_store_&dst_fmt 4, q2, q3
+ .endm
+
+@@ -2669,6 +2668,9 @@ pixman_asm_function fname
+ vadd.u16 d25, d25, d26
+ vadd.u16 q13, q13, q13
+
++ vshr.u16 q15, q12, #8
++ vadd.u16 q12, q12, q13
++
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch b/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch
new file mode 100644
index 0000000..3c8394b
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch
@@ -0,0 +1,124 @@
+From f36c189475951276766b2653ae9628c4d02dc0c9 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 10 Mar 2011 16:12:23 +0200
+Subject: [PATCH 31/40] ARM: use aligned memory writes in NEON bilinear scaling code
+
+---
+ pixman/pixman-arm-neon-asm.S | 49 ++++++++++++++++++++++++++++++------------
+ 1 files changed, 35 insertions(+), 14 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 8788e95..a4d6a9a 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2527,9 +2527,9 @@ fname:
+
+ .macro bilinear_store_8888 numpix, tmp1, tmp2
+ .if numpix == 4
+- vst1.32 {d0, d1}, [OUT]!
++ vst1.32 {d0, d1}, [OUT, :128]!
+ .elseif numpix == 2
+- vst1.32 {d0}, [OUT]!
++ vst1.32 {d0}, [OUT, :64]!
+ .elseif numpix == 1
+ vst1.32 {d0[0]}, [OUT, :32]!
+ .else
+@@ -2544,11 +2544,11 @@ fname:
+ vuzp.u8 d0, d2
+ convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
+ .if numpix == 4
+- vst1.16 {d2}, [OUT]!
++ vst1.16 {d2}, [OUT, :64]!
+ .elseif numpix == 2
+- vst1.32 {d2[0]}, [OUT]!
++ vst1.32 {d2[0]}, [OUT, :32]!
+ .elseif numpix == 1
+- vst1.16 {d2[0]}, [OUT]!
++ vst1.16 {d2[0]}, [OUT, :16]!
+ .else
+ .error bilinear_store_0565 numpix is unsupported
+ .endif
+@@ -2622,8 +2622,7 @@ fname:
+ * Main template macro for generating NEON optimized bilinear scanline
+ * functions.
+ *
+- * TODO: use software pipelining and aligned writes to the destination buffer
+- * in order to improve performance
++ * TODO: use software pipelining in order to improve performance
+ *
+ * Bilinear scanline scaler macro template uses the following arguments:
+ * fname - name of the function to generate
+@@ -2635,7 +2634,8 @@ fname:
+ */
+
+ .macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+- bpp_shift, prefetch_distance
++ src_bpp_shift, dst_bpp_shift, \
++ prefetch_distance
+
+ pixman_asm_function fname
+ OUT .req r0
+@@ -2666,19 +2666,40 @@ pixman_asm_function fname
+ vdup.u8 d28, WT
+ vdup.u8 d29, WB
+ vadd.u16 d25, d25, d26
+- vadd.u16 q13, q13, q13
+
++ /* ensure good destination alignment */
++ cmp WIDTH, #1
++ blt 0f
++ tst OUT, #(1 << dst_bpp_shift)
++ beq 0f
++ vshr.u16 q15, q12, #8
++ vadd.u16 q12, q12, q13
++ bilinear_interpolate_last_pixel src_fmt, dst_fmt
++ sub WIDTH, WIDTH, #1
++0:
++ vadd.u16 q13, q13, q13
+ vshr.u16 q15, q12, #8
+ vadd.u16 q12, q12, q13
+
++ cmp WIDTH, #2
++ blt 0f
++ tst OUT, #(1 << (dst_bpp_shift + 1))
++ beq 0f
++ bilinear_interpolate_two_pixels src_fmt, dst_fmt
++ sub WIDTH, WIDTH, #2
++0:
++
++ /* start the main loop */
+ subs WIDTH, WIDTH, #4
+ blt 1f
+- mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
++ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ 0:
+ bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ subs WIDTH, WIDTH, #4
+ bge 0b
+ 1:
++
++ /* handle the remaining trailing pixels */
+ tst WIDTH, #2
+ beq 2f
+ bilinear_interpolate_two_pixels src_fmt, dst_fmt
+@@ -2708,13 +2729,13 @@ pixman_asm_function fname
+ .endm
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 2, 28
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
++ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 1, 28
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
++ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 2, 28
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
++ pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 1, 28
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch b/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch
new file mode 100644
index 0000000..c67f9c6
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch
@@ -0,0 +1,70 @@
+From 6d296598575b8307262fac2cf438d7cc832d09d3 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 16 Mar 2011 16:33:41 +0200
+Subject: [PATCH 32/40] ARM: support for software pipelining in bilinear macros
+
+Now it's possible to override the main loop of the bilinear scaling code
+with an optimized pipelined implementation.
+---
+ pixman/pixman-arm-neon-asm.S | 31 ++++++++++++++++++++++++++++---
+ 1 files changed, 28 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index a4d6a9a..d84f2cc 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2618,12 +2618,32 @@ fname:
+ bilinear_store_&dst_fmt 4, q2, q3
+ .endm
+
++.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
++.else
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
++.endif
++.endm
++
++.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
++.else
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++.endif
++.endm
++
+ /*
+ * Main template macro for generating NEON optimized bilinear scanline
+ * functions.
+ *
+- * TODO: use software pipelining in order to improve performance
+- *
+ * Bilinear scanline scaler macro template uses the following arguments:
+ * fname - name of the function to generate
+ * src_fmt - source color format (8888 or 0565)
+@@ -2693,10 +2713,15 @@ pixman_asm_function fname
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
++ bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #4
++ blt 5f
+ 0:
+- bilinear_interpolate_four_pixels src_fmt, dst_fmt
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+ subs WIDTH, WIDTH, #4
+ bge 0b
++5:
++ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ 1:
+
+ /* handle the remaining trailing pixels */
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch b/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch
new file mode 100644
index 0000000..1d66979
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch
@@ -0,0 +1,168 @@
+From ec2da8e651767421a8403bf0810445fdec1315ba Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Mar 2011 18:41:53 +0200
+Subject: [PATCH 33/40] ARM: use less ARM instructions in NEON bilinear scaling code
+
+This reduces code size and also puts less pressure on the
+instruction decoder.
+---
+ pixman/pixman-arm-neon-asm.S | 79 ++++++++++++++++++++----------------------
+ 1 files changed, 38 insertions(+), 41 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index d84f2cc..9878bf7 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2412,21 +2412,19 @@ fname:
+ */
+
+ .macro bilinear_load_8888 reg1, reg2, tmp
+- mov TMP2, X, asr #16
++ mov TMP1, X, asr #16
+ add X, X, UX
+- add TMP1, TOP, TMP2, asl #2
+- add TMP2, BOTTOM, TMP2, asl #2
+- vld1.32 {reg1}, [TMP1]
+- vld1.32 {reg2}, [TMP2]
++ add TMP1, TOP, TMP1, asl #2
++ vld1.32 {reg1}, [TMP1], STRIDE
++ vld1.32 {reg2}, [TMP1]
+ .endm
+
+ .macro bilinear_load_0565 reg1, reg2, tmp
+- mov TMP2, X, asr #16
++ mov TMP1, X, asr #16
+ add X, X, UX
+- add TMP1, TOP, TMP2, asl #1
+- add TMP2, BOTTOM, TMP2, asl #1
+- vld1.32 {reg2[0]}, [TMP1]
+- vld1.32 {reg2[1]}, [TMP2]
++ add TMP1, TOP, TMP1, asl #1
++ vld1.32 {reg2[0]}, [TMP1], STRIDE
++ vld1.32 {reg2[1]}, [TMP1]
+ convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ .endm
+
+@@ -2454,18 +2452,16 @@ fname:
+ .macro bilinear_load_and_vertical_interpolate_two_0565 \
+ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
+
+- mov TMP2, X, asr #16
++ mov TMP1, X, asr #16
+ add X, X, UX
+- mov TMP4, X, asr #16
++ add TMP1, TOP, TMP1, asl #1
++ mov TMP2, X, asr #16
+ add X, X, UX
+- add TMP1, TOP, TMP2, asl #1
+- add TMP2, BOTTOM, TMP2, asl #1
+- add TMP3, TOP, TMP4, asl #1
+- add TMP4, BOTTOM, TMP4, asl #1
+- vld1.32 {acc2lo[0]}, [TMP1]
+- vld1.32 {acc2hi[0]}, [TMP3]
+- vld1.32 {acc2lo[1]}, [TMP2]
+- vld1.32 {acc2hi[1]}, [TMP4]
++ add TMP2, TOP, TMP2, asl #1
++ vld1.32 {acc2lo[0]}, [TMP1], STRIDE
++ vld1.32 {acc2hi[0]}, [TMP2], STRIDE
++ vld1.32 {acc2lo[1]}, [TMP1]
++ vld1.32 {acc2hi[1]}, [TMP2]
+ convert_0565_to_x888 acc2, reg3, reg2, reg1
+ vzip.u8 reg1, reg3
+ vzip.u8 reg2, reg4
+@@ -2481,34 +2477,30 @@ fname:
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+
+- mov TMP2, X, asr #16
++ mov TMP1, X, asr #16
+ add X, X, UX
+- mov TMP4, X, asr #16
++ add TMP1, TOP, TMP1, asl #1
++ mov TMP2, X, asr #16
+ add X, X, UX
+- add TMP1, TOP, TMP2, asl #1
+- add TMP2, BOTTOM, TMP2, asl #1
+- add TMP3, TOP, TMP4, asl #1
+- add TMP4, BOTTOM, TMP4, asl #1
+- vld1.32 {xacc2lo[0]}, [TMP1]
+- vld1.32 {xacc2hi[0]}, [TMP3]
+- vld1.32 {xacc2lo[1]}, [TMP2]
+- vld1.32 {xacc2hi[1]}, [TMP4]
++ add TMP2, TOP, TMP2, asl #1
++ vld1.32 {xacc2lo[0]}, [TMP1], STRIDE
++ vld1.32 {xacc2hi[0]}, [TMP2], STRIDE
++ vld1.32 {xacc2lo[1]}, [TMP1]
++ vld1.32 {xacc2hi[1]}, [TMP2]
+ convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
+- mov TMP2, X, asr #16
++ mov TMP1, X, asr #16
+ add X, X, UX
+- mov TMP4, X, asr #16
++ add TMP1, TOP, TMP1, asl #1
++ mov TMP2, X, asr #16
+ add X, X, UX
+- add TMP1, TOP, TMP2, asl #1
+- add TMP2, BOTTOM, TMP2, asl #1
+- add TMP3, TOP, TMP4, asl #1
+- add TMP4, BOTTOM, TMP4, asl #1
+- vld1.32 {yacc2lo[0]}, [TMP1]
++ add TMP2, TOP, TMP2, asl #1
++ vld1.32 {yacc2lo[0]}, [TMP1], STRIDE
+ vzip.u8 xreg1, xreg3
+- vld1.32 {yacc2hi[0]}, [TMP3]
++ vld1.32 {yacc2hi[0]}, [TMP2], STRIDE
+ vzip.u8 xreg2, xreg4
+- vld1.32 {yacc2lo[1]}, [TMP2]
++ vld1.32 {yacc2lo[1]}, [TMP1]
+ vzip.u8 xreg3, xreg4
+- vld1.32 {yacc2hi[1]}, [TMP4]
++ vld1.32 {yacc2hi[1]}, [TMP2]
+ vzip.u8 xreg1, xreg2
+ convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
+ vmull.u8 xacc1, xreg1, d28
+@@ -2592,6 +2584,7 @@ fname:
+ q1, q11, d0, d1, d20, d21, d22, d23 \
+ q3, q9, d4, d5, d16, d17, d18, d19
+ pld [TMP1, PF_OFFS]
++ sub TMP1, TMP1, STRIDE
+ vshll.u16 q0, d2, #8
+ vmlsl.u16 q0, d2, d30
+ vmlal.u16 q0, d3, d30
+@@ -2671,6 +2664,7 @@ pixman_asm_function fname
+ PF_OFFS .req r7
+ TMP3 .req r8
+ TMP4 .req r9
++ STRIDE .req r2
+
+ mov ip, sp
+ push {r4, r5, r6, r7, r8, r9}
+@@ -2678,6 +2672,9 @@ pixman_asm_function fname
+ ldmia ip, {WB, X, UX, WIDTH}
+ mul PF_OFFS, PF_OFFS, UX
+
++ sub STRIDE, BOTTOM, TOP
++ .unreq BOTTOM
++
+ cmp WIDTH, #0
+ ble 3f
+
+@@ -2738,7 +2735,6 @@ pixman_asm_function fname
+
+ .unreq OUT
+ .unreq TOP
+- .unreq BOTTOM
+ .unreq WT
+ .unreq WB
+ .unreq X
+@@ -2749,6 +2745,7 @@ pixman_asm_function fname
+ .unreq PF_OFFS
+ .unreq TMP3
+ .unreq TMP4
++ .unreq STRIDE
+ .endfunc
+
+ .endm
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
new file mode 100644
index 0000000..82661f0
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
@@ -0,0 +1,156 @@
+From cd20ceb7602348ecbfa0db1756dc548a0bad3c9d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 17 Mar 2011 19:42:01 +0200
+Subject: [PATCH 34/40] ARM: support different levels of loop unrolling in bilinear scaler
+
+Now an extra 'flags' parameter is supported in the bilinear scanline scaling
+function generation macro. It can be used to enable unrolling of 4 or 8
+pixels per loop iteration and to provide save/restore code for the d8-d15
+registers.
+---
+ pixman/pixman-arm-neon-asm.S | 84 ++++++++++++++++++++++++++++++++++++++----
+ 1 files changed, 76 insertions(+), 8 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9878bf7..6141770 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2633,6 +2633,36 @@ fname:
+ .endif
+ .endm
+
++.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
++.else
++ bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
++.else
++ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++ bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
++.else
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++ bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.set BILINEAR_FLAG_UNROLL_4, 0
++.set BILINEAR_FLAG_UNROLL_8, 1
++.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2
++
+ /*
+ * Main template macro for generating NEON optimized bilinear scanline
+ * functions.
+@@ -2648,7 +2678,7 @@ fname:
+
+ .macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+ src_bpp_shift, dst_bpp_shift, \
+- prefetch_distance
++ prefetch_distance, flags
+
+ pixman_asm_function fname
+ OUT .req r0
+@@ -2672,6 +2702,10 @@ pixman_asm_function fname
+ ldmia ip, {WB, X, UX, WIDTH}
+ mul PF_OFFS, PF_OFFS, UX
+
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++ vpush {d8-d15}
++.endif
++
+ sub STRIDE, BOTTOM, TOP
+ .unreq BOTTOM
+
+@@ -2705,8 +2739,34 @@ pixman_asm_function fname
+ bilinear_interpolate_two_pixels src_fmt, dst_fmt
+ sub WIDTH, WIDTH, #2
+ 0:
+-
+- /* start the main loop */
++.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
++/*********** 8 pixels per iteration *****************/
++ cmp WIDTH, #4
++ blt 0f
++ tst OUT, #(1 << (dst_bpp_shift + 2))
++ beq 0f
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++ sub WIDTH, WIDTH, #4
++0:
++ subs WIDTH, WIDTH, #8
++ blt 1f
++ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
++ bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #8
++ blt 5f
++0:
++ bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++ subs WIDTH, WIDTH, #8
++ bge 0b
++5:
++ bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++1:
++ tst WIDTH, #4
++ beq 2f
++ bilinear_interpolate_four_pixels src_fmt, dst_fmt
++2:
++.else
++/*********** 4 pixels per iteration *****************/
+ subs WIDTH, WIDTH, #4
+ blt 1f
+ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+@@ -2720,7 +2780,8 @@ pixman_asm_function fname
+ 5:
+ bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ 1:
+-
++/****************************************************/
++.endif
+ /* handle the remaining trailing pixels */
+ tst WIDTH, #2
+ beq 2f
+@@ -2730,6 +2791,9 @@ pixman_asm_function fname
+ beq 3f
+ bilinear_interpolate_last_pixel src_fmt, dst_fmt
+ 3:
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++ vpop {d8-d15}
++.endif
+ pop {r4, r5, r6, r7, r8, r9}
+ bx lr
+
+@@ -2751,13 +2815,17 @@ pixman_asm_function fname
+ .endm
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 2, 28
++ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
++ 2, 2, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 1, 28
++ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
++ 2, 1, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 2, 28
++ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
++ 1, 2, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+- pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 1, 28
++ pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \
++ 1, 1, 28, BILINEAR_FLAG_UNROLL_4
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch b/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
new file mode 100644
index 0000000..c0d485c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
@@ -0,0 +1,166 @@
+From d3b1ca20fe8af20ca097dcc8799ef25cee03dd6b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 16 Mar 2011 17:24:49 +0200
+Subject: [PATCH 35/40] ARM: pipelined NEON implementation of bilinear scaled 'src_8888_8888'
+
+Performance of the inner loop when working with the data in L1 cache:
+ ARM Cortex-A8: 41 cycles per 4 pixels (no stalls and partial dual issue)
+ ARM Cortex-A9: 48 cycles per 4 pixels (no stalls)
+
+It might still be possible to improve performance even more on ARM Cortex-A8
+with better use of dual issue.
+
+Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=40.38 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=48.47 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=20028888, speed=79.68 MPix/s
+ after: op=1, src=20028888, dst=20028888, speed=93.11 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 127 ++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 127 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 6141770..326e085 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2814,6 +2814,133 @@ pixman_asm_function fname
+
+ .endm
+
++/*****************************************************************************/
++
++.set have_bilinear_interpolate_four_pixels_8888_8888, 1
++
++.macro bilinear_interpolate_four_pixels_8888_8888_head
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++
++ vld1.32 {d22}, [TMP1], STRIDE
++ vld1.32 {d23}, [TMP1]
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ vmull.u8 q8, d22, d28
++ vmlal.u8 q8, d23, d29
++
++ vld1.32 {d22}, [TMP2], STRIDE
++ vld1.32 {d23}, [TMP2]
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmull.u8 q9, d22, d28
++ vmlal.u8 q9, d23, d29
++
++ vld1.32 {d22}, [TMP3], STRIDE
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++
++ vshll.u16 q0, d16, #8
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++
++ vshll.u16 q1, d18, #8
++ vmlsl.u16 q1, d18, d31
++.endm
++
++.macro bilinear_interpolate_four_pixels_8888_8888_tail
++ vmlal.u16 q1, d19, d31
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d20, #8
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vadd.u16 q12, q12, q13
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q1, #16
++ vshrn.u32 d4, q2, #16
++ vshr.u16 q15, q12, #8
++ vshrn.u32 d5, q3, #16
++ vmovn.u16 d6, q0
++ vmovn.u16 d7, q2
++ vadd.u16 q12, q12, q13
++ vst1.32 {d6, d7}, [OUT, :128]!
++.endm
++
++.macro bilinear_interpolate_four_pixels_8888_8888_tail_head
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++ vmlal.u16 q1, d19, d31
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d20, #8
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vld1.32 {d20}, [TMP1], STRIDE
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vld1.32 {d21}, [TMP1]
++ vmull.u8 q8, d20, d28
++ vmlal.u8 q8, d21, d29
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q1, #16
++ vshrn.u32 d4, q2, #16
++ vld1.32 {d22}, [TMP2], STRIDE
++ vshrn.u32 d5, q3, #16
++ vadd.u16 q12, q12, q13
++ vld1.32 {d23}, [TMP2]
++ vmull.u8 q9, d22, d28
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmlal.u8 q9, d23, d29
++ vld1.32 {d22}, [TMP3], STRIDE
++ vshr.u16 q15, q12, #8
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++ vmovn.u16 d6, q0
++ vshll.u16 q0, d16, #8
++ vmovn.u16 d7, q2
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vadd.u16 q12, q12, q13
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++ vst1.32 {d6, d7}, [OUT, :128]!
++ vshll.u16 q1, d18, #8
++ vmlsl.u16 q1, d18, d31
++.endm
++
++/*****************************************************************************/
++
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
+ 2, 2, 28, BILINEAR_FLAG_UNROLL_4
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch b/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
new file mode 100644
index 0000000..4fca16f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
@@ -0,0 +1,283 @@
+From dfccf9b97acbff6e847e4e52c5dec0a4297d30a0 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Mar 2011 20:25:27 +0200
+Subject: [PATCH 36/40] ARM: pipelined NEON implementation of bilinear scaled 'src_8888_0565'
+
+Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=10020565, speed=33.59 MPix/s
+ after: op=1, src=20028888, dst=10020565, speed=46.25 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+ before: op=1, src=20028888, dst=10020565, speed=63.86 MPix/s
+ after: op=1, src=20028888, dst=10020565, speed=84.22 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S | 245 +++++++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 244 insertions(+), 1 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 326e085..e560bdf 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2941,13 +2941,256 @@ pixman_asm_function fname
+
+ /*****************************************************************************/
+
++.set have_bilinear_interpolate_eight_pixels_8888_0565, 1
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_head
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++ vld1.32 {d20}, [TMP1], STRIDE
++ vld1.32 {d21}, [TMP1]
++ vmull.u8 q8, d20, d28
++ vmlal.u8 q8, d21, d29
++ vld1.32 {d22}, [TMP2], STRIDE
++ vld1.32 {d23}, [TMP2]
++ vmull.u8 q9, d22, d28
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmlal.u8 q9, d23, d29
++ vld1.32 {d22}, [TMP3], STRIDE
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++ vshll.u16 q0, d16, #8
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++ vshll.u16 q1, d18, #8
++ vmlsl.u16 q1, d18, d31
++
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++ vmlal.u16 q1, d19, d31
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d20, #8
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vld1.32 {d20}, [TMP1], STRIDE
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vld1.32 {d21}, [TMP1]
++ vmull.u8 q8, d20, d28
++ vmlal.u8 q8, d21, d29
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q1, #16
++ vshrn.u32 d4, q2, #16
++ vld1.32 {d22}, [TMP2], STRIDE
++ vshrn.u32 d5, q3, #16
++ vadd.u16 q12, q12, q13
++ vld1.32 {d23}, [TMP2]
++ vmull.u8 q9, d22, d28
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmlal.u8 q9, d23, d29
++ vld1.32 {d22}, [TMP3], STRIDE
++ vshr.u16 q15, q12, #8
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++ vmovn.u16 d8, q0
++ vshll.u16 q0, d16, #8
++ vmovn.u16 d9, q2
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vadd.u16 q12, q12, q13
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++ vshll.u16 q1, d18, #8
++ vmlsl.u16 q1, d18, d31
++.endm
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_tail
++ vmlal.u16 q1, d19, d31
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d20, #8
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vadd.u16 q12, q12, q13
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q1, #16
++ vshrn.u32 d4, q2, #16
++ vshr.u16 q15, q12, #8
++ vshrn.u32 d5, q3, #16
++ vmovn.u16 d10, q0
++ vmovn.u16 d11, q2
++ vadd.u16 q12, q12, q13
++
++ vuzp.u8 d8, d9
++ vuzp.u8 d10, d11
++ vuzp.u8 d9, d11
++ vuzp.u8 d8, d10
++ vshll.u8 q6, d9, #8
++ vshll.u8 q5, d10, #8
++ vshll.u8 q7, d8, #8
++ vsri.u16 q5, q6, #5
++ vsri.u16 q5, q7, #11
++ vst1.32 {d10, d11}, [OUT, :128]!
++.endm
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++ vmlal.u16 q1, d19, d31
++ vshr.u16 q15, q12, #8
++ vuzp.u8 d8, d9
++ vshll.u16 q2, d20, #8
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vld1.32 {d20}, [TMP1], STRIDE
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vld1.32 {d21}, [TMP1]
++ vmull.u8 q8, d20, d28
++ vmlal.u8 q8, d21, d29
++ vshrn.u32 d0, q0, #16
++ vshrn.u32 d1, q1, #16
++ vshrn.u32 d4, q2, #16
++ vld1.32 {d22}, [TMP2], STRIDE
++ vshrn.u32 d5, q3, #16
++ vadd.u16 q12, q12, q13
++ vld1.32 {d23}, [TMP2]
++ vmull.u8 q9, d22, d28
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmlal.u8 q9, d23, d29
++ vld1.32 {d22}, [TMP3], STRIDE
++ vshr.u16 q15, q12, #8
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++ vmovn.u16 d10, q0
++ vshll.u16 q0, d16, #8
++ vmovn.u16 d11, q2
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vadd.u16 q12, q12, q13
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++ vuzp.u8 d10, d11
++ vshll.u16 q1, d18, #8
++ vmlsl.u16 q1, d18, d31
++
++ mov TMP1, X, asr #16
++ add X, X, UX
++ add TMP1, TOP, TMP1, asl #2
++ mov TMP2, X, asr #16
++ add X, X, UX
++ add TMP2, TOP, TMP2, asl #2
++ vmlal.u16 q1, d19, d31
++ vuzp.u8 d9, d11
++ vshr.u16 q15, q12, #8
++ vshll.u16 q2, d20, #8
++ vuzp.u8 d8, d10
++ vmlsl.u16 q2, d20, d30
++ vmlal.u16 q2, d21, d30
++ vshll.u16 q3, d22, #8
++ vld1.32 {d20}, [TMP1], STRIDE
++ vmlsl.u16 q3, d22, d31
++ vmlal.u16 q3, d23, d31
++ vld1.32 {d21}, [TMP1]
++ vmull.u8 q8, d20, d28
++ vmlal.u8 q8, d21, d29
++ vshll.u8 q6, d9, #8
++ vshll.u8 q5, d10, #8
++ vshll.u8 q7, d8, #8
++ vshrn.u32 d0, q0, #16
++ vsri.u16 q5, q6, #5
++ vshrn.u32 d1, q1, #16
++ vsri.u16 q5, q7, #11
++ vshrn.u32 d4, q2, #16
++ vld1.32 {d22}, [TMP2], STRIDE
++ vshrn.u32 d5, q3, #16
++ vadd.u16 q12, q12, q13
++ vld1.32 {d23}, [TMP2]
++ vmull.u8 q9, d22, d28
++ mov TMP3, X, asr #16
++ add X, X, UX
++ add TMP3, TOP, TMP3, asl #2
++ mov TMP4, X, asr #16
++ add X, X, UX
++ add TMP4, TOP, TMP4, asl #2
++ vmlal.u8 q9, d23, d29
++ vld1.32 {d22}, [TMP3], STRIDE
++ vshr.u16 q15, q12, #8
++ vld1.32 {d23}, [TMP3]
++ vmull.u8 q10, d22, d28
++ vmlal.u8 q10, d23, d29
++ vmovn.u16 d8, q0
++ vshll.u16 q0, d16, #8
++ vmovn.u16 d9, q2
++ vmlsl.u16 q0, d16, d30
++ vmlal.u16 q0, d17, d30
++ pld [TMP4, PF_OFFS]
++ vld1.32 {d16}, [TMP4], STRIDE
++ vadd.u16 q12, q12, q13
++ vld1.32 {d17}, [TMP4]
++ pld [TMP4, PF_OFFS]
++ vmull.u8 q11, d16, d28
++ vmlal.u8 q11, d17, d29
++ vshll.u16 q1, d18, #8
++ vst1.32 {d10, d11}, [OUT, :128]!
++ vmlsl.u16 q1, d18, d31
++.endm
++/*****************************************************************************/
++
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
+ 2, 2, 28, BILINEAR_FLAG_UNROLL_4
+
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
+- 2, 1, 28, BILINEAR_FLAG_UNROLL_4
++ 2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS
+
+ generate_bilinear_scanline_func \
+ pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
new file mode 100644
index 0000000..e03823b
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
@@ -0,0 +1,114 @@
+From ab52f97fa306f73b51f797a33614280d31ccb978 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 16 Mar 2010 16:55:28 +0100
+Subject: [PATCH 37/40] Generic C implementation of pixman_blt with overlapping support
+
+Uses the memcpy/memmove functions to copy pixels and can handle the
+case when both the source and destination areas are in the same
+image (this is useful for scrolling).
+
+It is assumed that the copying direction only matters when the
+same image is used for both source and destination (and
+src_stride == dst_stride). The copying direction is undefined
+for images with different source and destination strides whose
+areas happen to overlap (but this is an unrealistic case
+anyway).
+---
+ pixman/pixman-general.c | 21 ++++++++++++++++++---
+ pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 61 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 727affc..fa448f7 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -238,9 +238,24 @@ general_blt (pixman_implementation_t *imp,
+ int width,
+ int height)
+ {
+- /* We can't blit unless we have sse2 or mmx */
+-
+- return FALSE;
++ uint8_t *dst_bytes = (uint8_t *)dst_bits;
++ uint8_t *src_bytes = (uint8_t *)src_bits;
++ int bpp;
++
++ if (src_bpp != dst_bpp || src_bpp & 7)
++ return FALSE;
++
++ bpp = src_bpp >> 3;
++ width *= bpp;
++ src_stride *= 4;
++ dst_stride *= 4;
++ pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
++ dst_bytes + dst_y * dst_stride + dst_x * bpp,
++ src_stride,
++ dst_stride,
++ width,
++ height);
++ return TRUE;
+ }
+
+ static pixman_bool_t
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index 60060a9..5369ad9 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -10,6 +10,7 @@
+
+ #include "pixman.h"
+ #include <time.h>
++#include <string.h>
+ #include <assert.h>
+ #include <stdio.h>
+ #include <string.h>
+@@ -899,4 +900,46 @@ void pixman_timer_register (pixman_timer_t *timer);
+
+ #endif /* PIXMAN_TIMERS */
+
++/* a helper function, copies rows of 'width' bytes with src/dst overlapping support */
++static inline void
++pixman_blt_helper (uint8_t *src_bytes,
++ uint8_t *dst_bytes,
++ int src_stride,
++ int dst_stride,
++ int width,
++ int height)
++{
++ /*
++ * The second part of this check is not strictly needed, but it prevents
++ * unnecessary upside-down processing of areas which belong to different
++ * images. Upside-down processing can be slower with fixed-distance-ahead
++ * prefetch and perceived as having more tearing.
++ */
++ if (src_bytes < dst_bytes + width &&
++ src_bytes + src_stride * height > dst_bytes)
++ {
++ src_bytes += src_stride * height - src_stride;
++ dst_bytes += dst_stride * height - dst_stride;
++ dst_stride = -dst_stride;
++ src_stride = -src_stride;
++ /* Horizontal scrolling to the left needs memmove */
++ if (src_bytes + width > dst_bytes)
++ {
++ while (--height >= 0)
++ {
++ memmove (dst_bytes, src_bytes, width);
++ dst_bytes += dst_stride;
++ src_bytes += src_stride;
++ }
++ return;
++ }
++ }
++ while (--height >= 0)
++ {
++ memcpy (dst_bytes, src_bytes, width);
++ dst_bytes += dst_stride;
++ src_bytes += src_stride;
++ }
++}
++
+ #endif /* PIXMAN_PRIVATE_H */
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
new file mode 100644
index 0000000..7c0f7ad
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
@@ -0,0 +1,91 @@
+From 2cde9110695c2b595eaf885eee40b118286652f9 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:47 +0300
+Subject: [PATCH 38/40] Support of overlapping src/dst for pixman_blt_mmx
+
+---
+ pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++---------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
+index 0272347..5bcbd0e 100644
+--- a/pixman/pixman-mmx.c
++++ b/pixman/pixman-mmx.c
+@@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits,
+ {
+ uint8_t * src_bytes;
+ uint8_t * dst_bytes;
+- int byte_width;
++ int bpp;
+
+- if (src_bpp != dst_bpp)
++ if (src_bpp != dst_bpp || src_bpp & 7)
+ return FALSE;
+
+- if (src_bpp == 16)
+- {
+- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+- src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+- byte_width = 2 * width;
+- src_stride *= 2;
+- dst_stride *= 2;
+- }
+- else if (src_bpp == 32)
++ bpp = src_bpp >> 3;
++ width *= bpp;
++ src_stride *= 4;
++ dst_stride *= 4;
++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++ if (src_bpp != 16 && src_bpp != 32)
+ {
+- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+- byte_width = 4 * width;
+- src_stride *= 4;
+- dst_stride *= 4;
++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++ width, height);
++ return TRUE;
+ }
+- else
++
++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+ {
+- return FALSE;
++ src_bytes += src_stride * height - src_stride;
++ dst_bytes += dst_stride * height - dst_stride;
++ dst_stride = -dst_stride;
++ src_stride = -src_stride;
++
++ if (src_bytes + width > dst_bytes)
++ {
++ /* TODO: reverse scanline copy using MMX */
++ while (--height >= 0)
++ {
++ memmove (dst_bytes, src_bytes, width);
++ dst_bytes += dst_stride;
++ src_bytes += src_stride;
++ }
++ return TRUE;
++ }
+ }
+
+ while (height--)
+@@ -3033,7 +3042,7 @@ pixman_blt_mmx (uint32_t *src_bits,
+ uint8_t *d = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+- w = byte_width;
++ w = width;
+
+ while (w >= 2 && ((unsigned long)d & 3))
+ {
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
new file mode 100644
index 0000000..8e89ffe
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
@@ -0,0 +1,91 @@
+From b4064e256d293d32035494a6afff1bc9456b84e1 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:54 +0300
+Subject: [PATCH 39/40] Support of overlapping src/dst for pixman_blt_sse2
+
+---
+ pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++--------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 533b858..9fa7191 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -4691,34 +4691,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
+ {
+ uint8_t * src_bytes;
+ uint8_t * dst_bytes;
+- int byte_width;
++ int bpp;
+
+- if (src_bpp != dst_bpp)
++ if (src_bpp != dst_bpp || src_bpp & 7)
+ return FALSE;
+
+- if (src_bpp == 16)
+- {
+- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+- byte_width = 2 * width;
+- src_stride *= 2;
+- dst_stride *= 2;
+- }
+- else if (src_bpp == 32)
++ bpp = src_bpp >> 3;
++ width *= bpp;
++ src_stride *= 4;
++ dst_stride *= 4;
++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++ if (src_bpp != 16 && src_bpp != 32)
+ {
+- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+- byte_width = 4 * width;
+- src_stride *= 4;
+- dst_stride *= 4;
++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++ width, height);
++ return TRUE;
+ }
+- else
++
++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+ {
+- return FALSE;
++ src_bytes += src_stride * height - src_stride;
++ dst_bytes += dst_stride * height - dst_stride;
++ dst_stride = -dst_stride;
++ src_stride = -src_stride;
++
++ if (src_bytes + width > dst_bytes)
++ {
++ /* TODO: reverse scanline copy using SSE2 */
++ while (--height >= 0)
++ {
++ memmove (dst_bytes, src_bytes, width);
++ dst_bytes += dst_stride;
++ src_bytes += src_stride;
++ }
++ return TRUE;
++ }
+ }
+
+ while (height--)
+@@ -4728,7 +4737,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
+ uint8_t *d = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+- w = byte_width;
++ w = width;
+
+ while (w >= 2 && ((unsigned long)d & 3))
+ {
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
new file mode 100644
index 0000000..38aeadb
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
@@ -0,0 +1,94 @@
+From ed32d593a0e8aa56f8a27f976f188d14a79343a0 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 18 Nov 2009 06:08:48 +0200
+Subject: [PATCH 40/40] Support of overlapping src/dst for pixman_blt_neon
+
+---
+ pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++--------
+ 1 files changed, 51 insertions(+), 11 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 0a10ca1..f015eee 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -215,26 +215,66 @@ pixman_blt_neon (uint32_t *src_bits,
+ int width,
+ int height)
+ {
+- if (src_bpp != dst_bpp)
++ uint8_t * src_bytes;
++ uint8_t * dst_bytes;
++ int bpp;
++
++ if (src_bpp != dst_bpp || src_bpp & 7)
+ return FALSE;
+
++ bpp = src_bpp >> 3;
++ width *= bpp;
++ src_stride *= 4;
++ dst_stride *= 4;
++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++ if (src_bpp != 16 && src_bpp != 32)
++ {
++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++ width, height);
++ return TRUE;
++ }
++
++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
++ {
++ src_bytes += src_stride * height - src_stride;
++ dst_bytes += dst_stride * height - dst_stride;
++ dst_stride = -dst_stride;
++ src_stride = -src_stride;
++
++ if (src_bytes + width > dst_bytes)
++ {
++ /* TODO: reverse scanline copy using NEON */
++ while (--height >= 0)
++ {
++ memmove (dst_bytes, src_bytes, width);
++ dst_bytes += dst_stride;
++ src_bytes += src_stride;
++ }
++ return TRUE;
++ }
++ }
++
+ switch (src_bpp)
+ {
+ case 16:
+ pixman_composite_src_0565_0565_asm_neon (
+- width, height,
+- (uint16_t *)(((char *) dst_bits) +
+- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+- (uint16_t *)(((char *) src_bits) +
+- src_y * src_stride * 4 + src_x * 2), src_stride * 2);
++ width >> 1,
++ height,
++ (uint16_t *) dst_bytes,
++ dst_stride >> 1,
++ (uint16_t *) src_bytes,
++ src_stride >> 1);
+ return TRUE;
+ case 32:
+ pixman_composite_src_8888_8888_asm_neon (
+- width, height,
+- (uint32_t *)(((char *) dst_bits) +
+- dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+- (uint32_t *)(((char *) src_bits) +
+- src_y * src_stride * 4 + src_x * 4), src_stride);
++ width >> 2,
++ height,
++ (uint32_t *) dst_bytes,
++ dst_stride >> 2,
++ (uint32_t *) src_bytes,
++ src_stride >> 2);
+ return TRUE;
+ default:
+ return FALSE;
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman_0.21.6.bb b/recipes/xorg-lib/pixman_0.21.6.bb
new file mode 100644
index 0000000..d5cc2c5
--- /dev/null
+++ b/recipes/xorg-lib/pixman_0.21.6.bb
@@ -0,0 +1,53 @@
+require pixman.inc
+
+PR = "${INC_PR}.0"
+
+SRC_URI = "http://xorg.freedesktop.org/archive/individual/lib/${BPN}-${PV}.tar.gz \
+file://0002-Fix-compilation-on-Win32.patch \
+file://0003-test-Fix-tests-for-compilation-on-Windows.patch \
+file://0004-test-Add-Makefile-for-Win32.patch \
+file://0005-Do-not-include-unused-headers.patch \
+file://0006-test-Silence-MSVC-warnings.patch \
+file://0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch \
+file://0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch \
+file://0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch \
+file://0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch \
+file://0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch \
+file://0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch \
+file://0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch \
+file://0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch \
+file://0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch \
+file://0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch \
+file://0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch \
+file://0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch \
+file://0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch \
+file://0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch \
+file://0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch \
+file://0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch \
+file://0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch \
+file://0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch \
+file://0025-Simplify-the-prototype-for-iterator-initializers.patch \
+file://0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch \
+file://0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch \
+file://0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch \
+file://0029-test-Fix-infinite-loop-in-composite.patch \
+file://0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch \
+file://0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch \
+file://0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch \
+file://0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch \
+file://0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch \
+file://0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch \
+file://0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch \
+file://0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \
+file://0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
+file://0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
+file://0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
+"
+
+SRC_URI[md5sum] = "b083fce3bd41ffd083e50dfe6612612d"
+SRC_URI[sha256sum] = "ee22c24acc0c1b55a59aeeebfb3552038495d301e334608ac8e6af98172c748c"
+
+NEON = " --disable-arm-neon "
+NEON_armv7a = " "
+
+EXTRA_OECONF = "${NEON} --disable-gtk"