about | summary | refs | log | tree | commit | diff | stats
path: root/recipes/xorg-lib
diff options
context:
space:
mode:
author Koen Kooi <koen@openembedded.org> 2010-10-02 14:42:53 +0200
committer Koen Kooi <koen@openembedded.org> 2010-10-02 14:42:53 +0200
commit 96c6405186e195126e87ed5408056421bbfe35ab (patch)
tree aff7cb02edae4872a97dd1df9581f6185eab5769 /recipes/xorg-lib
parent c2761b25769ff5abe1c69e4c7fdb12d11d6c3850 (diff)
download openembedded-96c6405186e195126e87ed5408056421bbfe35ab.tar.gz
pixman 0.19.4: add 3 more NEON patches
Diffstat (limited to 'recipes/xorg-lib')
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch 2
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch 96
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch 110
-rw-r--r-- recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch 94
-rw-r--r-- recipes/xorg-lib/pixman_0.19.4.bb 5
10 files changed, 310 insertions, 7 deletions
diff --git a/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch b/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch
index 756c418fce..2ff71ae2d8 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch
@@ -1,7 +1,7 @@
From 38aabb3be87ea68e37f34256c778d07f62680ec6 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Thu, 10 Dec 2009 00:51:50 +0200
-Subject: [PATCH 1/6] ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline
+Subject: [PATCH 1/9] ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline
---
pixman/pixman-access.c | 23 ++++++++++++++++++++++-
diff --git a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch
index 324fb41393..178dad99bc 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch
@@ -1,7 +1,7 @@
From e3bfd272cf813b8419757a3b59128b3568e5f800 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Mon, 20 Sep 2010 19:07:33 +0300
-Subject: [PATCH 2/6] Don't discriminate PAD and REFLECT repeat in standard fast paths
+Subject: [PATCH 2/9] Don't discriminate PAD and REFLECT repeat in standard fast paths
Without this fix, setting PAD repeat on a source image prevents
the use of any nonscaled standard fast paths, affecting performance
diff --git a/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
index 2264d0d3b6..ca63cadc4b 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
@@ -1,7 +1,7 @@
From c5099dabb417cab343185d6e22ae4925e53a756f Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Tue, 16 Mar 2010 16:55:28 +0100
-Subject: [PATCH 3/6] Generic C implementation of pixman_blt with overlapping support
+Subject: [PATCH 3/9] Generic C implementation of pixman_blt with overlapping support
Uses memcpy/memmove functions to copy pixels, can handle the
case when both source and destination areas are in the same
diff --git a/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
index bbade7f8ba..b8323831e8 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
@@ -1,7 +1,7 @@
From f8c3deb1f7a26992fe217d1748a1fa5c832bbbd2 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Thu, 22 Oct 2009 05:45:47 +0300
-Subject: [PATCH 4/6] Support of overlapping src/dst for pixman_blt_mmx
+Subject: [PATCH 4/9] Support of overlapping src/dst for pixman_blt_mmx
---
pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++---------------------
diff --git a/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
index bf540e06c5..71b5fded02 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
@@ -1,7 +1,7 @@
From 79fe7f347fe396aa2c917a1928fc18ab9321336c Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Thu, 22 Oct 2009 05:45:54 +0300
-Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2
+Subject: [PATCH 5/9] Support of overlapping src/dst for pixman_blt_sse2
---
pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++--------------------
diff --git a/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
index cd25193670..8992c05697 100644
--- a/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
+++ b/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
@@ -1,7 +1,7 @@
From ea0f7b1ae605bb57ca23e88b38b9c19390596723 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
Date: Wed, 18 Nov 2009 06:08:48 +0200
-Subject: [PATCH 6/6] Support of overlapping src/dst for pixman_blt_neon
+Subject: [PATCH 6/9] Support of overlapping src/dst for pixman_blt_neon
---
pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++--------
diff --git a/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch
new file mode 100644
index 0000000000..d62f12dd5c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch
@@ -0,0 +1,96 @@
+From c3f1715c4698e90396d02f7b5acc314d99780941 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 23 Sep 2010 22:28:55 +0300
+Subject: [PATCH 7/9] ARM: added 'neon_composite_add_0565_8_0565' fast path
+
+TODO: That's an initial variant, needs performance tuning
+---
+ pixman/pixman-arm-neon-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c | 4 +++
+ 2 files changed, 56 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index e1a697e..23ddae9 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1890,3 +1890,55 @@ generate_composite_function \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
++
++/******************************************************************************/
++
++.macro pixman_composite_add_0565_8_0565_process_pixblock_head
++ /* mask is in d15 */
++ convert_0565_to_x888 q4, d2, d1, d0
++ convert_0565_to_x888 q5, d6, d5, d4
++ /* source pixel data is in {d0, d1, d2, XX} */
++ /* destination pixel data is in {d4, d5, d6, XX} */
++ vmull.u8 q6, d15, d2
++ vmull.u8 q5, d15, d1
++ vmull.u8 q4, d15, d0
++ vrshr.u16 q12, q6, #8
++ vrshr.u16 q11, q5, #8
++ vrshr.u16 q10, q4, #8
++ vraddhn.u16 d2, q6, q12
++ vraddhn.u16 d1, q5, q11
++ vraddhn.u16 d0, q4, q10
++.endm
++
++.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
++ vqadd.u8 q0, q0, q2
++ vqadd.u8 q1, q1, q3
++ /* 32bpp result is in {d0, d1, d2, XX} */
++ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
++.endm
++
++/* TODO: expand macros and do better instructions scheduling */
++.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
++ vld1.8 {d15}, [MASK]!
++ pixman_composite_add_0565_8_0565_process_pixblock_tail
++ vld1.16 {d8, d9}, [SRC]!
++ vld1.16 {d10, d11}, [DST_R, :128]!
++ cache_preload 8, 8
++ pixman_composite_add_0565_8_0565_process_pixblock_head
++ vst1.16 {d28, d29}, [DST_W, :128]!
++.endm
++
++generate_composite_function \
++ pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
++ FLAG_DST_READWRITE, \
++ 8, /* number of pixels, processed in a single block */ \
++ 5, /* prefetch distance */ \
++ default_init_need_all_regs, \
++ default_cleanup_need_all_regs, \
++ pixman_composite_add_0565_8_0565_process_pixblock_head, \
++ pixman_composite_add_0565_8_0565_process_pixblock_tail, \
++ pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
++ 28, /* dst_w_basereg */ \
++ 10, /* dst_r_basereg */ \
++ 8, /* src_basereg */ \
++ 15 /* mask_basereg */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index db1c2df..a8be7e4 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -82,6 +82,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+ uint8_t, 1, uint8_t, 1, uint8_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565,
++ uint16_t, 1, uint8_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+@@ -296,6 +298,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8),
++ PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565),
++ PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888),
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch b/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch
new file mode 100644
index 0000000000..e720a6a428
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch
@@ -0,0 +1,110 @@
+From 89cbe6eb5df2a1c85ba996caea6479e2434d51a5 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 23 Sep 2010 23:09:46 +0300
+Subject: [PATCH 8/9] ARM: added 'neon_composite_out_reverse_0565_8_0565' fast path
+
+TODO: That's an initial variant, needs performance tuning
+---
+ pixman/pixman-arm-neon-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c | 4 ++
+ 2 files changed, 70 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 23ddae9..7a599f0 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -1942,3 +1942,69 @@ generate_composite_function \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
++
++/******************************************************************************/
++
++.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_head
++ /* mask is in d15 */
++ convert_0565_to_8888 q4, d3, d2, d1, d0
++ convert_0565_to_x888 q5, d6, d5, d4
++ /* source pixel data is in {d0, d1, d2, d3} */
++ /* destination pixel data is in {d4, d5, d6, xx} */
++
++ /* 'in' */
++ vmull.u8 q8, d15, d3
++ vmull.u8 q6, d15, d2
++ vmull.u8 q5, d15, d1
++ vmull.u8 q4, d15, d0
++ vrshr.u16 q13, q8, #8
++ vrshr.u16 q12, q6, #8
++ vrshr.u16 q11, q5, #8
++ vrshr.u16 q10, q4, #8
++ vraddhn.u16 d3, q8, q13
++ vraddhn.u16 d2, q6, q12
++ vraddhn.u16 d1, q5, q11
++ vraddhn.u16 d0, q4, q10
++ vmvn.8 d24, d3 /* get inverted alpha */
++ /* now do alpha blending */
++ vmull.u8 q8, d24, d4
++ vmull.u8 q9, d24, d5
++ vmull.u8 q10, d24, d6
++.endm
++
++.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail
++ vrshr.u16 q14, q8, #8
++ vrshr.u16 q15, q9, #8
++ vrshr.u16 q12, q10, #8
++ vraddhn.u16 d0, q14, q8
++ vraddhn.u16 d1, q15, q9
++ vraddhn.u16 d2, q12, q10
++ /* 32bpp result is in {d0, d1, d2, XX} */
++ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
++.endm
++
++/* TODO: expand macros and do better instructions scheduling */
++.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head
++ vld1.8 {d15}, [MASK]!
++ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail
++ vld1.16 {d8, d9}, [SRC]!
++ vld1.16 {d10, d11}, [DST_R, :128]!
++ cache_preload 8, 8
++ pixman_composite_out_reverse_0565_8_0565_process_pixblock_head
++ vst1.16 {d28, d29}, [DST_W, :128]!
++.endm
++
++generate_composite_function \
++ pixman_composite_out_reverse_0565_8_0565_asm_neon, 16, 8, 16, \
++ FLAG_DST_READWRITE, \
++ 8, /* number of pixels, processed in a single block */ \
++ 5, /* prefetch distance */ \
++ default_init_need_all_regs, \
++ default_cleanup_need_all_regs, \
++ pixman_composite_out_reverse_0565_8_0565_process_pixblock_head, \
++ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail, \
++ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head, \
++ 28, /* dst_w_basereg */ \
++ 10, /* dst_r_basereg */ \
++ 8, /* src_basereg */ \
++ 15 /* mask_basereg */
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index a8be7e4..da1fdeb 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -94,6 +94,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
+ uint32_t, 1, uint8_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
+ uint16_t, 1, uint8_t, 1, uint16_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, out_reverse_0565_8_0565,
++ uint16_t, 1, uint8_t, 1, uint16_t, 1)
+
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t w,
+@@ -306,6 +308,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
++ PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5, a8, r5g6b5, neon_composite_out_reverse_0565_8_0565),
++ PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5, a8, b5g6r5, neon_composite_out_reverse_0565_8_0565),
+
+ { PIXMAN_OP_NONE },
+ };
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch b/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch
new file mode 100644
index 0000000000..1c7f6ec81d
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch
@@ -0,0 +1,94 @@
+From 130211983628678ccee23535765994aa0b9d5122 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 23 Sep 2010 23:41:50 +0300
+Subject: [PATCH 9/9] ARM: added 'neon_composite_out_reverse_8_0565' fast path
+
+TODO: That's an initial variant, needs performance tuning
+---
+ pixman/pixman-arm-neon-asm.S | 50 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c | 4 +++
+ 2 files changed, 54 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 7a599f0..2234b1b 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2008,3 +2008,53 @@ generate_composite_function \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
++
++/******************************************************************************/
++
++.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
++ /* mask is in d15 */
++ convert_0565_to_x888 q5, d6, d5, d4
++ /* destination pixel data is in {d4, d5, d6, xx} */
++ vmvn.8 d24, d15 /* get inverted alpha */
++ /* now do alpha blending */
++ vmull.u8 q8, d24, d4
++ vmull.u8 q9, d24, d5
++ vmull.u8 q10, d24, d6
++.endm
++
++.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
++ vrshr.u16 q14, q8, #8
++ vrshr.u16 q15, q9, #8
++ vrshr.u16 q12, q10, #8
++ vraddhn.u16 d0, q14, q8
++ vraddhn.u16 d1, q15, q9
++ vraddhn.u16 d2, q12, q10
++ /* 32bpp result is in {d0, d1, d2, XX} */
++ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
++.endm
++
++/* TODO: expand macros and do better instructions scheduling */
++.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
++ vld1.8 {d15}, [SRC]!
++ pixman_composite_out_reverse_8_0565_process_pixblock_tail
++ vld1.16 {d10, d11}, [DST_R, :128]!
++ cache_preload 8, 8
++ pixman_composite_out_reverse_8_0565_process_pixblock_head
++ vst1.16 {d28, d29}, [DST_W, :128]!
++.endm
++
++generate_composite_function \
++ pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
++ FLAG_DST_READWRITE, \
++ 8, /* number of pixels, processed in a single block */ \
++ 5, /* prefetch distance */ \
++ default_init_need_all_regs, \
++ default_cleanup_need_all_regs, \
++ pixman_composite_out_reverse_8_0565_process_pixblock_head, \
++ pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
++ pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
++ 28, /* dst_w_basereg */ \
++ 10, /* dst_r_basereg */ \
++ 15, /* src_basereg */ \
++ 0 /* mask_basereg */
++
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index da1fdeb..2e37a4a 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -60,6 +60,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
++PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565,
++ uint8_t, 1, uint16_t, 1)
+
+ PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+ uint16_t, 1)
+@@ -310,6 +312,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5, a8, r5g6b5, neon_composite_out_reverse_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5, a8, b5g6r5, neon_composite_out_reverse_0565_8_0565),
++ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565),
++ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565),
+
+ { PIXMAN_OP_NONE },
+ };
+--
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman_0.19.4.bb b/recipes/xorg-lib/pixman_0.19.4.bb
index 1f9b9384f5..a0efea1a9e 100644
--- a/recipes/xorg-lib/pixman_0.19.4.bb
+++ b/recipes/xorg-lib/pixman_0.19.4.bb
@@ -3,7 +3,7 @@ require pixman.inc
SRC_URI[archive.md5sum] = "100a2d23f1d5683fdaa5d7ca71a0182b"
SRC_URI[archive.sha256sum] = "04e613f87fec13e5d6e8540587af1112e9ab19f9d550751e848a2d65deb26fd6"
-PR = "${INC_PR}.0"
+PR = "${INC_PR}.1"
SRC_URI += "\
file://0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch \
@@ -12,6 +12,9 @@ SRC_URI += "\
file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \
file://0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \
file://0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \
+ file://0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch \
+ file://0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch \
+ file://0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch \
"
NEON = " --disable-arm-neon "